Extreme Learning Machine

Categories: R, Code

Author: Simon-Pierre Boucher

Published: February 11, 2023

elm <- function(X, y, n_hidden=NULL, active_fun=tanh) {
  # X: an N observations x p features matrix
  # y: the target
  # n_hidden: the number of hidden nodes
  # active_fun: activation function
  pp1 = ncol(X) + 1
  w0 = matrix(rnorm(pp1*n_hidden), pp1, n_hidden)       # random input-to-hidden weights (incl. bias column)
  h = active_fun(cbind(1, scale(X)) %*% w0)             # compute hidden layer
  B = MASS::ginv(h) %*% y                               # solve for the output weights via the pseudoinverse
  fit = h %*% B                                         # fitted values
  list(fit=fit, loss=crossprod(fit - y), B=B, w0=w0)    # loss = sum of squared errors
}
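A minimal sketch of my own, not from the post: MASS::ginv(h) %*% y is just the least-squares solution for the output weights, so qr.solve(h, y) would give the same B whenever h has full column rank. A small ridge penalty is a common way to stabilize that solve when h is ill-conditioned; elm_ridge and lambda below are names I introduce here, not part of the original code.

elm_ridge <- function(X, y, n_hidden, active_fun=tanh, lambda=1e-6) {
  pp1 = ncol(X) + 1
  w0 = matrix(rnorm(pp1*n_hidden), pp1, n_hidden)       # random input-to-hidden weights, as in elm()
  h = active_fun(cbind(1, scale(X)) %*% w0)             # hidden layer
  # ridge-regularized normal equations: (h'h + lambda*I) B = h'y
  B = solve(crossprod(h) + lambda*diag(n_hidden), crossprod(h, y))
  fit = h %*% B
  list(fit=fit, loss=crossprod(fit - y), B=B, w0=w0)
}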



# one variable, complex function -------------------------------------------
library(tidyverse); library(mgcv)
── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
✔ ggplot2 3.4.1     ✔ purrr   1.0.1
✔ tibble  3.1.8     ✔ dplyr   1.1.0
✔ tidyr   1.3.0     ✔ stringr 1.5.0
✔ readr   2.1.3     ✔ forcats 1.0.0
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
Loading required package: nlme

Attaching package: 'nlme'

The following object is masked from 'package:dplyr':

    collapse

This is mgcv 1.8-41. For overview type 'help("mgcv-package")'.
set.seed(123)
n = 5000
x = runif(n)
# x = rnorm(n)
mu = sin(2*(4*x-2)) + 2*exp(-(16^2)*((x-.5)^2))
y = rnorm(n, mu, .3)
# qplot(x, y)
d = data.frame(x,y) 

X_ = matrix(x, ncol=1)    # note: as.matrix() would silently ignore an ncol argument

test = elm(X_, y, n_hidden=100)
str(test)
List of 4
 $ fit : num [1:5000, 1] -1.0239 0.7311 -0.413 0.0806 -0.4112 ...
 $ loss: num [1, 1] 442
 $ B   : num [1:100, 1] 217 -608 1408 -1433 -4575 ...
 $ w0  : num [1:2, 1:100] 0.35 0.814 -0.517 -2.692 -1.097 ...
# qplot(x, y) + geom_line(aes(y=test$fit), color='#1e90ff')
cor(test$fit[,1], y)^2
[1] 0.8862518
gam_comparison = gam(y~s(x))
summary(gam_comparison)$r.sq
[1] 0.8482127
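A small aside of mine on the comparison: summary(gam_comparison)$r.sq is an adjusted R-squared, while cor()^2 above is not, so computing the same unadjusted quantity for both keeps the comparison apples to apples.

cor(fitted(gam_comparison), y)^2                        # unadjusted, to match the ELM number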
d %>% 
  mutate(fit_elm = test$fit,
         fit_gam = fitted(gam_comparison)) %>% 
  ggplot() + 
  geom_point(aes(x, y), alpha=.1) +
  geom_line(aes(x, y=fit_elm), color='#1e90ff') + 
  geom_line(aes(x, y=fit_gam), color='darkred')

# motorcycle accident data ------------------------------------------------

data('mcycle', package='MASS')
x = mcycle[,1]
X_ = matrix(x, ncol=1)
y = mcycle[,2]

test = elm(X_, y, n_hidden=100)
cor(test$fit[,1], y)^2
[1] 0.8122349
gam_comparison = gam(y~s(x))
summary(gam_comparison)$r.sq
[1] 0.7832988
qplot(x, y) +
  geom_line(aes(y=test$fit), color='#1e90ff') + 
  geom_line(aes(y=fitted(gam_comparison)), color='darkred')
Warning: `qplot()` was deprecated in ggplot2 3.4.0.
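Because w0 is drawn at random, the fitted curve changes from run to run; a simple stabilizer, sketched here as my own aside rather than part of the post, is to average the fits from several independent draws.

fit_draws = replicate(10, elm(X_, y, n_hidden=100)$fit[,1])  # 10 independent random-weight fits
fit_avg = rowMeans(fit_draws)                                # ensemble average
cor(fit_avg, y)^2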

# add covariates ----------------------------------------------------------

d = gamSim(eg=7, n=10000)
Gu & Wahba 4 term additive model, correlated predictors
X_ = as.matrix(d[,2:5])
y = d[,1]

n_nodes = c(10, 25, 100, 250, 500, 1000)
test = lapply(n_nodes, function(n) elm(X_, y, n_hidden=n))       # this will take a few seconds
final_n = which.min(sapply(test, function(x) x$loss))
best = test[[final_n]]
# str(best)
qplot(best$fit[,1], y, alpha=.2)

cor(best$fit[,1], y)^2
[1] 0.7241967
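Worth flagging: the loss used to pick final_n is a training loss, which tends to keep falling as nodes are added, so the selection above will usually favor the largest model. A holdout split is a fairer criterion; the sketch below is my own and reuses the post's shortcut of rescaling each subset by its own statistics.

idx = sample(nrow(X_), 0.8*nrow(X_))                    # 80/20 train/validation split
fits = lapply(n_nodes, function(n) elm(X_[idx,], y[idx], n_hidden=n))
val_loss = sapply(seq_along(n_nodes), function(i) {
  h_val = tanh(cbind(1, scale(X_[-idx,])) %*% fits[[i]]$w0)  # same transform on held-out rows
  crossprod(h_val %*% fits[[i]]$B - y[-idx])
})
n_nodes[which.min(val_loss)]                            # nodes chosen by validation loss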
gam_comparison = gam(y~s(x0) + s(x1) + s(x2) + s(x3), data=d)
gam.check(gam_comparison)


Method: GCV   Optimizer: magic
Smoothing parameter selection converged after 15 iterations.
The RMS GCV score gradient at convergence was 9.309879e-07 .
The Hessian was positive definite.
Model rank =  37 / 37 

Basis dimension (k) checking results. Low p-value (k-index<1) may
indicate that k is too low, especially if edf is close to k'.

        k'  edf k-index p-value
s(x0) 9.00 4.71    1.00    0.48
s(x1) 9.00 4.89    1.00    0.35
s(x2) 9.00 8.96    0.99    0.20
s(x3) 9.00 1.00    1.00    0.47
summary(gam_comparison)$r.sq
[1] 0.6952978
test_data0 = gamSim(eg=7)  # default n = 400
Gu & Wahba 4 term additive model, correlated predictors
test_data = cbind(1, scale(test_data0[,2:5]))

elm_prediction = tanh(test_data %*% best$w0) %*% best$B          # remember to use your specific activation function here
gam_prediction = predict(gam_comparison, newdata=test_data0)
cor(data.frame(elm_prediction, gam_prediction), test_data0$y)^2
                    [,1]
elm_prediction 0.6873090
gam_prediction 0.7185687
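One last caveat, again my own rather than the post's: scale(test_data0[,2:5]) standardizes the 400 test rows by their own means and sds, which can drift from the training statistics on a sample this small. Reusing the training centers and scales is the more careful choice:

train_scaled = scale(X_)                                # X_ still holds the 10000 training rows
ctr = attr(train_scaled, "scaled:center")
sds = attr(train_scaled, "scaled:scale")
test_scaled = cbind(1, scale(test_data0[,2:5], center=ctr, scale=sds))
elm_prediction2 = tanh(test_scaled %*% best$w0) %*% best$B   # same activation as in elm()
cor(elm_prediction2[,1], test_data0$y)^2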