MODELEX.CO - Nelder Mead

nelder_mead = function(
  f, 
  x_start,
  step = 0.1,
  no_improve_thr  = 1e-12,
  no_improv_break = 10,
  max_iter = 0,
  alpha    = 1,
  gamma    = 2,
  rho      = 0.5,
  sigma    = 0.5,
  verbose  = FALSE
  ) {
  # init
  dim = length(x_start)
  prev_best = f(x_start)
  no_improv = 0
  res = list(list(x_start = x_start, prev_best = prev_best))
  
  
  for (i in 1:dim) {
    x = x_start
    x[i]  = x[i] + step
    score = f(x)
    res = append(res, list(list(x_start = x, prev_best = score)))
  }
  
  # simplex iter
  iters = 0
  
  while (TRUE) {
    # order
    idx  = sapply(res, `[[`, 2)
    res  = res[order(idx)]   # ascending order
    best = res[[1]][[2]]
    
    # break after max_iter
    if (max_iter > 0 & iters >= max_iter) return(res[[1]])
    iters = iters + 1
    
    # break after no_improv_break iterations with no improvement
    if (verbose) message(paste('...best so far:', best))
    
    if (best < (prev_best - no_improve_thr)) {
      no_improv = 0
      prev_best = best
    } else {
      no_improv = no_improv + 1
    }
    
    if (no_improv >= no_improv_break) return(res[[1]])
    
    # centroid
    x0 = rep(0, dim)
    for (tup in 1:(length(res)-1)) {
      for (i in 1:dim) {
        x0[i] = x0[i] + res[[tup]][[1]][i] / (length(res)-1)
      }
    }
    
   # reflection
   xr = x0 + alpha * (x0 - res[[length(res)]][[1]])
   rscore = f(xr)
   if (res[[1]][[2]] <= rscore & 
       rscore < res[[length(res)-1]][[2]]) {
     res[[length(res)]] = list(xr, rscore)
     next
   }
     
   # expansion
   if (rscore < res[[1]][[2]]) {
     # xe = x0 + gamma*(x0 - res[[length(res)]][[1]])   # issue with this
     xe = x0 + gamma * (xr - x0)   
     escore = f(xe)
     if (escore < rscore) {
       res[[length(res)]] = list(xe, escore)
       next
     } else {
       res[[length(res)]] = list(xr, rscore)
       next
     }
   }
   
   # contraction
   # xc = x0 + rho*(x0 - res[[length(res)]][[1]])  # issue with wiki consistency for rho values (and optim)
   xc = x0 + rho * (res[[length(res)]][[1]] - x0)
   cscore = f(xc)
   if (cscore < res[[length(res)]][[2]]) {
     res[[length(res)]] = list(xc, cscore)
     next
   }
   
   # reduction
   x1   = res[[1]][[1]]
   nres = list()
   for (tup in res) {
     redx  = x1 + sigma * (tup[[1]] - x1)
     score = f(redx)
     nres  = append(nres, list(list(redx, score)))
   }
   
   res = nres
  }
}




#' ## Example
#' The function to minimize.
#' 
f = function(x) {
  sin(x[1]) * cos(x[2]) * (1 / (abs(x[3]) + 1))
}

nelder_mead(
  f, 
  c(0, 0, 0), 
  max_iter = 1000, 
  no_improve_thr = 1e-12
)

[[1]]
[1] -1.570797e+00 -2.235577e-07  1.637460e-14

[[2]]
[1] -1

#' Compare to `optimx`.  You may see warnings.
optimx::optimx(
  par = c(0, 0, 0),
  fn = f,
  method = "Nelder-Mead",
  control = list(
    alpha = 1,
    gamma = 2,
    beta = 0.5,
    maxit = 1000,
    reltol = 1e-12
  )
)

Warning in max(logpar): aucun argument pour max ; -Inf est renvoyé

Warning in min(logpar): aucun argument trouvé pour min ; Inf est renvoyé

                   p1           p2           p3 value fevals gevals niter
Nelder-Mead -1.570796 1.394018e-08 1.088215e-16    -1    861     NA    NA
            convcode kkt1 kkt2 xtime
Nelder-Mead        0 TRUE TRUE     0

#' ## A Regression Model
#' 
#' I find a regression model to be more applicable/intuitive for my needs, so
#' provide an example for that case.
#' 
#' 
#' ### Data setup

set.seed(8675309)
N = 500
npreds = 5
X = cbind(1, matrix(rnorm(N * npreds), ncol = npreds))
beta = runif(ncol(X), -1, 1)
y = X %*% beta + rnorm(nrow(X))


#' Least squares loss function.

f = function(b) {
  crossprod(y - X %*% b)[,1]  # if using optimx need scalar
}

# lm estimates
lm.fit(X, y)$coef

         x1          x2          x3          x4          x5          x6 
-0.96214657  0.59432481  0.04864576  0.27573466  0.97525840 -0.07470287

nm_result = nelder_mead(
  f, 
  runif(ncol(X)), 
  max_iter = 2000,
  no_improve_thr = 1e-12,
  verbose = FALSE
)

#' ### Comparison
#' Compare to `optimx`.

opt_out = optimx::optimx(
  runif(ncol(X)),
  fn = f,  # model function
  method  = 'Nelder-Mead',
  control = list(
    alpha = 1,
    gamma = 2,
    beta  = 0.5,
    #rho
    maxit  = 2000,
    reltol = 1e-12
  )
)

rbind(
  nm_func = unlist(nm_result),
  nm_optimx = opt_out[1:7]
)

                  p1       p2         p3        p4        p5          p6
nm_func   -0.9621510 0.594327 0.04864183 0.2757265 0.9752524 -0.07470389
nm_optimx -0.9621494 0.594325 0.04864620 0.2757383 0.9752579 -0.07470054
             value
nm_func   501.3155
nm_optimx 501.3155

#' # Second version
#' 
#' This is a more natural R approach in my opinion.

nelder_mead2 = function(
  f,
  x_start,
  step = 0.1,
  no_improve_thr  = 1e-12,
  no_improv_break = 10,
  max_iter = 0,
  alpha = 1,
  gamma = 2,
  rho   = 0.5,
  sigma = 0.5,
  verbose = FALSE
) {
  
  # init
  npar = length(x_start)
  nc = npar + 1
  prev_best = f(x_start)
  no_improv = 0
  res = matrix(c(x_start, prev_best), ncol = nc)
  colnames(res) = c(paste('par', 1:npar, sep = '_'), 'score')
  
  for (i in 1:npar) {
    x     = x_start
    x[i]  = x[i] + step
    score = f(x)
    res   = rbind(res, c(x, score))
  }
  
  # simplex iter
  iters = 0
  
  while (TRUE) {
    # order
    res  = res[order(res[, nc]), ]   # ascending order
    best = res[1, nc]
    
    # break after max_iter
    if (max_iter & iters >= max_iter) return(res[1, ])
    iters = iters + 1
    
    # break after no_improv_break iterations with no improvement
    if (verbose) message(paste('...best so far:', best))
    
    if (best < (prev_best - no_improve_thr)) {
      no_improv = 0
      prev_best = best
    } else {
      no_improv = no_improv + 1
    }
    
    if (no_improv >= no_improv_break)
      return(res[1, ])
    
    nr = nrow(res)
    
    # centroid: more efficient than previous double loop
    x0 = colMeans(res[(1:npar), -nc])
    
    # reflection
    xr = x0 + alpha * (x0 - res[nr, -nc])
    
    rscore = f(xr)
    
    if (res[1, 'score'] <= rscore & rscore < res[npar, 'score']) {
      res[nr,] = c(xr, rscore)
      next
    }
    
    # expansion
    if (rscore < res[1, 'score']) {
      xe = x0 + gamma * (xr - x0)
      escore = f(xe)
      if (escore < rscore) {
        res[nr, ] = c(xe, escore)
        next
      } else {
        res[nr, ] = c(xr, rscore)
        next
      }
    }
    
    # contraction
    xc = x0 + rho * (res[nr, -nc] - x0)
    
    cscore = f(xc)
    
    if (cscore < res[nr, 'score']) {
      res[nr,] = c(xc, cscore)
      next
    }
    
    # reduction
    x1 = res[1, -nc]
    
    nres = res
    
    for (i in 1:nr) {
      redx  = x1 + sigma * (res[i, -nc] - x1)
      score = f(redx)
      nres[i, ] = c(redx, score)
    }
    
    res = nres
  }
}


#' ## Example function

f = function(x) {
  sin(x[1]) * cos(x[2]) * (1 / (abs(x[3]) + 1))
}

nelder_mead2(
  f, 
  c(0, 0, 0), 
  max_iter = 1000, 
  no_improve_thr = 1e-12
)

        par_1         par_2         par_3         score 
-1.570797e+00 -2.235577e-07  1.622809e-14 -1.000000e+00

optimx::optimx(
  par = c(0, 0, 0), 
  fn = f, 
  method   = "Nelder-Mead",
  control  = list(
    alpha  = 1,
    gamma  = 2,
    beta   = 0.5,
    maxit  = 1000,
    reltol = 1e-12
  )
)

Warning in max(logpar): aucun argument pour max ; -Inf est renvoyé

Warning in max(logpar): aucun argument trouvé pour min ; Inf est renvoyé

                   p1           p2           p3 value fevals gevals niter
Nelder-Mead -1.570796 1.394018e-08 1.088215e-16    -1    861     NA    NA
            convcode kkt1 kkt2 xtime
Nelder-Mead        0 TRUE TRUE     0

#' ## A Regression Model

set.seed(8675309)
N = 500
npreds = 5
X = cbind(1, matrix(rnorm(N * npreds), ncol = npreds))
beta = runif(ncol(X), -1, 1)
y = X %*% beta + rnorm(nrow(X))

# least squares loss
f = function(b) {
  crossprod(y - X %*% b)[,1]  # if using optimx need scalar
}


lm_par = lm.fit(X, y)$coef

nm_par = nelder_mead2(
  f, 
  runif(ncol(X)),
  max_iter = 2000,
  no_improve_thr = 1e-12
)


#' ## Comparison
#' Compare to `optimx`.

opt_par = optimx::optimx(
  runif(ncol(X)),
  fn = f,
  method   = 'Nelder-Mead',
  control  = list(
    alpha  = 1,
    gamma  = 2,
    beta   = 0.5,
    maxit  = 2000,
    reltol = 1e-12
  )
)[1:(npreds + 1)]

rbind(
  lm = lm_par,
  nm = nm_par,
  optimx = opt_par,
  truth  = beta
)

Warning in rbind(deparse.level, ...): number of columns of result, 6, is not a
multiple of vector length 7 of arg 2

               p1        p2         p3        p4        p5          p6
lm     -0.9621466 0.5943248 0.04864576 0.2757347 0.9752584 -0.07470287
nm     -0.9621510 0.5943270 0.04864183 0.2757265 0.9752524 -0.07470389
optimx -0.9621494 0.5943250 0.04864620 0.2757383 0.9752579 -0.07470054
truth  -0.9087584 0.6195267 0.07358131 0.3196977 0.9561050 -0.07977885