diff options
author | Clement Farabet <clement.farabet@gmail.com> | 2011-09-29 22:59:22 +0400 |
---|---|---|
committer | Clement Farabet <clement.farabet@gmail.com> | 2011-09-29 22:59:22 +0400 |
commit | ba5cb832916589daead581756e489a58bae851b1 (patch) | |
tree | b1c61bed8b7de9119c64d594737d40a6d3c8dbd2 | |
parent | be617aeef103ad45f32c79c53fc4fa511ef5dee9 (diff) | |
parent | 7edad5d81e6c659149c9fb030df572e3a8e6926c (diff) |
Merge branch 'master' of github.com:clementfarabet/lua---nnx
-rw-r--r-- | CGOptimization.lua | 66 | ||||
-rw-r--r-- | LBFGSOptimization.lua | 39 | ||||
-rw-r--r-- | lbfgs.c | 376 | ||||
-rw-r--r-- | lbfgs.h | 866 | ||||
-rw-r--r-- | test/rosenbrock.lua | 50 | ||||
-rw-r--r-- | test/test_cg.lua | 48 | ||||
-rw-r--r-- | test/test_lbfgs.lua | 48 |
7 files changed, 871 insertions, 622 deletions
diff --git a/CGOptimization.lua b/CGOptimization.lua index 7912784..ed276d9 100644 --- a/CGOptimization.lua +++ b/CGOptimization.lua @@ -4,27 +4,65 @@ function CG:__init(...) require 'liblbfgs' parent.__init(self, ...) xlua.unpack_class(self, {...}, - 'CGOptimization', nil, - {arg='maxEvaluation', type='number', - help='maximum nb of function evaluations per pass (0 = no max)', default=0}, - {arg='maxIterations', type='number', - help='maximum nb of iterations per pass (0 = no max)', default=0}, - {arg='maxLineSearch', type='number', - help='maximum nb of steps in line search', default=20}, - {arg='sparsity', type='number', - help='sparsity coef (Orthantwise C)', default=0}, - {arg='parallelize', type='number', - help='parallelize onto N cores (experimental!)', default=1} - ) + 'CGOptimization', nil, + {arg='maxEvaluation', type='number', + help='maximum nb of function evaluations per pass (0 = no max)', default=0}, + {arg='maxIterations', type='number', + help='maximum nb of iterations per pass (0 = no max)', default=0}, + {arg='maxLineSearch', type='number', + help='maximum nb of steps in line search', default=20}, + {arg='sparsity', type='number', + help='sparsity coef (Orthantwise C)', default=0}, + {arg='linesearch', type='string', + help=[[ type of linesearch used: + "morethuente", "m", + "armijo", "a", + "wolfe", "w", + "strong_wolfe", "s" + ]], + default='wolfe'}, + {arg='momentum', type='string', + help=[[ type of momentum used: + "fletcher-reeves", "fr", + "polack-ribiere", "pr", + "hestens-steifel", "hs", + "gilbert-nocedal", "gn" + ]], + default='fletcher-reeves'}, + {arg='parallelize', type='number', + help='parallelize onto N cores (experimental!)', default=1} + ) + local linesearch = 2 + if not (self.linesearch == 'w' or self.linesearch == 'wolfe') then + if self.linesearch == 'm' or self.linesearch == 'morethuente' then + linesearch = 0 + elseif self.linesearch == 'a' or self.linesearch == 'armijo' then + linesearch = 1 + elseif self.linesearch == 's' or self.linesearch == 'strong_wolfe' then + linesearch = 3 + end + end + + local momentum = 0 + if not (self.momentum == 'fr' or self.momentum == 'fletcher-reeves') then + if self.momentum == 'pr' or self.momentum == 'polack-ribiere' then + momentum = 1 + elseif self.momentum == 'hs' or self.momentum == 'hestens-steifel' then + momentum = 2 + elseif self.momentum == 'gn' or self.momentum == 'gilbert-nocedal' then + momentum = 3 + end + end + -- init CG state cg.init(self.parameters, self.gradParameters, self.maxEvaluation, self.maxIterations, self.maxLineSearch, - self.sparsity, self.verbose) + momentum, linesearch, self.verbose) end function CG:optimize() -- callback for lBFGS lbfgs.evaluate = self.evaluate - -- the magic function: will update the parameter vector according to the l-BFGS method + -- the magic function: will update the parameter vector using CG self.output = cg.run() end diff --git a/LBFGSOptimization.lua b/LBFGSOptimization.lua index 9fd0a3f..53dfe70 100644 --- a/LBFGSOptimization.lua +++ b/LBFGSOptimization.lua @@ -4,22 +4,35 @@ function LBFGS:__init(...) require 'liblbfgs' parent.__init(self, ...) xlua.unpack_class(self, {...}, - 'LBFGSOptimization', nil, - {arg='maxEvaluation', type='number', - help='maximum nb of function evaluations per pass (0 = no max)', default=0}, - {arg='maxIterations', type='number', - help='maximum nb of iterations per pass (0 = no max)', default=0}, - {arg='maxLineSearch', type='number', - help='maximum nb of steps in line search', default=20}, - {arg='sparsity', type='number', - help='sparsity coef (Orthantwise C)', default=0}, - {arg='parallelize', type='number', - help='parallelize onto N cores (experimental!)', default=1} - ) + 'LBFGSOptimization', nil, + {arg='maxEvaluation', type='number', + help='maximum nb of function evaluations per pass (0 = no max)', default=0}, + {arg='maxIterations', type='number', + help='maximum nb of iterations per pass (0 = no max)', default=0}, + {arg='maxLineSearch', type='number', + help='maximum nb of steps in line search', default=20}, + {arg='sparsity', type='number', + help='sparsity coef (Orthantwise C)', default=0}, + {arg='linesearch', type='string', + help='type of linesearch used: morethuente, armijo, wolfe, strong_wolfe', + default='wolfe'}, + {arg='parallelize', type='number', + help='parallelize onto N cores (experimental!)', default=1} + ) + local linesearch = 2 + if not (self.linesearch == 'wolfe') then + if self.linesearch == 'morethuente' then + linesearch = 0 + elseif self.linesearch == 'armijo' then + linesearch = 1 + elseif self.linesearch == 'strong_wolfe' then + linesearch = 3 + end + end -- init LBFGS state lbfgs.init(self.parameters, self.gradParameters, self.maxEvaluation, self.maxIterations, self.maxLineSearch, - self.sparsity, self.verbose) + self.sparsity, linesearch, self.verbose) end function LBFGS:optimize() @@ -107,12 +107,14 @@ struct tag_iteration_data { }; typedef struct tag_iteration_data iteration_data_t; -static const lbfgs_parameter_t _defparam = { +static const lbfgs_parameter_t _def_param = { 6, /* max nb or corrections stored, to estimate hessian */ 1e-5, /* espilon = stop condition on f(x) */ - 0, /* - */ - 1e-5, /* - */ - 0, /* number of complete bfgs iterations (0 = inf) */ + 0, /* - past */ + 1e-5, /* - delta */ + 0, /* number of complete iterations (0 = inf) */ + 0, /* number of function evaluations (0 = inf) */ + 1.0e-16, /* floating-point precision */ LBFGS_LINESEARCH_DEFAULT, /* line search method */ 40, /* max number of trials for line search */ 1e-20, /* min step for line search */ @@ -120,12 +122,14 @@ static const lbfgs_parameter_t _defparam = { 1e-4, /* ftol = granularity for f(x) estimation */ 0.9, /* wolfe */ 0.9, /* gtol = granularity for df/dx estimation */ - 1.0e-16, /* floating-point precision */ 0.0, /* sparsity constraint */ 0, /* sparsity offset */ - -1 /* sparsity end */ + -1, /* sparsity end */ + CG_FLETCHER_REEVES, /* momentum type */ }; + + /* Forward function declarations. */ typedef int (*line_search_proc)( @@ -248,44 +252,11 @@ void lbfgs_free(lbfgsfloatval_t *x) void lbfgs_parameter_init(lbfgs_parameter_t *param) { - memcpy(param, &_defparam, sizeof(*param)); + memcpy(param, &_def_param, sizeof(*param)); } -int lbfgs( - int n, - lbfgsfloatval_t *x, - lbfgsfloatval_t *ptr_fx, - lbfgs_evaluate_t proc_evaluate, - lbfgs_progress_t proc_progress, - void *instance, - lbfgs_parameter_t *_param - ) +int check_params (int n, lbfgs_parameter_t param) { - int ret; - int i, j, k, ls, end, bound; - lbfgsfloatval_t step; - - /* Constant parameters and their default values. */ - lbfgs_parameter_t param = (_param != NULL) ? (*_param) : _defparam; - const int m = param.m; - - lbfgsfloatval_t *xp = NULL; - lbfgsfloatval_t *g = NULL, *gp = NULL, *pg = NULL; - lbfgsfloatval_t *d = NULL, *w = NULL, *pf = NULL; - iteration_data_t *lm = NULL, *it = NULL; - lbfgsfloatval_t ys, yy; - lbfgsfloatval_t xnorm, gnorm, beta; - lbfgsfloatval_t fx = 0.; - lbfgsfloatval_t rate = 0.; - line_search_proc linesearch = line_search_morethuente; - - /* Construct a callback data. */ - callback_data_t cd; - cd.n = n; - cd.instance = instance; - cd.proc_evaluate = proc_evaluate; - cd.proc_progress = proc_progress; - #if defined(USE_SSE) && (defined(__SSE__) || defined(__SSE2__)) /* Round out the number of variables. */ n = round_out_variables(n); @@ -336,6 +307,51 @@ int lbfgs( if (param.max_linesearch <= 0) { return LBFGSERR_INVALID_MAXLINESEARCH; } + return 0; +} + +int lbfgs( + int n, + lbfgsfloatval_t *x, + lbfgsfloatval_t *ptr_fx, + lbfgs_evaluate_t proc_evaluate, + lbfgs_progress_t proc_progress, + void *instance, + lbfgs_parameter_t *_param + ) +{ + int ret; + int i, j, k, ls, end, bound; + lbfgsfloatval_t step; + + /* Constant parameters and their default values. */ + lbfgs_parameter_t param = (_param != NULL) ? (*_param) : _def_param; + const int m = param.m; + + lbfgsfloatval_t *xp = NULL; + lbfgsfloatval_t *g = NULL, *gp = NULL, *pg = NULL; + lbfgsfloatval_t *d = NULL, *w = NULL, *pf = NULL; + iteration_data_t *lm = NULL, *it = NULL; + lbfgsfloatval_t ys, yy; + lbfgsfloatval_t xnorm, gnorm, beta; + lbfgsfloatval_t fx = 0.; + lbfgsfloatval_t rate = 0.; + line_search_proc linesearch = line_search_morethuente; + + /* Construct a callback data. */ + callback_data_t cd; + cd.n = n; + cd.instance = instance; + cd.proc_evaluate = proc_evaluate; + cd.proc_progress = proc_progress; + + /* check common params */ + ret = check_params(n,param); + if (ret < 0) { + return ret; + } + + /* check params specific to lbfgs() */ if (param.orthantwise_c < 0.) { return LBFGSERR_INVALID_ORTHANTWISE; } @@ -513,10 +529,11 @@ int lbfgs( } /* Count number of function evaluations */ - if ((maxEval != 0)&&(nEvaluation > maxEval)) { + if ((param.max_evaluations != 0)&&(nEvaluation > param.max_evaluations)) { if (verbose > 1){ printf("Stopping b/c exceeded max number of function evaluations\n"); } + ret = LBFGSERR_MAXIMUMEVALUATION; goto lbfgs_exit; } /* @@ -676,6 +693,7 @@ int lbfgs( return ret; } + int cg( int n, lbfgsfloatval_t *x, @@ -691,15 +709,14 @@ int cg( lbfgsfloatval_t step; /* Constant parameters and their default values. */ - lbfgs_parameter_t param = (_param != NULL) ? (*_param) : _defparam; - const int m = param.m; + lbfgs_parameter_t param = (_param != NULL) ? (*_param) : _def_param; lbfgsfloatval_t *xp = NULL; lbfgsfloatval_t *g = NULL, *gp = NULL, *pg = NULL; lbfgsfloatval_t *d = NULL, *dp = NULL, *w = NULL, *pf = NULL; - iteration_data_t *lm = NULL, *it = NULL; + lbfgsfloatval_t *tmp = NULL; lbfgsfloatval_t xnorm, gnorm; - lbfgsfloatval_t B, g0dot, g1dot; + lbfgsfloatval_t B, gptgp, gtg, gtgp, gnum, gden, B_FR, B_PR; lbfgsfloatval_t fx = 0.; lbfgsfloatval_t rate = 0.; line_search_proc linesearch = line_search_morethuente; @@ -711,92 +728,29 @@ int cg( cd.proc_evaluate = proc_evaluate; cd.proc_progress = proc_progress; -#if defined(USE_SSE) && (defined(__SSE__) || defined(__SSE2__)) - /* Round out the number of variables. */ - n = round_out_variables(n); -#endif/*defined(USE_SSE)*/ - - /* Check the input parameters for errors. */ - if (n <= 0) { - return LBFGSERR_INVALID_N; + /* check common params */ + ret = check_params(n,param); + if (ret < 0) { + return ret; } -#if defined(USE_SSE) && (defined(__SSE__) || defined(__SSE2__)) - if (n % 8 != 0) { - return LBFGSERR_INVALID_N_SSE; + /* check CG specific params */ + if (param.momentum < 0 || param.momentum > 3 ){ + return CGERR_INVALID_MOMENTUM; } - if ((uintptr_t)(const void*)x % 16 != 0) { - return LBFGSERR_INVALID_X_SSE; - } -#endif/*defined(USE_SSE)*/ - if (param.epsilon < 0.) { - return LBFGSERR_INVALID_EPSILON; - } - if (param.past < 0) { - return LBFGSERR_INVALID_TESTPERIOD; - } - if (param.delta < 0.) { - return LBFGSERR_INVALID_DELTA; - } - if (param.min_step < 0.) { - return LBFGSERR_INVALID_MINSTEP; - } - if (param.max_step < param.min_step) { - return LBFGSERR_INVALID_MAXSTEP; - } - if (param.ftol < 0.) { - return LBFGSERR_INVALID_FTOL; - } - if (param.linesearch == LBFGS_LINESEARCH_BACKTRACKING_WOLFE || - param.linesearch == LBFGS_LINESEARCH_BACKTRACKING_STRONG_WOLFE) { - if (param.wolfe <= param.ftol || 1. <= param.wolfe) { - return LBFGSERR_INVALID_WOLFE; - } - } - if (param.gtol < 0.) { - return LBFGSERR_INVALID_GTOL; - } - if (param.xtol < 0.) { - return LBFGSERR_INVALID_XTOL; - } - if (param.max_linesearch <= 0) { - return LBFGSERR_INVALID_MAXLINESEARCH; - } - if (param.orthantwise_c < 0.) { - return LBFGSERR_INVALID_ORTHANTWISE; - } - if (param.orthantwise_start < 0 || n < param.orthantwise_start) { - return LBFGSERR_INVALID_ORTHANTWISE_START; - } - if (param.orthantwise_end < 0) { - param.orthantwise_end = n; - } - if (n < param.orthantwise_end) { - return LBFGSERR_INVALID_ORTHANTWISE_END; - } - if (param.orthantwise_c != 0.) { - switch (param.linesearch) { - case LBFGS_LINESEARCH_BACKTRACKING: - linesearch = line_search_backtracking_owlqn; - break; - default: - /* Only the backtracking method is available. */ - return LBFGSERR_INVALID_LINESEARCH; - } - } else { - switch (param.linesearch) { - case LBFGS_LINESEARCH_MORETHUENTE: - linesearch = line_search_morethuente; - break; - case LBFGS_LINESEARCH_BACKTRACKING_ARMIJO: - case LBFGS_LINESEARCH_BACKTRACKING_WOLFE: - case LBFGS_LINESEARCH_BACKTRACKING_STRONG_WOLFE: - linesearch = line_search_backtracking; - break; - default: - return LBFGSERR_INVALID_LINESEARCH; - } + switch (param.linesearch) { + case LBFGS_LINESEARCH_MORETHUENTE: + linesearch = line_search_morethuente; + break; + case LBFGS_LINESEARCH_BACKTRACKING_ARMIJO: + case LBFGS_LINESEARCH_BACKTRACKING_WOLFE: + case LBFGS_LINESEARCH_BACKTRACKING_STRONG_WOLFE: + linesearch = line_search_backtracking; + break; + default: + return LBFGSERR_INVALID_LINESEARCH; } + /* Allocate working space. */ xp = (lbfgsfloatval_t*)vecalloc(n * sizeof(lbfgsfloatval_t)); g = (lbfgsfloatval_t*)vecalloc(n * sizeof(lbfgsfloatval_t)); @@ -804,7 +758,9 @@ int cg( d = (lbfgsfloatval_t*)vecalloc(n * sizeof(lbfgsfloatval_t)); dp = (lbfgsfloatval_t*)vecalloc(n * sizeof(lbfgsfloatval_t)); w = (lbfgsfloatval_t*)vecalloc(n * sizeof(lbfgsfloatval_t)); - if (xp == NULL || g == NULL || gp == NULL || d == NULL || w == NULL) { + tmp = (lbfgsfloatval_t*)vecalloc(n * sizeof(lbfgsfloatval_t)); + if (xp == NULL || g == NULL || gp == NULL || + d == NULL || dp == NULL || w == NULL || tmp == NULL) { ret = LBFGSERR_OUTOFMEMORY; goto lbfgs_exit; } @@ -818,7 +774,7 @@ int cg( fx = cd.proc_evaluate(cd.instance, x, g, cd.n, 0); /* used to compute the momentum term for CG */ - vecdot(&g0dot,g,g,n); + vecdot(>g,g,g,n); /* Store the initial value of the objective function. */ if (pf != NULL) { @@ -844,8 +800,7 @@ int cg( } /* Compute the initial step: - step = 1.0 / sqrt(vecdot(d, d, n)) - Do we want to do this for CG? + 1 / |d| + 1 */ vec2norminv(&step, d, n); @@ -944,19 +899,60 @@ int cg( break; } - /* compute 'momentum' term */ - vecdot(&g1dot, g, g, n); - B = g1dot / g0dot; - /* store val for next iteration */ - g0dot = g1dot; + /* compute 'momentum' term (following min func) */ + if (param.momentum != CG_HESTENES_STIEFEL) { + vecdot(>g, g, g, n); + } + switch(param.momentum) { + case CG_FLETCHER_REEVES : + /* B = (g'*g)/(gp'*gp) */ + B = gtg / gptgp; + break; + case CG_POLAK_RIBIERE : + /* B = (g'*(g-gp)) /(gp'*gp);*/ + vecdiff(tmp,g,gp,n); + vecdot(&gnum,g,tmp,n); + B = gnum / gptgp; + break; + case CG_HESTENES_STIEFEL : + /* B = (g'*(g-gp))/((g-gp)'*d); */ + vecdiff(tmp,g,gp,n); + vecdot(&gnum,g,tmp,n); + vecdot(&gden,tmp,d,n); + B = gnum / gden; + break; + case CG_GILBERT_NOCEDAL : + /* B_FR = (g'*(g-gp)) /(gp'*gp); */ + /* B_PR = (g'*g-(g'*gp))/(gp'*gp); */ + /* B = max(-B_FR,min(B_PR,B_FR)); */ + vecdiff(tmp,g,gp,n); /* g-gp */ + vecdot(&gnum,g,tmp,n); /* g'*(g-gp) */ + B_FR = gnum / gptgp; /* (g'*(g-gp)) /(gp'*gp) */ + vecdot(>gp,g,gp,n); /* g'*gp */ + gnum = gtg - gtgp; /* g'*g-(g'*gp) */ + B_PR = gnum / gptgp; /* (g'*g-(g'*gp))/(gp'*gp) */ + B = max2(-B_FR,min2(B_PR,B_FR)); + break; + default : + ret = CGERR_INVALID_MOMENTUM; + break; + } + if (param.momentum != CG_HESTENES_STIEFEL) { + /* store val for next iteration */ + gptgp = gtg; + } /* Compute the steepest direction. */ /* Compute the negative of gradients. */ vecncpy(d, g, n); + /* add the 'momentum' term */ /* d_1 = -g_1 + B*d_0 */ vecadd(d, dp, B, n); - + + /* increment the number of iterations */ + ++k; + /* Now the search direction d is ready. We try step = 1 first. */ @@ -970,27 +966,18 @@ int cg( } vecfree(pf); - - /* Free memory blocks used by this function. */ - if (lm != NULL) { - for (i = 0;i < m;++i) { - vecfree(lm[i].s); - vecfree(lm[i].y); - } - vecfree(lm); - } vecfree(pg); vecfree(w); vecfree(d); vecfree(gp); vecfree(g); vecfree(xp); + vecfree(dp); + vecfree(tmp); return ret; } - - static int line_search_backtracking( int n, lbfgsfloatval_t *x, @@ -1640,10 +1627,6 @@ static int update_trial_interval( return 0; } - - - - static lbfgsfloatval_t owlqn_x1norm( const lbfgsfloatval_t* x, const int start, @@ -1792,15 +1775,15 @@ static lbfgsfloatval_t evaluate(void *instance, } static int cg_progress(void *instance, - const lbfgsfloatval_t *x, - const lbfgsfloatval_t *g, - const lbfgsfloatval_t fx, - const lbfgsfloatval_t xnorm, - const lbfgsfloatval_t gnorm, - const lbfgsfloatval_t step, - int n, - int k, - int ls) + const lbfgsfloatval_t *x, + const lbfgsfloatval_t *g, + const lbfgsfloatval_t fx, + const lbfgsfloatval_t xnorm, + const lbfgsfloatval_t gnorm, + const lbfgsfloatval_t step, + int n, + int k, + int ls) { nIteration = k; if (verbose > 1) { @@ -1813,15 +1796,15 @@ static int cg_progress(void *instance, } static int lbfgs_progress(void *instance, - const lbfgsfloatval_t *x, - const lbfgsfloatval_t *g, - const lbfgsfloatval_t fx, - const lbfgsfloatval_t xnorm, - const lbfgsfloatval_t gnorm, - const lbfgsfloatval_t step, - int n, - int k, - int ls) + const lbfgsfloatval_t *x, + const lbfgsfloatval_t *g, + const lbfgsfloatval_t fx, + const lbfgsfloatval_t xnorm, + const lbfgsfloatval_t gnorm, + const lbfgsfloatval_t step, + int n, + int k, + int ls) { nIteration = k; if (verbose > 1) { @@ -1833,6 +1816,38 @@ static int lbfgs_progress(void *instance, return 0; } +int lbfgs_init(lua_State *L){ + /* initialize the parameters for the L-BFGS optimization */ + lbfgs_parameter_init(&lbfgs_param); + lbfgs_param.max_evaluations = lua_tonumber(L, 3); + lbfgs_param.max_iterations = lua_tonumber(L, 4); + lbfgs_param.max_linesearch = lua_tonumber(L, 5); + lbfgs_param.orthantwise_c = lua_tonumber(L, 6); + lbfgs_param.linesearch = lua_tonumber(L, 7); + /* get verbose level */ + verbose = lua_tonumber(L,8); + /* now load the common parameter and gradient vectors */ + init(L); + + return 0; +} + +int cg_init(lua_State *L){ + /* initialize the parameters for the L-BFGS optimization */ + lbfgs_parameter_init(&lbfgs_param); + lbfgs_param.max_evaluations = lua_tonumber(L, 3); + lbfgs_param.max_iterations = lua_tonumber(L, 4); + lbfgs_param.max_linesearch = lua_tonumber(L, 5); + lbfgs_param.momentum = lua_tonumber(L, 6); + lbfgs_param.linesearch = lua_tonumber(L, 7); + /* get verbose level */ + verbose = lua_tonumber(L,8); + /* now load the common parameter and gradient vectors */ + init(L); + + return 0; +} + int init(lua_State *L) { /* get params from userspace */ GL = L; @@ -1868,7 +1883,9 @@ int init(lua_State *L) { } #endif else - luaL_typerror(L,1,"torch.*Tensor"); + { + luaL_typerror(L,1,"torch.*Tensor"); + } /* parameters for algorithm */ nEvaluation = 0; @@ -1884,15 +1901,6 @@ int init(lua_State *L) { THCudaTensor_copy_init(x,(THCudaTensor *)parameters,nParameter); #endif - /* initialize the parameters for the L-BFGS optimization */ - lbfgs_parameter_init(&lbfgs_param); - maxEval = lua_tonumber(L,3); - lbfgs_param.max_iterations = lua_tonumber(L, 4); - lbfgs_param.max_linesearch = lua_tonumber(L, 5); - lbfgs_param.linesearch = LBFGS_LINESEARCH_BACKTRACKING; - lbfgs_param.orthantwise_c = lua_tonumber(L, 6); - /* get verbose level */ - verbose = lua_tonumber(L,7); /* done */ return 0; @@ -1949,6 +1957,8 @@ int cg_run(lua_State *L) { printf(" + f(X) = %f\n", fx); printf(" + X = [%f , ... %f]\n",x[0],x[nParameter-1]); printf(" + nb evaluations = %d\n", nEvaluation); + printf(" + linesearch = %d , momentum = %d\n", + lbfgs_param.linesearch, lbfgs_param.momentum); } /* return current error */ @@ -1958,14 +1968,14 @@ int cg_run(lua_State *L) { static const struct luaL_Reg cg_methods__ [] = { /* init and clear are the same methods */ - {"init", init}, + {"init", cg_init}, {"clear", clear}, {"run", cg_run}, {NULL, NULL} }; static const struct luaL_Reg lbfgs_methods__ [] = { - {"init", init}, + {"init", lbfgs_init}, {"clear", clear}, {"run", lbfgs_run}, {NULL, NULL} @@ -33,25 +33,25 @@ extern "C" { #endif/*__cplusplus*/ -/* - * The default precision of floating point values is 64bit (double). - */ + /* + * The default precision of floating point values is 64bit (double). + */ #ifndef LBFGS_FLOAT #define LBFGS_FLOAT 64 #endif/*LBFGS_FLOAT*/ -/* - * Activate optimization routines for IEEE754 floating point values. - */ + /* + * Activate optimization routines for IEEE754 floating point values. + */ #ifndef LBFGS_IEEE_FLOAT #define LBFGS_IEEE_FLOAT 1 #endif/*LBFGS_IEEE_FLOAT*/ #if LBFGS_FLOAT == 32 -typedef float lbfgsfloatval_t; + typedef float lbfgsfloatval_t; #elif LBFGS_FLOAT == 64 -typedef double lbfgsfloatval_t; + typedef double lbfgsfloatval_t; #else #error "libLBFGS supports single (float; LBFGS_FLOAT = 32) or double (double; LBFGS_FLOAT=64) precision only." @@ -59,19 +59,19 @@ typedef double lbfgsfloatval_t; #endif -/** - * \addtogroup liblbfgs_api libLBFGS API - * @{ - * - * The libLBFGS API. - */ + /** + * \addtogroup liblbfgs_api libLBFGS API + * @{ + * + * The libLBFGS API. + */ -/** - * Return values of lbfgs(). - * - * Roughly speaking, a negative value indicates an error. - */ -enum { + /** + * Return values of lbfgs(). + * + * Roughly speaking, a negative value indicates an error. + */ + enum { /** L-BFGS reaches convergence. */ LBFGS_SUCCESS = 0, LBFGS_CONVERGENCE = 0, @@ -137,6 +137,8 @@ enum { LBFGSERR_MAXIMUMLINESEARCH, /** The algorithm routine reaches the maximum number of iterations. */ LBFGSERR_MAXIMUMITERATION, + /** The algorithm routine reaches the maximum number of iterations. */ + LBFGSERR_MAXIMUMEVALUATION, /** Relative width of the interval of uncertainty is at most lbfgs_parameter_t::xtol. */ LBFGSERR_WIDTHTOOSMALL, @@ -144,12 +146,14 @@ enum { LBFGSERR_INVALIDPARAMETERS, /** The current search direction increases the objective function value. */ LBFGSERR_INCREASEGRADIENT, -}; - -/** - * Line search algorithms. - */ -enum { + /** Invalid momentum type for CG */ + CGERR_INVALID_MOMENTUM, + }; + + /** + * Line search algorithms. + */ + enum { /** The default algorithm (MoreThuente method). */ LBFGS_LINESEARCH_DEFAULT = 0, /** MoreThuente method proposd by More and Thuente. */ @@ -188,14 +192,36 @@ enum { * a is the step length. */ LBFGS_LINESEARCH_BACKTRACKING_STRONG_WOLFE = 3, -}; - -/** - * L-BFGS optimization parameters. - * Call lbfgs_parameter_init() function to initialize parameters to the - * default values. - */ -typedef struct { + }; + + /** + * CG momentum terms + */ + + enum { + /* beta = (g'*g)/(gotgo); */ + CG_DEFAULT = 0, + CG_FLETCHER_REEVES = 0, + + /* beta = (g'*(g-g_old)) /(gotgo);*/ + CG_POLAK_RIBIERE = 1, + + /* beta = (g'*(g-g_old))/((g-g_old)'*d); */ + CG_HESTENES_STIEFEL = 2, + + /* beta_FR = (g'*(g-g_old)) /(gotgo); */ + /* beta_PR = (g'*g-gtgo)/(gotgo); */ + /* beta = max(-beta_FR,min(beta_PR,beta_FR)); */ + CG_GILBERT_NOCEDAL = 3, + }; + + + /** + * L-BFGS optimization parameters. + * Call lbfgs_parameter_init() function to initialize parameters to the + * default values. + */ + typedef struct { /** * The number of corrections to approximate the inverse hessian matrix. * The L-BFGS routine stores the computation results of previous \ref m @@ -248,6 +274,23 @@ typedef struct { int max_iterations; /** + * The maximum number of function evaluations. + * The lbfgs() function terminates an optimization process with + * ::LBFGSERR_MAXIMUMEVALUATION status code when the iteration count + * exceedes this parameter. Setting this parameter to zero continues an + * optimization process until a convergence or error. The default value + * is \c 0. + */ + int max_evaluations; + /** + * The machine precision for floating-point values. + * This parameter must be a positive value set by a client program to + * estimate the machine precision. The line search routine will terminate + * with the status code (::LBFGSERR_ROUNDING_ERROR) if the relative width + * of the interval of uncertainty is less than this parameter. + */ + lbfgsfloatval_t xtol; + /** * The line search algorithm. * This parameter specifies a line search algorithm to be used by the * L-BFGS routine. @@ -310,15 +353,6 @@ typedef struct { lbfgsfloatval_t gtol; /** - * The machine precision for floating-point values. - * This parameter must be a positive value set by a client program to - * estimate the machine precision. The line search routine will terminate - * with the status code (::LBFGSERR_ROUNDING_ERROR) if the relative width - * of the interval of uncertainty is less than this parameter. - */ - lbfgsfloatval_t xtol; - - /** * Coeefficient for the L1 norm of variables. * This parameter should be set to zero for standard minimization * problems. Setting this parameter to a positive value activates @@ -355,167 +389,175 @@ typedef struct { * L1 norm of the variables x, */ int orthantwise_end; -} lbfgs_parameter_t; -/** - * Callback interface to provide objective function and gradient evaluations. - * - * The lbfgs() function call this function to obtain the values of objective - * function and its gradients when needed. A client program must implement - * this function to evaluate the values of the objective function and its - * gradients, given current values of variables. - * - * @param instance The user data sent for lbfgs() function by the client. - * @param x The current values of variables. - * @param g The gradient vector. The callback function must compute - * the gradient values for the current variables. - * @param n The number of variables. - * @param step The current step of the line search routine. - * @retval lbfgsfloatval_t The value of the objective function for the current - * variables. - */ -typedef lbfgsfloatval_t (*lbfgs_evaluate_t)( - void *instance, - const lbfgsfloatval_t *x, - lbfgsfloatval_t *g, - const int n, - const lbfgsfloatval_t step - ); - -/** - * Callback interface to receive the progress of the optimization process. - * - * The lbfgs() function call this function for each iteration. Implementing - * this function, a client program can store or display the current progress - * of the optimization process. - * - * @param instance The user data sent for lbfgs() function by the client. - * @param x The current values of variables. - * @param g The current gradient values of variables. - * @param fx The current value of the objective function. - * @param xnorm The Euclidean norm of the variables. - * @param gnorm The Euclidean norm of the gradients. - * @param step The line-search step used for this iteration. - * @param n The number of variables. - * @param k The iteration count. - * @param ls The number of evaluations called for this iteration. - * @retval int Zero to continue the optimization process. Returning a - * non-zero value will cancel the optimization process. - */ -typedef int (*lbfgs_progress_t)( - void *instance, - const lbfgsfloatval_t *x, - const lbfgsfloatval_t *g, - const lbfgsfloatval_t fx, - const lbfgsfloatval_t xnorm, - const lbfgsfloatval_t gnorm, - const lbfgsfloatval_t step, - int n, - int k, - int ls - ); - -/* -A user must implement a function compatible with ::lbfgs_evaluate_t (evaluation -callback) and pass the pointer to the callback function to lbfgs() arguments. -Similarly, a user can implement a function compatible with ::lbfgs_progress_t -(progress callback) to obtain the current progress (e.g., variables, function -value, ||G||, etc) and to cancel the iteration process if necessary. -Implementation of a progress callback is optional: a user can pass \c NULL if -progress notification is not necessary. - -In addition, a user must preserve two requirements: + /** + * momentum criteria : by what value do you multiply the previous + * gradient to the current: CG_FLETCHER_REEVES,etc. + */ + int momentum; + + } lbfgs_parameter_t; + + + /** + * Callback interface to provide objective function and gradient evaluations. + * + * The lbfgs() function call this function to obtain the values of objective + * function and its gradients when needed. A client program must implement + * this function to evaluate the values of the objective function and its + * gradients, given current values of variables. + * + * @param instance The user data sent for lbfgs() function by the client. + * @param x The current values of variables. + * @param g The gradient vector. The callback function must compute + * the gradient values for the current variables. + * @param n The number of variables. + * @param step The current step of the line search routine. + * @retval lbfgsfloatval_t The value of the objective function for the current + * variables. + */ + typedef lbfgsfloatval_t (*lbfgs_evaluate_t)( + void *instance, + const lbfgsfloatval_t *x, + lbfgsfloatval_t *g, + const int n, + const lbfgsfloatval_t step + ); + + /** + * Callback interface to receive the progress of the optimization process. + * + * The lbfgs() function call this function for each iteration. Implementing + * this function, a client program can store or display the current progress + * of the optimization process. + * + * @param instance The user data sent for lbfgs() function by the client. + * @param x The current values of variables. + * @param g The current gradient values of variables. + * @param fx The current value of the objective function. + * @param xnorm The Euclidean norm of the variables. + * @param gnorm The Euclidean norm of the gradients. + * @param step The line-search step used for this iteration. + * @param n The number of variables. + * @param k The iteration count. + * @param ls The number of evaluations called for this iteration. + * @retval int Zero to continue the optimization process. Returning a + * non-zero value will cancel the optimization process. + */ + typedef int (*lbfgs_progress_t)( + void *instance, + const lbfgsfloatval_t *x, + const lbfgsfloatval_t *g, + const lbfgsfloatval_t fx, + const lbfgsfloatval_t xnorm, + const lbfgsfloatval_t gnorm, + const lbfgsfloatval_t step, + int n, + int k, + int ls + ); + + /* + A user must implement a function compatible with ::lbfgs_evaluate_t (evaluation + callback) and pass the pointer to the callback function to lbfgs() arguments. + Similarly, a user can implement a function compatible with ::lbfgs_progress_t + (progress callback) to obtain the current progress (e.g., variables, function + value, ||G||, etc) and to cancel the iteration process if necessary. + Implementation of a progress callback is optional: a user can pass \c NULL if + progress notification is not necessary. + + In addition, a user must preserve two requirements: - The number of variables must be multiples of 16 (this is not 4). - The memory block of variable array ::x must be aligned to 16. -This algorithm terminates an optimization -when: + This algorithm terminates an optimization + when: ||G|| < \epsilon \cdot \max(1, ||x||) . -In this formula, ||.|| denotes the Euclidean norm. -*/ - -/** - * Start a L-BFGS optimization. - * - * @param n The number of variables. - * @param x The array of variables. A client program can set - * default values for the optimization and receive the - * optimization result through this array. This array - * must be allocated by ::lbfgs_malloc function - * for libLBFGS built with SSE/SSE2 optimization routine - * enabled. The library built without SSE/SSE2 - * optimization does not have such a requirement. - * @param ptr_fx The pointer to the variable that receives the final - * value of the objective function for the variables. - * This argument can be set to \c NULL if the final - * value of the objective function is unnecessary. - * @param proc_evaluate The callback function to provide function and - * gradient evaluations given a current values of - * variables. A client program must implement a - * callback function compatible with \ref - * lbfgs_evaluate_t and pass the pointer to the - * callback function. - * @param proc_progress The callback function to receive the progress - * (the number of iterations, the current value of - * the objective function) of the minimization - * process. This argument can be set to \c NULL if - * a progress report is unnecessary. - * @param instance A user data for the client program. The callback - * functions will receive the value of this argument. - * @param param The pointer to a structure representing parameters for - * L-BFGS optimization. A client program can set this - * parameter to \c NULL to use the default parameters. - * Call lbfgs_parameter_init() function to fill a - * structure with the default values. - * @retval int The status code. This function returns zero if the - * minimization process terminates without an error. A - * non-zero value indicates an error. - */ -int lbfgs( - int n, - lbfgsfloatval_t *x, - lbfgsfloatval_t *ptr_fx, - lbfgs_evaluate_t proc_evaluate, - lbfgs_progress_t proc_progress, - void *instance, - lbfgs_parameter_t *param - ); - -/** - * Initialize L-BFGS parameters to the default values. - * - * Call this function to fill a parameter structure with the default values - * and overwrite parameter values if necessary. - * - * @param param The pointer to the parameter structure. - */ -void lbfgs_parameter_init(lbfgs_parameter_t *param); - -/** - * Allocate an array for variables. - * - * This function allocates an array of variables for the convenience of - * ::lbfgs function; the function has a requreiemt for a variable array - * when libLBFGS is built with SSE/SSE2 optimization routines. A user does - * not have to use this function for libLBFGS built without SSE/SSE2 - * optimization. - * - * @param n The number of variables. - */ -lbfgsfloatval_t* lbfgs_malloc(int n); - -/** - * Free an array of variables. - * - * @param x The array of variables allocated by ::lbfgs_malloc - * function. - */ -void lbfgs_free(lbfgsfloatval_t *x); - -/** @} */ + In this formula, ||.|| denotes the Euclidean norm. + */ + + /** + * Start a L-BFGS optimization. + * + * @param n The number of variables. + * @param x The array of variables. A client program can set + * default values for the optimization and receive the + * optimization result through this array. This array + * must be allocated by ::lbfgs_malloc function + * for libLBFGS built with SSE/SSE2 optimization routine + * enabled. The library built without SSE/SSE2 + * optimization does not have such a requirement. + * @param ptr_fx The pointer to the variable that receives the final + * value of the objective function for the variables. + * This argument can be set to \c NULL if the final + * value of the objective function is unnecessary. + * @param proc_evaluate The callback function to provide function and + * gradient evaluations given a current values of + * variables. A client program must implement a + * callback function compatible with \ref + * lbfgs_evaluate_t and pass the pointer to the + * callback function. + * @param proc_progress The callback function to receive the progress + * (the number of iterations, the current value of + * the objective function) of the minimization + * process. This argument can be set to \c NULL if + * a progress report is unnecessary. + * @param instance A user data for the client program. The callback + * functions will receive the value of this argument. + * @param param The pointer to a structure representing parameters for + * L-BFGS optimization. A client program can set this + * parameter to \c NULL to use the default parameters. + * Call lbfgs_parameter_init() function to fill a + * structure with the default values. + * @retval int The status code. This function returns zero if the + * minimization process terminates without an error. A + * non-zero value indicates an error. + */ + int lbfgs( + int n, + lbfgsfloatval_t *x, + lbfgsfloatval_t *ptr_fx, + lbfgs_evaluate_t proc_evaluate, + lbfgs_progress_t proc_progress, + void *instance, + lbfgs_parameter_t *param + ); + + /** + * Initialize L-BFGS parameters to the default values. + * + * Call this function to fill a parameter structure with the default values + * and overwrite parameter values if necessary. + * + * @param param The pointer to the parameter structure. + */ + void lbfgs_parameter_init(lbfgs_parameter_t *param); + + /** + * Allocate an array for variables. + * + * This function allocates an array of variables for the convenience of + * ::lbfgs function; the function has a requreiemt for a variable array + * when libLBFGS is built with SSE/SSE2 optimization routines. A user does + * not have to use this function for libLBFGS built without SSE/SSE2 + * optimization. + * + * @param n The number of variables. + */ + lbfgsfloatval_t* lbfgs_malloc(int n); + + /** + * Free an array of variables. + * + * @param x The array of variables allocated by ::lbfgs_malloc + * function. + */ + void lbfgs_free(lbfgsfloatval_t *x); + + /** @} */ #ifdef __cplusplus } @@ -524,222 +566,222 @@ void lbfgs_free(lbfgsfloatval_t *x); /** -@mainpage libLBFGS: a library of Limited-memory Broyden-Fletcher-Goldfarb-Shanno (L-BFGS) - -@section intro Introduction - -This library is a C port of the implementation of Limited-memory -Broyden-Fletcher-Goldfarb-Shanno (L-BFGS) method written by Jorge Nocedal. -The original FORTRAN source code is available at: -http://www.ece.northwestern.edu/~nocedal/lbfgs.html - -The L-BFGS method solves the unconstrainted minimization problem, - -<pre> - minimize F(x), x = (x1, x2, ..., xN), -</pre> - -only if the objective function F(x) and its gradient G(x) are computable. The -well-known Newton's method requires computation of the inverse of the hessian -matrix of the objective function. However, the computational cost for the -inverse hessian matrix is expensive especially when the objective function -takes a large number of variables. The L-BFGS method iteratively finds a -minimizer by approximating the inverse hessian matrix by information from last -m iterations. This innovation saves the memory storage and computational time -drastically for large-scaled problems. - -Among the various ports of L-BFGS, this library provides several features: -- <b>Optimization with L1-norm (Orthant-Wise Limited-memory Quasi-Newton - (OWL-QN) method)</b>: - In addition to standard minimization problems, the library can minimize - a function F(x) combined with L1-norm |x| of the variables, - {F(x) + C |x|}, where C is a constant scalar parameter. This feature is - useful for estimating parameters of sparse log-linear models (e.g., - logistic regression and maximum entropy) with L1-regularization (or - Laplacian prior). -- <b>Clean C code</b>: - Unlike C codes generated automatically by f2c (Fortran 77 into C converter), - this port includes changes based on my interpretations, improvements, - optimizations, and clean-ups so that the ported code would be well-suited - for a C code. In addition to comments inherited from the original code, - a number of comments were added through my interpretations. -- <b>Callback interface</b>: - The library receives function and gradient values via a callback interface. - The library also notifies the progress of the optimization by invoking a - callback function. In the original implementation, a user had to set - function and gradient values every time the function returns for obtaining - updated values. -- <b>Thread safe</b>: - The library is thread-safe, which is the secondary gain from the callback - interface. -- <b>Cross platform.</b> The source code can be compiled on Microsoft Visual - Studio 2010, GNU C Compiler (gcc), etc. -- <b>Configurable precision</b>: A user can choose single-precision (float) - or double-precision (double) accuracy by changing ::LBFGS_FLOAT macro. -- <b>SSE/SSE2 optimization</b>: - This library includes SSE/SSE2 optimization (written in compiler intrinsics) - for vector arithmetic operations on Intel/AMD processors. The library uses - SSE for float values and SSE2 for double values. The SSE/SSE2 optimization - routine is disabled by default. - -This library is used by: -- <a href="http://www.chokkan.org/software/crfsuite/">CRFsuite: A fast implementation of Conditional Random Fields (CRFs)</a> -- <a href="http://www.chokkan.org/software/classias/">Classias: A collection of machine-learning algorithms for classification</a> -- <a href="http://www.public.iastate.edu/~gdancik/mlegp/">mlegp: an R package for maximum likelihood estimates for Gaussian processes</a> -- <a href="http://infmath.uibk.ac.at/~matthiasf/imaging2/">imaging2: the imaging2 class library</a> -- <a href="http://search.cpan.org/~laye/Algorithm-LBFGS-0.16/">Algorithm::LBFGS - Perl extension for L-BFGS</a> -- <a href="http://www.cs.kuleuven.be/~bernd/yap-lbfgs/">YAP-LBFGS (an interface to call libLBFGS from YAP Prolog)</a> - -@section download Download - -- <a href="https://github.com/downloads/chokkan/liblbfgs/liblbfgs-1.10.tar.gz">Source code</a> -- <a href="https://github.com/chokkan/liblbfgs">GitHub repository</a> - -libLBFGS is distributed under the term of the -<a href="http://opensource.org/licenses/mit-license.php">MIT license</a>. - -@section changelog History -- Version 1.10 (2010-12-22): - - Fixed compiling errors on Mac OS X; this patch was kindly submitted by - Nic Schraudolph. - - Reduced compiling warnings on Mac OS X; this patch was kindly submitted - by Tamas Nepusz. - - Replaced memalign() with posix_memalign(). - - Updated solution and project files for Microsoft Visual Studio 2010. -- Version 1.9 (2010-01-29): - - Fixed a mistake in checking the validity of the parameters "ftol" and - "wolfe"; this was discovered by Kevin S. Van Horn. -- Version 1.8 (2009-07-13): - - Accepted the patch submitted by Takashi Imamichi; - the backtracking method now has three criteria for choosing the step - length: - - ::LBFGS_LINESEARCH_BACKTRACKING_ARMIJO: sufficient decrease (Armijo) - condition only - - ::LBFGS_LINESEARCH_BACKTRACKING_WOLFE: regular Wolfe condition - (sufficient decrease condition + curvature condition) - - ::LBFGS_LINESEARCH_BACKTRACKING_STRONG_WOLFE: strong Wolfe condition - - Updated the documentation to explain the above three criteria. -- Version 1.7 (2009-02-28): - - Improved OWL-QN routines for stability. - - Removed the support of OWL-QN method in MoreThuente algorithm because - it accidentally fails in early stages of iterations for some objectives. - Because of this change, <b>the OW-LQN method must be used with the - backtracking algorithm (::LBFGS_LINESEARCH_BACKTRACKING)</b>, or the - library returns ::LBFGSERR_INVALID_LINESEARCH. - - Renamed line search algorithms as follows: - - ::LBFGS_LINESEARCH_BACKTRACKING: regular Wolfe condition. - - ::LBFGS_LINESEARCH_BACKTRACKING_LOOSE: regular Wolfe condition. - - ::LBFGS_LINESEARCH_BACKTRACKING_STRONG: strong Wolfe condition. - - Source code clean-up. -- Version 1.6 (2008-11-02): - - Improved line-search algorithm with strong Wolfe condition, which was - contributed by Takashi Imamichi. This routine is now default for - ::LBFGS_LINESEARCH_BACKTRACKING. The previous line search algorithm - with regular Wolfe condition is still available as - ::LBFGS_LINESEARCH_BACKTRACKING_LOOSE. - - Configurable stop index for L1-norm computation. A member variable - ::lbfgs_parameter_t::orthantwise_end was added to specify the index - number at which the library stops computing the L1 norm of the - variables. This is useful to prevent some variables from being - regularized by the OW-LQN method. - - A sample program written in C++ (sample/sample.cpp). -- Version 1.5 (2008-07-10): - - Configurable starting index for L1-norm computation. A member variable - ::lbfgs_parameter_t::orthantwise_start was added to specify the index - number from which the library computes the L1 norm of the variables. - This is useful to prevent some variables from being regularized by the - OWL-QN method. - - Fixed a zero-division error when the initial variables have already - been a minimizer (reported by Takashi Imamichi). In this case, the - library returns ::LBFGS_ALREADY_MINIMIZED status code. - - Defined ::LBFGS_SUCCESS status code as zero; removed unused constants, - LBFGSFALSE and LBFGSTRUE. - - Fixed a compile error in an implicit down-cast. -- Version 1.4 (2008-04-25): - - Configurable line search algorithms. A member variable - ::lbfgs_parameter_t::linesearch was added to choose either MoreThuente - method (::LBFGS_LINESEARCH_MORETHUENTE) or backtracking algorithm - (::LBFGS_LINESEARCH_BACKTRACKING). - - Fixed a bug: the previous version did not compute psuedo-gradients - properly in the line search routines for OWL-QN. This bug might quit - an iteration process too early when the OWL-QN routine was activated - (0 < ::lbfgs_parameter_t::orthantwise_c). - - Configure script for POSIX environments. - - SSE/SSE2 optimizations with GCC. - - New functions ::lbfgs_malloc and ::lbfgs_free to use SSE/SSE2 routines - transparently. It is uncessary to use these functions for libLBFGS built - without SSE/SSE2 routines; you can still use any memory allocators if - SSE/SSE2 routines are disabled in libLBFGS. -- Version 1.3 (2007-12-16): - - An API change. An argument was added to lbfgs() function to receive the - final value of the objective function. This argument can be set to - \c NULL if the final value is unnecessary. - - Fixed a null-pointer bug in the sample code (reported by Takashi Imamichi). - - Added build scripts for Microsoft Visual Studio 2005 and GCC. - - Added README file. -- Version 1.2 (2007-12-13): - - Fixed a serious bug in orthant-wise L-BFGS. - An important variable was used without initialization. -- Version 1.1 (2007-12-01): - - Implemented orthant-wise L-BFGS. - - Implemented lbfgs_parameter_init() function. - - Fixed several bugs. - - API documentation. -- Version 1.0 (2007-09-20): - - Initial release. - -@section api Documentation - -- @ref liblbfgs_api "libLBFGS API" - -@section sample Sample code - -@include sample.c - -@section ack Acknowledgements - -The L-BFGS algorithm is described in: - - Jorge Nocedal. - Updating Quasi-Newton Matrices with Limited Storage. - <i>Mathematics of Computation</i>, Vol. 35, No. 151, pp. 773--782, 1980. - - Dong C. Liu and Jorge Nocedal. - On the limited memory BFGS method for large scale optimization. - <i>Mathematical Programming</i> B, Vol. 45, No. 3, pp. 503-528, 1989. - -The line search algorithms used in this implementation are described in: - - John E. Dennis and Robert B. Schnabel. - <i>Numerical Methods for Unconstrained Optimization and Nonlinear - Equations</i>, Englewood Cliffs, 1983. - - Jorge J. More and David J. Thuente. - Line search algorithm with guaranteed sufficient decrease. - <i>ACM Transactions on Mathematical Software (TOMS)</i>, Vol. 20, No. 3, - pp. 286-307, 1994. - -This library also implements Orthant-Wise Limited-memory Quasi-Newton (OWL-QN) -method presented in: - - Galen Andrew and Jianfeng Gao. - Scalable training of L1-regularized log-linear models. - In <i>Proceedings of the 24th International Conference on Machine - Learning (ICML 2007)</i>, pp. 33-40, 2007. - -Special thanks go to: - - Yoshimasa Tsuruoka and Daisuke Okanohara for technical information about - OWL-QN - - Takashi Imamichi for the useful enhancements of the backtracking method - - Kevin S. Van Horn, Nic Schraudolph, and Tamas Nepusz for bug fixes - -Finally I would like to thank the original author, Jorge Nocedal, who has been -distributing the effieicnt and explanatory implementation in an open source -licence. - -@section reference Reference - -- <a href="http://www.ece.northwestern.edu/~nocedal/lbfgs.html">L-BFGS</a> by Jorge Nocedal. -- <a href="http://research.microsoft.com/en-us/downloads/b1eb1016-1738-4bd5-83a9-370c9d498a03/default.aspx">Orthant-Wise Limited-memory Quasi-Newton Optimizer for L1-regularized Objectives</a> by Galen Andrew. -- <a href="http://chasen.org/~taku/software/misc/lbfgs/">C port (via f2c)</a> by Taku Kudo. -- <a href="http://www.alglib.net/optimization/lbfgs.php">C#/C++/Delphi/VisualBasic6 port</a> in ALGLIB. -- <a href="http://cctbx.sourceforge.net/">Computational Crystallography Toolbox</a> includes - <a href="http://cctbx.sourceforge.net/current_cvs/c_plus_plus/namespacescitbx_1_1lbfgs.html">scitbx::lbfgs</a>. + @mainpage libLBFGS: a library of Limited-memory Broyden-Fletcher-Goldfarb-Shanno (L-BFGS) + + @section intro Introduction + + This library is a C port of the implementation of Limited-memory + Broyden-Fletcher-Goldfarb-Shanno (L-BFGS) method written by Jorge Nocedal. + The original FORTRAN source code is available at: + http://www.ece.northwestern.edu/~nocedal/lbfgs.html + + The L-BFGS method solves the unconstrainted minimization problem, + + <pre> + minimize F(x), x = (x1, x2, ..., xN), + </pre> + + only if the objective function F(x) and its gradient G(x) are computable. The + well-known Newton's method requires computation of the inverse of the hessian + matrix of the objective function. However, the computational cost for the + inverse hessian matrix is expensive especially when the objective function + takes a large number of variables. The L-BFGS method iteratively finds a + minimizer by approximating the inverse hessian matrix by information from last + m iterations. This innovation saves the memory storage and computational time + drastically for large-scaled problems. + + Among the various ports of L-BFGS, this library provides several features: + - <b>Optimization with L1-norm (Orthant-Wise Limited-memory Quasi-Newton + (OWL-QN) method)</b>: + In addition to standard minimization problems, the library can minimize + a function F(x) combined with L1-norm |x| of the variables, + {F(x) + C |x|}, where C is a constant scalar parameter. This feature is + useful for estimating parameters of sparse log-linear models (e.g., + logistic regression and maximum entropy) with L1-regularization (or + Laplacian prior). + - <b>Clean C code</b>: + Unlike C codes generated automatically by f2c (Fortran 77 into C converter), + this port includes changes based on my interpretations, improvements, + optimizations, and clean-ups so that the ported code would be well-suited + for a C code. In addition to comments inherited from the original code, + a number of comments were added through my interpretations. + - <b>Callback interface</b>: + The library receives function and gradient values via a callback interface. + The library also notifies the progress of the optimization by invoking a + callback function. In the original implementation, a user had to set + function and gradient values every time the function returns for obtaining + updated values. + - <b>Thread safe</b>: + The library is thread-safe, which is the secondary gain from the callback + interface. + - <b>Cross platform.</b> The source code can be compiled on Microsoft Visual + Studio 2010, GNU C Compiler (gcc), etc. + - <b>Configurable precision</b>: A user can choose single-precision (float) + or double-precision (double) accuracy by changing ::LBFGS_FLOAT macro. + - <b>SSE/SSE2 optimization</b>: + This library includes SSE/SSE2 optimization (written in compiler intrinsics) + for vector arithmetic operations on Intel/AMD processors. The library uses + SSE for float values and SSE2 for double values. The SSE/SSE2 optimization + routine is disabled by default. + + This library is used by: + - <a href="http://www.chokkan.org/software/crfsuite/">CRFsuite: A fast implementation of Conditional Random Fields (CRFs)</a> + - <a href="http://www.chokkan.org/software/classias/">Classias: A collection of machine-learning algorithms for classification</a> + - <a href="http://www.public.iastate.edu/~gdancik/mlegp/">mlegp: an R package for maximum likelihood estimates for Gaussian processes</a> + - <a href="http://infmath.uibk.ac.at/~matthiasf/imaging2/">imaging2: the imaging2 class library</a> + - <a href="http://search.cpan.org/~laye/Algorithm-LBFGS-0.16/">Algorithm::LBFGS - Perl extension for L-BFGS</a> + - <a href="http://www.cs.kuleuven.be/~bernd/yap-lbfgs/">YAP-LBFGS (an interface to call libLBFGS from YAP Prolog)</a> + + @section download Download + + - <a href="https://github.com/downloads/chokkan/liblbfgs/liblbfgs-1.10.tar.gz">Source code</a> + - <a href="https://github.com/chokkan/liblbfgs">GitHub repository</a> + + libLBFGS is distributed under the term of the + <a href="http://opensource.org/licenses/mit-license.php">MIT license</a>. + + @section changelog History + - Version 1.10 (2010-12-22): + - Fixed compiling errors on Mac OS X; this patch was kindly submitted by + Nic Schraudolph. + - Reduced compiling warnings on Mac OS X; this patch was kindly submitted + by Tamas Nepusz. + - Replaced memalign() with posix_memalign(). + - Updated solution and project files for Microsoft Visual Studio 2010. + - Version 1.9 (2010-01-29): + - Fixed a mistake in checking the validity of the parameters "ftol" and + "wolfe"; this was discovered by Kevin S. Van Horn. + - Version 1.8 (2009-07-13): + - Accepted the patch submitted by Takashi Imamichi; + the backtracking method now has three criteria for choosing the step + length: + - ::LBFGS_LINESEARCH_BACKTRACKING_ARMIJO: sufficient decrease (Armijo) + condition only + - ::LBFGS_LINESEARCH_BACKTRACKING_WOLFE: regular Wolfe condition + (sufficient decrease condition + curvature condition) + - ::LBFGS_LINESEARCH_BACKTRACKING_STRONG_WOLFE: strong Wolfe condition + - Updated the documentation to explain the above three criteria. + - Version 1.7 (2009-02-28): + - Improved OWL-QN routines for stability. + - Removed the support of OWL-QN method in MoreThuente algorithm because + it accidentally fails in early stages of iterations for some objectives. + Because of this change, <b>the OW-LQN method must be used with the + backtracking algorithm (::LBFGS_LINESEARCH_BACKTRACKING)</b>, or the + library returns ::LBFGSERR_INVALID_LINESEARCH. + - Renamed line search algorithms as follows: + - ::LBFGS_LINESEARCH_BACKTRACKING: regular Wolfe condition. + - ::LBFGS_LINESEARCH_BACKTRACKING_LOOSE: regular Wolfe condition. + - ::LBFGS_LINESEARCH_BACKTRACKING_STRONG: strong Wolfe condition. + - Source code clean-up. + - Version 1.6 (2008-11-02): + - Improved line-search algorithm with strong Wolfe condition, which was + contributed by Takashi Imamichi. This routine is now default for + ::LBFGS_LINESEARCH_BACKTRACKING. The previous line search algorithm + with regular Wolfe condition is still available as + ::LBFGS_LINESEARCH_BACKTRACKING_LOOSE. + - Configurable stop index for L1-norm computation. A member variable + ::lbfgs_parameter_t::orthantwise_end was added to specify the index + number at which the library stops computing the L1 norm of the + variables. This is useful to prevent some variables from being + regularized by the OW-LQN method. + - A sample program written in C++ (sample/sample.cpp). + - Version 1.5 (2008-07-10): + - Configurable starting index for L1-norm computation. A member variable + ::lbfgs_parameter_t::orthantwise_start was added to specify the index + number from which the library computes the L1 norm of the variables. + This is useful to prevent some variables from being regularized by the + OWL-QN method. + - Fixed a zero-division error when the initial variables have already + been a minimizer (reported by Takashi Imamichi). In this case, the + library returns ::LBFGS_ALREADY_MINIMIZED status code. + - Defined ::LBFGS_SUCCESS status code as zero; removed unused constants, + LBFGSFALSE and LBFGSTRUE. + - Fixed a compile error in an implicit down-cast. + - Version 1.4 (2008-04-25): + - Configurable line search algorithms. A member variable + ::lbfgs_parameter_t::linesearch was added to choose either MoreThuente + method (::LBFGS_LINESEARCH_MORETHUENTE) or backtracking algorithm + (::LBFGS_LINESEARCH_BACKTRACKING). + - Fixed a bug: the previous version did not compute psuedo-gradients + properly in the line search routines for OWL-QN. This bug might quit + an iteration process too early when the OWL-QN routine was activated + (0 < ::lbfgs_parameter_t::orthantwise_c). + - Configure script for POSIX environments. + - SSE/SSE2 optimizations with GCC. + - New functions ::lbfgs_malloc and ::lbfgs_free to use SSE/SSE2 routines + transparently. It is uncessary to use these functions for libLBFGS built + without SSE/SSE2 routines; you can still use any memory allocators if + SSE/SSE2 routines are disabled in libLBFGS. + - Version 1.3 (2007-12-16): + - An API change. An argument was added to lbfgs() function to receive the + final value of the objective function. This argument can be set to + \c NULL if the final value is unnecessary. + - Fixed a null-pointer bug in the sample code (reported by Takashi Imamichi). + - Added build scripts for Microsoft Visual Studio 2005 and GCC. + - Added README file. + - Version 1.2 (2007-12-13): + - Fixed a serious bug in orthant-wise L-BFGS. + An important variable was used without initialization. + - Version 1.1 (2007-12-01): + - Implemented orthant-wise L-BFGS. + - Implemented lbfgs_parameter_init() function. + - Fixed several bugs. + - API documentation. + - Version 1.0 (2007-09-20): + - Initial release. + + @section api Documentation + + - @ref liblbfgs_api "libLBFGS API" + + @section sample Sample code + + @include sample.c + + @section ack Acknowledgements + + The L-BFGS algorithm is described in: + - Jorge Nocedal. + Updating Quasi-Newton Matrices with Limited Storage. + <i>Mathematics of Computation</i>, Vol. 35, No. 151, pp. 773--782, 1980. + - Dong C. Liu and Jorge Nocedal. + On the limited memory BFGS method for large scale optimization. + <i>Mathematical Programming</i> B, Vol. 45, No. 3, pp. 503-528, 1989. + + The line search algorithms used in this implementation are described in: + - John E. Dennis and Robert B. Schnabel. + <i>Numerical Methods for Unconstrained Optimization and Nonlinear + Equations</i>, Englewood Cliffs, 1983. + - Jorge J. More and David J. Thuente. + Line search algorithm with guaranteed sufficient decrease. + <i>ACM Transactions on Mathematical Software (TOMS)</i>, Vol. 20, No. 3, + pp. 286-307, 1994. + + This library also implements Orthant-Wise Limited-memory Quasi-Newton (OWL-QN) + method presented in: + - Galen Andrew and Jianfeng Gao. + Scalable training of L1-regularized log-linear models. + In <i>Proceedings of the 24th International Conference on Machine + Learning (ICML 2007)</i>, pp. 33-40, 2007. + + Special thanks go to: + - Yoshimasa Tsuruoka and Daisuke Okanohara for technical information about + OWL-QN + - Takashi Imamichi for the useful enhancements of the backtracking method + - Kevin S. Van Horn, Nic Schraudolph, and Tamas Nepusz for bug fixes + + Finally I would like to thank the original author, Jorge Nocedal, who has been + distributing the effieicnt and explanatory implementation in an open source + licence. + + @section reference Reference + + - <a href="http://www.ece.northwestern.edu/~nocedal/lbfgs.html">L-BFGS</a> by Jorge Nocedal. + - <a href="http://research.microsoft.com/en-us/downloads/b1eb1016-1738-4bd5-83a9-370c9d498a03/default.aspx">Orthant-Wise Limited-memory Quasi-Newton Optimizer for L1-regularized Objectives</a> by Galen Andrew. + - <a href="http://chasen.org/~taku/software/misc/lbfgs/">C port (via f2c)</a> by Taku Kudo. + - <a href="http://www.alglib.net/optimization/lbfgs.php">C#/C++/Delphi/VisualBasic6 port</a> in ALGLIB. + - <a href="http://cctbx.sourceforge.net/">Computational Crystallography Toolbox</a> includes + <a href="http://cctbx.sourceforge.net/current_cvs/c_plus_plus/namespacescitbx_1_1lbfgs.html">scitbx::lbfgs</a>. */ #endif/*__LBFGS_H__*/ diff --git a/test/rosenbrock.lua b/test/rosenbrock.lua new file mode 100644 index 0000000..572e8a5 --- /dev/null +++ b/test/rosenbrock.lua @@ -0,0 +1,50 @@ +require 'torch' +-- rosenbrock.m This function returns the function value, partial derivatives +-- and Hessian of the (general dimension) rosenbrock function, given by: +-- +-- f(x) = sum_{i=1:D-1} 100*(x(i+1) - x(i)^2)^2 + (1-x(i))^2 +-- +-- where D is the dimension of x. The true minimum is 0 at x = (1 1 ... 1). +-- +-- Carl Edward Rasmussen, 2001-07-21. + +function rosenbrock(x) + + -- (1) compute f(x) + local d = x:size(1) + -- x1 = x(i)^2 + local x1 = torch.Tensor(d-1):copy(x:narrow(1,1,d-1)) + -- x(i+1) - x(i)^2 + x1:cmul(x1):mul(-1):add(x:narrow(1,2,d-1)) + + -- 100*(x(i+1) - x(i)^2)^2 + x1:cmul(x1):mul(100) + + -- x(i) + local x0 = torch.Tensor(d-1):copy(x:narrow(1,1,d-1)) + -- 1-x(i) + x0:mul(-1):add(1) + -- (1-x(i))^2 + x0:cmul(x0) + -- 100*(x(i+1) - x(i)^2)^2 + (1-x(i))^2 + x1:add(x0) + local fout = x1:sum() + + -- (2) compute f(x)/dx + local dxout = torch.Tensor():resizeAs(x):zero() + -- df(1:D-1) = - 400*x(1:D-1).*(x(2:D)-x(1:D-1).^2) - 2*(1-x(1:D-1)); + + x1:copy(x:narrow(1,1,d-1)) + x1:cmul(x1):mul(-1):add(x:narrow(1,2,d-1)):cmul(x:narrow(1,1,d-1)):mul(-400) + x0:copy(x:narrow(1,1,d-1)):mul(-1):add(1):mul(-2) + x1:add(x0) + dxout:narrow(1,1,d-1):copy(x1) + + -- df(2:D) = df(2:D) + 200*(x(2:D)-x(1:D-1).^2); + x0:copy(x:narrow(1,1,d-1)) + x0:cmul(x0):mul(-1):add(x:narrow(1,2,d-1)):mul(200) + dxout:narrow(1,2,d-1):add(x0) + + return fout,dxout + +end
\ No newline at end of file diff --git a/test/test_cg.lua b/test/test_cg.lua new file mode 100644 index 0000000..1ff0ec8 --- /dev/null +++ b/test/test_cg.lua @@ -0,0 +1,48 @@ +dofile('rosenbrock.lua') + +require 'liblbfgs' +neval = 0 +maxIterations = 100 +maxLineSearch = 40 +linesearch = 2 +sparsity = 0 +verbose = 2 +nparam = 8 + +local parameters = torch.Tensor(nparam):fill(0.1) + +output, gradParameters = rosenbrock(parameters) + +function printstats () + print('nEval: '..neval) + print('+ fx: '..output) + local xstring = "" + for i = 1,parameters:size(1) do + xstring = string.format("%s, %2.2f", xstring, parameters[i]) + end + print('+ x: ['..xstring..']') + local dxstring = "" + for i = 1,gradParameters:size(1) do + dxstring = string.format("%s, %2.2f", dxstring, gradParameters[i]) + end + + print('+ dx: ['..dxstring..']') +end +print('Starting:') +printstats() +lbfgs.evaluate + = function() + output, gradParameters = rosenbrock(parameters) + neval = neval + 1 + printstats() + return output + end + +-- init CG state +cg.init(parameters, gradParameters, + maxEvaluation, maxIterations, maxLineSearch, + sparsity, linesearch, verbose) + +output = cg.run() + +printstats() diff --git a/test/test_lbfgs.lua b/test/test_lbfgs.lua new file mode 100644 index 0000000..88e5b9a --- /dev/null +++ b/test/test_lbfgs.lua @@ -0,0 +1,48 @@ +dofile('rosenbrock.lua') + +require 'liblbfgs' +neval = 0 +maxIterations = 100 +maxLineSearch = 40 +linesearch = 2 +sparsity = 0 +verbose = 2 +nparam = 8 + +local parameters = torch.Tensor(nparam):fill(0.1) + +output, gradParameters = rosenbrock(parameters) + +function printstats () + print('nEval: '..neval) + print('+ fx: '..output) + local xstring = "" + for i = 1,parameters:size(1) do + xstring = string.format("%s, %2.2f", xstring, parameters[i]) + end + print('+ x: ['..xstring..']') + local dxstring = "" + for i = 1,gradParameters:size(1) do + dxstring = string.format("%s, %2.2f", dxstring, gradParameters[i]) + end + + print('+ dx: ['..dxstring..']') +end +print('Starting:') +printstats() +lbfgs.evaluate + = function() + output, gradParameters = rosenbrock(parameters) + neval = neval + 1 + printstats() + return output + end + +-- init LBFGS state +lbfgs.init(parameters, gradParameters, + maxEvaluation, maxIterations, maxLineSearch, + sparsity, linesearch, verbose) + +output = lbfgs.run() + +printstats() |