author    | Clement Farabet <clement.farabet@gmail.com> | 2011-08-24 18:14:17 +0400
committer | Clement Farabet <clement.farabet@gmail.com> | 2011-08-24 18:14:17 +0400
commit    | 3675e32fded83807fa5e96604dbfab7d72c04d5b (patch)
tree      | 0b3a16ebeaef954f5b9b7963aafce1e45283e3f9
parent    | 9efb053b8775c11f2c4a7907b623d50eacc45e00 (diff)
Unified SGD/LBFGS
-rw-r--r-- | LBFGSOptimization.lua | 11
-rw-r--r-- | OnlineTrainer.lua     | 33
-rw-r--r-- | Optimization.lua      |  5
-rw-r--r-- | SGDOptimization.lua   | 36
-rw-r--r-- | init.lua              |  1
-rw-r--r-- | lbfgs.c               | 14
-rw-r--r-- | nnx-1.0-1.rockspec    |  1
7 files changed, 56 insertions, 45 deletions
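The diff below replaces the old optimizer call, forward(parameters, gradParameters), with a unified forward(inputs, targets): each optimizer now owns its module and criterion (both req=true) and returns the average error over the samples it was given, so OnlineTrainer can drive SGD and L-BFGS through the same call. A minimal usage sketch of that interface, assuming the class names nn.SGDOptimization/nn.LBFGSOptimization, the usual xlua named-argument constructor, and a toy nn.Linear/nn.MSECriterion model with lab.randn data chosen purely for illustration:

-- sketch only: a tiny model and one sample, to show the unified optimizer call
require 'nnx'

local module = nn.Sequential()
module:add(nn.Linear(10, 1))
local criterion = nn.MSECriterion()

-- both optimizers are now constructed with the module and criterion
local optimizer = nn.SGDOptimization{module = module,
                                     criterion = criterion,
                                     learningRate = 1e-2}
-- the L-BFGS optimizer takes the same two required arguments:
-- local optimizer = nn.LBFGSOptimization{module = module, criterion = criterion}

-- one call evaluates the sample(s), updates the parameters in place,
-- and returns the average criterion value
local input  = lab.randn(10)
local target = lab.randn(1)
local err = optimizer:forward({input}, {target})
print('average error: ' .. err)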
diff --git a/LBFGSOptimization.lua b/LBFGSOptimization.lua
index 32267da..7d41844 100644
--- a/LBFGSOptimization.lua
+++ b/LBFGSOptimization.lua
@@ -6,7 +6,7 @@ function LBFGS:__init(...)
    xlua.unpack_class(self, {...},
       'LBFGSOptimization', nil,
       {arg='module', type='nn.Module', help='a module to train', req=true},
-      {arg='criterion', type='nn.Criterion', help='a criterion to estimate the error'}
+      {arg='criterion', type='nn.Criterion', help='a criterion to estimate the error', req=true}
    )
    self.parametersT = nnx.getParameters(self.module)
    self.gradParametersT = nnx.getGradParameters(self.module)
@@ -38,8 +38,8 @@ function LBFGS:forward(inputs, targets)
          end
          -- update state from computed parameters
          self:flatten(self.parametersT, self.gradParametersT)
-         -- return f(X)
-         return self.output
+         -- return average f(X)
+         return self.output/#inputs
       end

    -- (2) store current parameters/gradParameters
@@ -47,8 +47,11 @@ function LBFGS:forward(inputs, targets)

    -- (3) the magic function: will update the parameter vector
    --     according to the l-BFGS method
-   lbfgs.run(self.parameters, self.gradParameters)
+   self.output = lbfgs.run(self.parameters, self.gradParameters)

    -- (4) last: read parameters back into the model
    self:unflatten(self.parametersT, self.gradParametersT)
+
+   -- (5) return current output after optimization
+   return self.output
 end
diff --git a/OnlineTrainer.lua b/OnlineTrainer.lua
index 9dd81f2..8a06aa7 100644
--- a/OnlineTrainer.lua
+++ b/OnlineTrainer.lua
@@ -64,15 +64,10 @@ function OnlineTrainer:train(dataset)
       shuffledIndices = lab.randperm(dataset:size())
    end

-   local parameters = nnx.getParameters(module)
-   local gradParameters = nnx.getGradParameters(module)
-
    while true do
       print('<trainer> on training set:')
       print("<trainer> stochastic gradient descent epoch # " .. self.epoch)
-      module:zeroGradParameters()
-
      self.time = sys.clock()
      self.currentError = 0

      for t = 1,dataset:size() do
@@ -85,40 +80,16 @@ function OnlineTrainer:train(dataset)
         local sample = dataset[self.trainOffset + shuffledIndices[t]]
         local input = sample[1]
         local target = sample[2]
-        local sample_x = sample.x
-        local sample_y = sample.y

         -- optional preprocess (no learning is done for that guy)
         if self.preprocessor then input = self.preprocessor:forward(input) end

-        -- forward through model and criterion
-        -- (if no criterion, it is assumed to be contained in the model)
-        local modelOut, error
-        if criterion then
-           modelOut = module:forward(input)
-           error = criterion:forward(modelOut, target)
-        else
-           modelOut, error = module:forward(input, target, sample_x, sample_y)
-        end
+        -- optimize the model given current input/target set
+        local error = self.optimizer:forward({input}, {target})

         -- accumulate error
         self.currentError = self.currentError + error

-        -- reset gradients
-        module:zeroGradParameters()
-
-        -- backward through model
-        -- (if no criterion, it is assumed that derror is internally generated)
-        if criterion then
-           local derror = criterion:backward(module.output, target)
-           module:backward(input, derror)
-        else
-           module:backward(input)
-        end
-
-        -- update parameters in the model
-        self.optimizer:forward(parameters, gradParameters)
-
         -- call user hook, if any
         if self.hookTrainSample then
            self.hookTrainSample(self, sample)
diff --git a/Optimization.lua b/Optimization.lua
index ed230e5..f18c635 100644
--- a/Optimization.lua
+++ b/Optimization.lua
@@ -3,10 +3,11 @@ local Optimization = torch.class('nn.Optimization')
 function Optimization:__init()
 end

-function Optimization:forward(parameters, gradParameters)
+function Optimization:forward(inputs, targets)
    self:flatten(parameters, gradParameters)
-   -- do your thing
+   self.output = 0
    self:unflatten(parameters, gradParameters)
+   return self.output
 end

 function Optimization:flatten(parameters, gradParameters)
diff --git a/SGDOptimization.lua b/SGDOptimization.lua
index 8cf4b03..514c1a8 100644
--- a/SGDOptimization.lua
+++ b/SGDOptimization.lua
@@ -4,14 +4,40 @@ function SGD:__init(...)
    parent.__init(self)
    xlua.unpack_class(self, {...},
       'SGDOptimization', nil,
+      {arg='module', type='nn.Module', help='a module to train', req=true},
+      {arg='criterion', type='nn.Criterion', help='a criterion to estimate the error', req=true},
      {arg='learningRate', type='number', help='learning rate (W = W - rate*dE/dW)', default=1e-2},
      {arg='weightDecay', type='number', help='amount of weight decay (W = W - decay*W)', default=0},
      {arg='momentum', type='number', help='amount of momentum on weights (dE/W = dE/dW*(1-momentum) + prev(dE/dW)*momentum)', default=0}
   )
+   self.parametersT = nnx.getParameters(self.module)
+   self.gradParametersT = nnx.getGradParameters(self.module)
 end

-function SGD:forward(parameters, gradParameters)
-   self:flatten(parameters, gradParameters)
+function SGD:forward(inputs, targets)
+   -- reset gradients
+   self.module:zeroGradParameters()
+
+   -- f is the average of all criterions
+   self.output = 0
+
+   -- given all inputs, evaluate gradients
+   for i = 1,#inputs do
+      -- estimate f
+      local output = self.module:forward(inputs[i])
+      local err = self.criterion:forward(output, targets[i])
+      self.output = self.output + err
+
+      -- estimate df/dW
+      local df_do = self.criterion:backward(output, targets[i])
+      self.module:backward(inputs[i], df_do)
+   end
+
+   -- renorm f
+   self.output = self.output / #inputs
+
+   -- update state from computed parameters
+   self:flatten(self.parametersT, self.gradParametersT)

    -- apply momentum
    if self.momentum ~= 0 then
@@ -32,5 +58,9 @@
    -- update parameters
    self.parameters:add(-self.learningRate, self.currentGradParameters)

-   self:unflatten(parameters, gradParameters)
+   -- write compute parameters back in place
+   self:unflatten(self.parametersT, self.gradParametersT)
+
+   -- return current output
+   return self.output
 end
diff --git a/init.lua b/init.lua
@@ -100,6 +100,7 @@ torch.include('nnx', 'SpatialSparseCriterion.lua')
 -- optimizations:
 torch.include('nnx', 'Optimization.lua')
 torch.include('nnx', 'SGDOptimization.lua')
+torch.include('nnx', 'LBFGSOptimization.lua')

 -- trainers:
 torch.include('nnx', 'Trainer.lua')
diff --git a/lbfgs.c b/lbfgs.c
@@ -1397,10 +1397,10 @@ static int progress(void *instance, int k, int ls)
 {
-  printf("Iteration %d:\n", k);
-  printf("  fx = %f, x[0] = %f, x[1] = %f\n", fx, x[0], x[1]);
-  printf("  xnorm = %f, gnorm = %f, step = %f\n", xnorm, gnorm, step);
-  printf("\n");
+  //printf("Iteration %d:\n", k);
+  //printf("  fx = %f, x[0] = %f, x[1] = %f\n", fx, x[0], x[1]);
+  //printf("  xnorm = %f, gnorm = %f, step = %f\n", xnorm, gnorm, step);
+  //printf("\n");
   return 0;
 }
@@ -1427,8 +1427,12 @@ int lbfgs_run(lua_State *L) {
   // evaluate() and progress() when necessary.
   int ret = lbfgs(nParameter, x, &fx, evaluate, progress, NULL, &param);

+  // cleanup
   lbfgs_free(x);
-  return 0;
+
+  // return current error
+  lua_pushnumber(L, fx);
+  return 1;
 }

 static const struct luaL_Reg lbfgs_methods__ [] = {
diff --git a/nnx-1.0-1.rockspec b/nnx-1.0-1.rockspec
index 9f8337b..4529d24 100644
--- a/nnx-1.0-1.rockspec
+++ b/nnx-1.0-1.rockspec
@@ -101,6 +101,7 @@ build = {
          install_files(/lua/nnx SpatialColorTransform.lua)
          install_files(/lua/nnx SpatialRecursiveFovea.lua)
          install_files(/lua/nnx Optimization.lua)
+         install_files(/lua/nnx LBFGSOptimization.lua)
          install_files(/lua/nnx SGDOptimization.lua)
       add_subdirectory (test)
       install_targets(/lib nnx)
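With this change OnlineTrainer no longer flattens parameters, zeroes gradients, or calls the criterion itself: it simply hands {input}, {target} to whatever optimizer it was configured with, and lbfgs_run now pushes the final objective value back to Lua so that LBFGS:forward can report an error just as SGD:forward does. Since forward takes tables of samples, a caller can also pass a whole batch in one call, which is the natural way to define the objective that L-BFGS minimizes. A sketch under the same illustrative assumptions as above (toy model, lab.randn data, default L-BFGS settings):

require 'nnx'

local module = nn.Sequential()
module:add(nn.Linear(10, 1))
local criterion = nn.MSECriterion()
local optimizer = nn.LBFGSOptimization{module = module, criterion = criterion}

-- build a small batch of input/target pairs
local inputs, targets = {}, {}
for i = 1,16 do
   inputs[i]  = lab.randn(10)
   targets[i] = lab.randn(1)
end

-- one call runs the L-BFGS solver on this batch; the returned value is the
-- final objective, i.e. the average criterion value computed in LBFGS:forward
local fx = optimizer:forward(inputs, targets)
print('final objective: ' .. fx)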