github.com/clementfarabet/lua---nnx.git

author     Clement Farabet <clement.farabet@gmail.com>   2011-08-26 00:17:08 +0400
committer  Clement Farabet <clement.farabet@gmail.com>   2011-08-26 00:17:08 +0400
commit     087d96271f60bd3fd80e8b3aaab72626cd0b6e53 (patch)
tree       701d54f2c87911bc5098d1f53fbd9c0682d24e04
parent     a3f9ab6ef678d5f442f3f2d7e857e3110c0470c6 (diff)
Added map-reduce version of l-BFGS. In dev...
-rw-r--r--  LBFGSOptimization.lua  102
1 file changed, 102 insertions, 0 deletions
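
For context, here is a minimal usage sketch of the new 'parallelize' option added by this commit. Only the options visible in the hunks below ('maxIterations', 'maxLineSearch', 'sparsity', 'parallelize', 'verbose') are taken from the patch; the class name and the 'module'/'criterion' argument names are assumptions, since the constructor header is not part of this diff.

-- Hypothetical usage sketch (not part of the commit); the class name and any
-- argument names other than the options listed in the diff below are assumed.
require 'torch'
require 'nn'
require 'nnx'

-- a small model and criterion to optimize (illustrative)
local mlp = nn.Sequential()
mlp:add(nn.Linear(10, 10))
mlp:add(nn.Tanh())
mlp:add(nn.Linear(10, 2))

-- a toy dataset: plain Lua tables of input/target tensors
local inputs, targets = {}, {}
for i = 1,16 do
   inputs[i] = torch.randn(10)
   targets[i] = torch.randn(2)
end

local optimizer = nn.LBFGSOptimization{   -- assumed class name
   module = mlp,                          -- assumed argument name
   criterion = nn.MSECriterion(),         -- assumed argument name
   maxIterations = 20,
   maxLineSearch = 20,
   parallelize = 4,                       -- new option: map-reduce over 4 clones
   verbose = 1
}

-- dispatches to forward_mapreduce when parallelize > 1, forward_sequential otherwise
local avgError = optimizer:forward(inputs, targets)
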
diff --git a/LBFGSOptimization.lua b/LBFGSOptimization.lua
index 83393d9..f0e130a 100644
--- a/LBFGSOptimization.lua
+++ b/LBFGSOptimization.lua
@@ -10,6 +10,7 @@ function LBFGS:__init(...)
      {arg='maxIterations', type='number', help='maximum nb of iterations per pass (0 = no max)', default=0},
      {arg='maxLineSearch', type='number', help='maximum nb of steps in line search', default=20},
      {arg='sparsity', type='number', help='sparsity coef (Orthantwise C)', default=0},
+      {arg='parallelize', type='number', help='parallelize onto N cores (experimental!)', default=1},
      {arg='verbose', type='number', help='verbose level during training [0-2]', default=0}
   )
   self.parametersT = nnx.getParameters(self.module)
@@ -19,6 +20,14 @@ end

function LBFGS:forward(inputs, targets, options)
   options = options or {}
+   if self.parallelize > 1 then
+      return self:forward_mapreduce(inputs, targets, options)
+   else
+      return self:forward_sequential(inputs, targets, options)
+   end
+end
+
+function LBFGS:forward_sequential(inputs, targets, options)
   -- (1) construct a closure that computes f(inputs) + df/dW
   -- after each call to that function:
   --   + self.parameters contains the current X vector
@@ -73,3 +82,96 @@ function LBFGS:forward(inputs, targets, options)
   -- (5) return current output after optimization
   return self.output
end
+
+function LBFGS:forward_mapreduce(inputs, targets, options)
+   -- (0) clone module+criterion for parallel evaluations
+   local modules = {}
+   local criterions = {}
+   local outputs = {}
+   self.parametersT = {}
+   self.gradParametersT = {}
+   for m = 1,self.parallelize do
+      if m == 1 then
+         modules[m] = self.module
+         criterions[m] = self.criterion
+      else
+         modules[m] = self.module:clone()
+         criterions[m] = self.criterion:clone()
+      end
+      self.parametersT[m] = nnx.getParameters(modules[m])
+      self.gradParametersT[m] = nnx.getGradParameters(modules[m])
+   end
+
+   -- (1) construct a closure that computes f(inputs) + df/dW
+   -- after each call to that function:
+   --   + self.parameters contains the current X vector
+   --   + self.gradParameters contains the estimated dF/dX vector
+   --   + self.output contains the estimated (average) F(X)
+   lbfgs.evaluate
+      = function()
+           for t = 1,self.parallelize do
+              lbfgs.evaluate_map(t)
+           end
+           return lbfgs.evaluate_reduce()
+        end
+
+   -- (1a) the map part of the evaluation: compute partial gradients
+   --      in separate threads
+   lbfgs.evaluate_map
+      = function(thread)
+           -- set parameters from current state
+           self:unflatten(self.parametersT[thread], self.gradParametersT[thread])
+           -- reset gradients
+           modules[thread]:zeroGradParameters()
+           -- f is the average of all criterions
+           outputs[thread] = 0
+           -- given all inputs, evaluate gradients
+           for i = thread,#inputs,self.parallelize do
+              -- estimate f
+              local output = modules[thread]:forward(inputs[i])
+              local err = criterions[thread]:forward(output, targets[i])
+              outputs[thread] = outputs[thread] + err
+              -- estimate df/dW
+              local df_do = criterions[thread]:backward(output, targets[i])
+              modules[thread]:backward(inputs[i], df_do)
+           end
+        end
+
+   -- (1b) the reduce part of the evaluation: accumulate all
+   --      partial estimates of the gradients
+   lbfgs.evaluate_reduce
+      = function()
+           -- temp vectors for accumulation
+           self.gradParametersAcc = self.gradParametersAcc or torch.Tensor()
+           self.gradParametersAcc:resizeAs(self.gradParameters):zero()
+           -- update state from computed parameters
+           for t = 1,self.parallelize do
+              self:flatten(self.parametersT[1], self.gradParametersT[t])
+              self.gradParametersAcc:add(self.gradParameters)
+           end
+           self.gradParameters:copy(self.gradParametersAcc)
+           -- normalize gradients
+           self.gradParameters:div(#inputs)
+           -- return average f(X)
+           self.output = 0
+           for t = 1,self.parallelize do
+              self.output = self.output + outputs[t]
+           end
+           return self.output/#inputs
+        end
+
+   -- (2) store current parameters/gradParameters
+   self:flatten(self.parametersT[1], self.gradParametersT[1])
+
+   -- (3) the magic function: will update the parameter vector
+   --     according to the l-BFGS method
+   self.output = lbfgs.run(self.parameters, self.gradParameters,
+                           self.maxIterations, self.maxLineSearch,
+                           self.sparsity)
+
+   -- (4) last: read parameters back into the model
+   self:unflatten(self.parametersT[1], self.gradParametersT[1])
+
+   -- (5) return current output after optimization
+   return self.output
+end
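
For reference, the map step strides over the input list so that each of the P = parallelize clones works on its own slice of the samples. A small standalone illustration of that stride partition (illustrative only, not part of the commit):

-- Stride partition: with P parallel clones, clone t processes samples
-- t, t+P, t+2P, ... so the slices are disjoint and together cover all inputs.
local P, nInputs = 3, 8
for t = 1,P do
   local slice = {}
   for i = t,nInputs,P do
      table.insert(slice, i)
   end
   print(string.format('clone %d -> {%s}', t, table.concat(slice, ',')))
end
-- prints:
--   clone 1 -> {1,4,7}
--   clone 2 -> {2,5,8}
--   clone 3 -> {3,6}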
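
The reduce step then sums the per-clone gradient vectors and divides by the total number of samples to obtain the average dF/dX handed to lbfgs.run. A minimal sketch of that accumulation with flat tensors (tensor names are illustrative, not from the commit):

-- Accumulate partial gradient vectors (one per clone), then divide by the
-- total sample count to get the average gradient.
require 'torch'

local nInputs = 8
local partialGrads = { torch.randn(5), torch.randn(5), torch.randn(5) }

local gradAcc = torch.Tensor():resizeAs(partialGrads[1]):zero()
for t = 1,#partialGrads do
   gradAcc:add(partialGrads[t])   -- sum, one partial estimate per clone
end
gradAcc:div(nInputs)              -- average over all samples, not over clones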