github.com/clementfarabet/lua---nnx.git
author    Marco Scoffier <github@metm.org>  2011-09-26 17:16:23 +0400
committer Marco Scoffier <github@metm.org>  2011-09-26 17:16:23 +0400
commit    64a47626531b7b1dd93eb505166490ef0c55246e (patch)
tree      cf8e9cb8f5486944a7a43c6cbf592739f93c4ea9
parent    6a54a1d023cb7f5a421d8186f396f1d17fe53b66 (diff)
code before testing
-rw-r--r--  GenSGDOptimization.lua  147
-rw-r--r--  init.lua                  1
-rw-r--r--  nnx-1.0-1.rockspec        1
3 files changed, 109 insertions, 40 deletions
diff --git a/GenSGDOptimization.lua b/GenSGDOptimization.lua
index 61f7476..d2c1d7a 100644
--- a/GenSGDOptimization.lua
+++ b/GenSGDOptimization.lua
@@ -15,31 +15,105 @@ function GenSGD:__init(...)
{arg='learningRate', type='number',
help='learning rate (W = W - rate*dE/dW)', default=1e-2},
{arg='learningRateDecay', type='number',
- help='learning rate decay (lr_t = lr_0 / (1 + samplesSeen*lrDecay))', default=0},
+ help='learning rate decay (lr_t = lr_0 / (1 + samplesSeen*lrDecay))',
+ default=0},
{arg='weightDecay', type='number',
help='amount of weight decay (W = W - decay*W)', default=0},
{arg='momentum', type='number',
- help='amount of momentum on weights (dE/W = dE/dW*(1-momentum) + prev(dE/dW)*momentum)', default=0}
+ help='amount of momentum on weights (dE/W = dE/dW*(1-momentum) + prev(dE/dW)*momentum)', default=0},
+ {arg='sigma', type='number',
+ help='sigma of gaussian used to randomize learningRate',
+ default = 1e3}
)
+ require 'lab'
if self.parallelize < 2 then
print('ERROR: GenSGD needs to work on several processors')
end
-- change the mapper to send the same batch to each worker
self.copyBatch = true
- self.currentLearningRate = learningRate
- self.workerRates = torch.Tensor(self.P)
+ -- create default parameter set which will be randomized for each worker
+ self.baseParameters = { momentum = self.momentum,
+ weightDecay = self.weightDecay,
+ learningRate = self.learningRate,
+ learningRateDecay = self.learningRateDecay
+ }
+ self.workerParameters = torch.Tensor(self.P)
end
+-- we are changing the way we map and reduce. It would be nice to
+-- change gradParametersPartial to ParametersPartial, as the logic is
+-- different for this kind of parallelization.
function GenSGD:map_hook()
+ -- transmit new parameters to all workers
+ self.children:join()
+ self.children:send(self.parameters)
+ -- randomize learning rate (could randomize other bits)
+ local n = self.learningRate + (lab.randn(self.P) * self.sigma)
+ for i = 1,self.P do
+ self.baseParameters['learningRate'] = n[i]
+ self.children[i]:join()
+ self.children[i]:send(self.baseParameters)
+ end
+
+ -- then wait for all workers to return their partial gradParameters + outputs
+ gradParametersPartial = self.children:receive()
+ outputsPartial = self.children:receive()
+ -- force cleanup
+ collectgarbage()
end
function GenSGD:reduce_hook()
+ local id = 0
+ local mx = 1e9
+ for t = 1,self.P do
+ if outputsPartial[t].f_x < mx then
+ id = t
+ mx = outputsPartial[t].f_x
+ end
+ end
+ if id == 0 then
+ print('ERROR: diverging')
+ else
+ self.baseParameters = outputsPartial[id]
+ self.output = self.baseParameters.f_x
+ -- in this case we get the parameters back directly
+ self.parameters:copy(gradParametersPartial[id])
+ print('Winner: output = '..self.output..
+ ' learningRate = '..self.baseParameters['learningRate'])
+ end
end
function GenSGD:optimize()
self.evaluate()
end
+-- the per-worker optimization step (other update rules could be used in this mode)
+function GenSGD:optimizer(module,params)
+ -- apply momentum (store in the module)
+ if params.momentum ~= 0 then
+ if not module.currentGradParameters then
+ module.currentGradParameters =
+ torch.Tensor():resizeAs(module.gradParameters):copy(module.gradParameters)
+ else
+ module.currentGradParameters:mul(params.momentum):add(1-params.momentum, module.gradParameters)
+ end
+ else
+ module.currentGradParameters = module.gradParameters
+ end
+
+ -- weight decay
+ if params.weightDecay ~= 0 then
+ module.parameters:add(-params.weightDecay, module.parameters)
+ end
+
+ -- update parameters
+ local learningRate =
+ params.learningRate / (1 + params.sampleCounter*params.learningRateDecay)
+ module.parameters:add(-learningRate, module.currentGradParameters)
+ -- keep track of the final rate
+ params.learningRate = learningRate
+end
function GenSGD:worker_code()
-- require packages
@@ -47,12 +121,13 @@ function GenSGD:worker_code()
-- retrieve module + criterion at startup
parallel.yield()
+
module = parallel.parent:receive()
criterion = parallel.parent:receive()
optimizer = parallel.parent:receive()
- parameters = nnx.flattenParameters(nnx.getParameters(self.module))
- gradParameters = nnx.flattenParameters(nnx.getGradParameters(self.module))
+ module.parameters = nnx.flattenParameters(nnx.getParameters(module))
+ module.gradParameters = nnx.flattenParameters(nnx.getGradParameters(module))
-- outer loop: mini-batches
while true do
@@ -71,27 +146,42 @@ function GenSGD:worker_code()
-- receive new set of parameters
parameters:copy(parallel.parent:receive())
-
- -- f is the average of all criterions
- local f_x = 0
- -- evaluate gradients on inputs for this thread
+
+ -- receive the learning rate etc. parameters which are
+ -- tweaked for each thread
+ optimization_parameters = parallel.parent:receive()
+
+ -- evaluate gradients on inputs for this thread and perform
+ -- SGD on these inputs
+ -- reset gradients
+ module.gradParameters:zero()
for i = 1,#inputs do
- -- reset gradients
- gradParameters:zero()
-- estimate f
local output = module:forward(inputs[i])
local err = criterion:forward(output, targets[i])
- f_x = f_x + err
-- estimate df/dW
local df_do = criterion:backward(output, targets[i])
module:backward(inputs[i], df_do)
module:accGradParameters(inputs[i], df_do)
- optimizer
-
+ optimizer(module,optimization_parameters)
end
- -- now send back parameters b/c they are already optimized
+ -- we need the result averaged over all the samples _after_
+ -- the gradient steps, so do one more loop to fprop through
+ -- the samples and collect the error _after_ the optimization
+ local f_x = 0
+ for i = 1,#inputs do
+ -- estimate f
+ local output = module:forward(inputs[i])
+ local err = criterion:forward(output, targets[i])
+ f_x = f_x + err
+ end
+ -- in this case send back parameters themselves b/c they are
+ -- already optimized
parallel.parent:send(parameters)
- parallel.parent:send(f_x)
+ -- need to make sure we keep track of what was used to
+ -- compute these params along with the outputs
+ optimization_parameters['f_x'] = f_x/#inputs
+ parallel.parent:send(optimization_parameters)
-- force cleanup
collectgarbage()
end
@@ -115,26 +205,3 @@ function GenSGD:setup()
self.children:send(self.optimizer)
end
-function GenSGD:post_hook(module,options)
- -- we do the SGD on the worker
- -- apply momentum
- if options.momentum ~= 0 then
- if not module.currentGradParameters then
- module.currentGradParameters = torch.Tensor():resizeAs(gradParameters):copy(gradParameters)
- else
- options.currentGradParameters:mul(options.momentum):add(1-options.momentum, gradParameters)
- end
- else
- options.currentGradParameters = gradParameters
- end
-
- -- weight decay
- if options.weightDecay ~= 0 then
- options.parameters:add(-options.weightDecay, options.parameters)
- end
-
- -- update parameters
- local learningRate = self.learningRate /
- (1 + self.sampleCounter*self.learningRateDecay)
- self.parameters:add(-learningRate, self.currentGradParameters)
-end
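
For readers skimming the patch, the scheme implemented by the new map_hook/reduce_hook boils down to: send the same batch to every worker, let each worker run SGD with a learning rate drawn from a gaussian around the current base rate, then keep the parameters (and rate) of the worker whose post-update error is lowest. Below is a minimal serial sketch of that loop; genSGDStep, evalBatch and nWorkers are illustrative names, not identifiers from the patch, and evalBatch stands in for one worker's SGD pass plus re-evaluation.

-- Serial sketch of the randomized-learning-rate search done by map_hook/reduce_hook.
-- evalBatch(params, lr) stands in for one worker: run SGD over the shared batch with
-- learning rate lr, re-evaluate the batch, and return (updatedParams, averageError).
require 'lab'

local function genSGDStep(params, baseRate, sigma, nWorkers, evalBatch)
   -- one candidate learning rate per worker, drawn around the current base rate
   local rates = baseRate + (lab.randn(nWorkers) * sigma)
   local bestErr, bestParams, bestRate = math.huge, nil, nil
   for w = 1, nWorkers do
      local newParams, err = evalBatch(params, rates[w])
      if err < bestErr then
         bestErr, bestParams, bestRate = err, newParams, rates[w]
      end
   end
   -- the winner's parameters and learning rate become the new starting point
   return bestParams, bestRate, bestErr
end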
diff --git a/init.lua b/init.lua
index d34abba..3c9e9b6 100644
--- a/init.lua
+++ b/init.lua
@@ -102,6 +102,7 @@ torch.include('nnx', 'Optimization.lua')
torch.include('nnx', 'BatchOptimization.lua')
torch.include('nnx', 'SGDOptimization.lua')
torch.include('nnx', 'LBFGSOptimization.lua')
+torch.include('nnx', 'GenSGDOptimization.lua')
-- trainers:
torch.include('nnx', 'Trainer.lua')
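
With the include in place, the new optimizer is loaded alongside SGDOptimization and LBFGSOptimization. The following is a hypothetical usage sketch: the class name nn.GenSGDOptimization and the module/criterion/parallelize arguments are assumptions modeled on the sibling optimizers and are not shown in this patch, while learningRate, learningRateDecay, weightDecay, momentum and sigma come from the constructor added above.

require 'nnx'

-- hypothetical wiring, modeled on the other nnx optimizers (names not confirmed here)
local module = nn.Sequential()
module:add(nn.Linear(10, 2))
local criterion = nn.MSECriterion()

local optimizer = nn.GenSGDOptimization{
   module       = module,
   criterion    = criterion,
   parallelize  = 4,     -- GenSGD prints an error with fewer than 2 workers
   learningRate = 1e-2,
   momentum     = 0,
   weightDecay  = 0,
   sigma        = 1e3    -- spread of the gaussian used to randomize learningRate
}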
diff --git a/nnx-1.0-1.rockspec b/nnx-1.0-1.rockspec
index 2650547..0145271 100644
--- a/nnx-1.0-1.rockspec
+++ b/nnx-1.0-1.rockspec
@@ -138,6 +138,7 @@ build = {
install_files(/lua/nnx Optimization.lua)
install_files(/lua/nnx LBFGSOptimization.lua)
install_files(/lua/nnx SGDOptimization.lua)
+ install_files(/lua/nnx GenSGDOptimization.lua)
install_files(/lua/nnx BatchOptimization.lua)
install_files(/lua/nnx BatchTrainer.lua)
add_subdirectory (test)
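
As a closing reference, the per-sample update each worker applies in GenSGD:optimizer reduces to the rules below; sgdStep is an illustrative scalar restatement for a single weight, not code from the patch.

-- Scalar restatement of the per-sample worker update in GenSGD:optimizer:
--   grad_t = momentum*grad_{t-1} + (1-momentum)*dE/dW
--   W      = W - weightDecay*W
--   lr_t   = lr_0 / (1 + samplesSeen*lrDecay)
--   W      = W - lr_t*grad_t
local function sgdStep(w, dEdw, prevGrad, opts, samplesSeen)
   local g = prevGrad and (opts.momentum*prevGrad + (1 - opts.momentum)*dEdw) or dEdw
   w = w - opts.weightDecay*w
   local lr = opts.learningRate / (1 + samplesSeen*opts.learningRateDecay)
   return w - lr*g, g
end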