github.com/clementfarabet/lua---nnx.git
author     Marco Scoffier <github@metm.org>    2011-09-27 18:15:20 +0400
committer  Marco Scoffier <github@metm.org>    2011-09-27 18:15:20 +0400
commit     0fa31ede87b5ea1a63657ce2be453dce6237fe4c
tree       624baa733c51159d147e6af5c0bf41389fa50350
parent     d3d60789dce2161702613d6ea5141cc3a30e75d5
rename + whitespace cleanup
-rw-r--r--  GeneticSGDOptimization.lua (renamed from GenSGDOptimization.lua)  104
-rw-r--r--  init.lua                                                            2
-rw-r--r--  nnx-1.0-1.rockspec                                                  2
3 files changed, 58 insertions, 50 deletions
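
The commit renames GenSGDOptimization.lua to GeneticSGDOptimization.lua (the class is still registered as nn.GenSGDOptimization), normalizes trailing whitespace throughout the file, and changes how reduce_hook carries the winning learning rate over to the next batch. A minimal usage sketch (not part of this commit), assuming the named-argument construction style used by the other nnx optimizers, with module, criterion and parallelize handled by the parent nn.BatchOptimization:

require 'nnx'

local mlp = nn.Sequential()
mlp:add(nn.Linear(10, 2))

local optimizer = nn.GenSGDOptimization{
   module        = mlp,
   criterion     = nn.MSECriterion(),
   parallelize   = 4,     -- GenSGD refuses to run with fewer than 2 workers
   learningRate  = 1e-2,
   momentum      = 0,
   weightDecay   = 0,
   maxIterations = 1
}
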
diff --git a/GenSGDOptimization.lua b/GeneticSGDOptimization.lua
index fb883e5..1c1db4b 100644
--- a/GenSGDOptimization.lua
+++ b/GeneticSGDOptimization.lua
@@ -1,5 +1,5 @@
local GenSGD,parent = torch.class('nn.GenSGDOptimization',
- 'nn.BatchOptimization')
+ 'nn.BatchOptimization')
-- this module parallelizes SGD in a particular way. It sends out the
-- same batch to each of several workers, each with a different learning
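
The scheme the comment above describes, where every worker sees the same batch but a different learning rate and the best result is kept, reduced to a single-process sketch (a stand-in loss replaces the real parallel workers; all names and values are illustrative):

local baseRate   = 1e-2
local candidates = { baseRate, baseRate * 0.3, baseRate * 3, baseRate * 10 }  -- stand-in for the randomized rates
local function lossWithRate(rate)            -- pretend worker result on the shared batch
   return math.abs(math.log(rate / 3e-3))    -- toy loss, lowest near rate = 3e-3
end
local bestRate, bestLoss = baseRate, math.huge
for _, rate in ipairs(candidates) do
   local loss = lossWithRate(rate)
   if loss < bestLoss then bestRate, bestLoss = rate, loss end
end
print('selected rate: ' .. bestRate)
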
@@ -10,19 +10,19 @@ function GenSGD:__init(...)
parent.__init(self,...)
xlua.unpack_class(self, {...},
'GenSGDOptimization', nil,
- {arg='maxIterations', type='number',
+ {arg='maxIterations', type='number',
help='maximum nb of iterations per pass', default=1},
- {arg='learningRate', type='number',
+ {arg='learningRate', type='number',
help='learning rate (W = W - rate*dE/dW)', default=1e-2},
- {arg='learningRateDecay', type='number',
- help='learning rate decay (lr_t = lr_0 / (1 + samplesSeen*lrDecay))',
+ {arg='learningRateDecay', type='number',
+ help='learning rate decay (lr_t = lr_0 / (1 + samplesSeen*lrDecay))',
default=0},
- {arg='weightDecay', type='number',
+ {arg='weightDecay', type='number',
help='amount of weight decay (W = W - decay*W)', default=0},
- {arg='momentum', type='number',
+ {arg='momentum', type='number',
help='amount of momentum on weights (dE/W = dE/dW*(1-momentum) + prev(dE/dW)*momentum)', default=0}
)
- require 'lab'
+ require 'lab'
if self.parallelize < 2 then
xerror('GenSGD needs to work on several processors: set parallelize',
'nn.GenSGDOptimization')
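
A worked instance of the learningRateDecay formula quoted in the help string above (illustrative values):

local lr0, lrDecay, samplesSeen = 1e-2, 1e-4, 50000
local lr_t = lr0 / (1 + samplesSeen * lrDecay)
print(lr_t)   -- 0.01 / 6, i.e. about 1.67e-3
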
@@ -30,7 +30,7 @@ function GenSGD:__init(...)
-- change the mapper to send the same batch to each worker
self.copyBatch = true
-- create default parameter set which will be randomized for each worker
- self.baseParameters = { momentum = self.momentum,
+ self.baseParameters = { momentum = self.momentum,
weightDecay = self.weightDecay,
learningRate = self.learningRate,
learningRateDecay = self.learningRateDecay,
@@ -49,13 +49,13 @@ function GenSGD:map_hook()
-- randomize learning rate (could randomize other bits). Using a
-- log normal around the base rate.
local n = lab.randn(P):exp() * self.learningRate
-
+ n[1] = self.learningRate
self.baseParameters.sampleCounter = self.sampleCounter
for t = 1,P do
self.baseParameters.learningRate = n[t]
- --self.children[t]:join()
- self.children[t]:send(self.baseParameters)
+ --self.children[t]:join()
+ self.children[t]:send(self.baseParameters)
end
-- then wait for all workers to return their Parameters + outputs
-- should rename this to parametersParallel and optionsParallel
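
The map_hook randomization above, written out in plain Lua (a stand-in for lab.randn(P):exp(); worker 1 keeps the unperturbed base rate, as in the added n[1] line):

local function randn()   -- standard normal via Box-Muller
   return math.sqrt(-2 * math.log(1 - math.random())) * math.cos(2 * math.pi * math.random())
end
local P, baseRate = 4, 1e-2
local rates = {}
for t = 1, P do
   rates[t] = baseRate * math.exp(randn())   -- log-normal around the base rate
end
rates[1] = baseRate                          -- worker 1 runs the base rate itself
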
@@ -82,8 +82,14 @@ function GenSGD:reduce_hook()
self.output = self.baseParameters.f_x
-- in this case we get the parameters back directly
self.parameters:copy(gradParametersPartial[id])
- -- keep this learning rate for the next batch
- self.learningRate = self.baseParameters.learningRate
+ if not self.old_fx then
+ self.old_fx = self.baseParameters.f_x
+ elseif self.old_fx > self.baseParameters.f_x then
+ -- average towards this learning rate for the next batch
+ self.learningRate = 0.5 * self.learningRate * self.baseParameters.learningRate
+ self.old_fx = self.baseParameters.f_x
+ end
+ print('lr: '..self.learningRate..' fx: '..self.old_fx..' bfx: '..self.baseParameters.f_x)
end
end
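
The learning-rate carry-over added in the hunk above, pulled out into a standalone function (faithful to the new lines; state, winnerRate and winnerFx are stand-in names):

local function carryOverRate(state, winnerRate, winnerFx)
   if not state.old_fx then
      state.old_fx = winnerFx                 -- first batch: just record f_x
   elseif state.old_fx > winnerFx then        -- loss improved on this batch
      state.learningRate = 0.5 * state.learningRate * winnerRate
      state.old_fx = winnerFx
   end
end

local state = { learningRate = 1e-2 }
carryOverRate(state, 2e-2, 0.8)   -- first call only records f_x
carryOverRate(state, 3e-2, 0.5)   -- improvement: rate becomes 0.5 * 0.01 * 0.03 = 1.5e-4
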
@@ -92,32 +98,32 @@ function GenSGD:optimize()
end
-- optimization (could do others in this mode)
-GenSGD.optimizer =
+GenSGD.optimizer =
function (module,params)
-- apply momentum (store in the module)
- if params.momentum ~= 0 then
- if not module.currentGradParameters then
- module.currentGradParameters =
- torch.Tensor():resizeAs(module.gradParameters):copy(module.gradParameters)
+ if params.momentum ~= 0 then
+ if not module.currentGradParameters then
+ module.currentGradParameters =
+ torch.Tensor():resizeAs(module.gradParameters):copy(module.gradParameters)
+ else
+ module.currentGradParameters:mul(params.momentum):add(1-params.momentum, module.gradParameters)
+ end
else
- module.currentGradParameters:mul(params.momentum):add(1-params.momentum, module.gradParameters)
+ module.currentGradParameters = module.gradParameters
end
- else
- module.currentGradParameters = module.gradParameters
- end
- -- weight decay
- if params.weightDecay ~= 0 then
- module.parameters:add(-params.weightDecay, module.parameters)
- end
+ -- weight decay
+ if params.weightDecay ~= 0 then
+ module.parameters:add(-params.weightDecay, module.parameters)
+ end
- -- update parameters
- local learningRate =
- params.learningRate / (1 + params.sampleCounter*params.learningRateDecay)
- module.parameters:add(-learningRate, module.currentGradParameters)
- -- make keep track of final rate
- params.learningRate = learningRate
-end
+ -- update parameters
+ local learningRate =
+ params.learningRate / (1 + params.sampleCounter*params.learningRateDecay)
+ module.parameters:add(-learningRate, module.currentGradParameters)
+ -- make keep track of final rate
+ params.learningRate = learningRate
+ end
function GenSGD:setup_mapreduce ()
-- (0) startup parallel package
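
The per-worker update rule in GenSGD.optimizer (the hunk above), reduced to a single scalar weight (illustrative; the real code operates on the module's flat parameter and gradient tensors):

local function sgdStep(w, grad, prevGrad, p)
   local g = grad
   if p.momentum ~= 0 then
      -- dE/dW = dE/dW*(1-momentum) + prev(dE/dW)*momentum
      g = (1 - p.momentum) * grad + p.momentum * prevGrad
   end
   if p.weightDecay ~= 0 then
      w = w - p.weightDecay * w               -- W = W - decay*W
   end
   local lr = p.learningRate / (1 + p.sampleCounter * p.learningRateDecay)
   return w - lr * g, g, lr                   -- new weight, new "currentGrad", rate used
end

print(sgdStep(1.0, 0.2, 0.1, { momentum = 0.9, weightDecay = 0,
   learningRate = 1e-2, learningRateDecay = 0, sampleCounter = 0 }))
-- 0.9989, 0.11, 0.01
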
@@ -125,18 +131,18 @@ function GenSGD:setup_mapreduce ()
xerror('install parallel for Lua to enable parallel computing (luarocks install parallel)',
'nn.GenSGDOptimization')
end
- local worker_code =
+ local worker_code =
function()
-- require packages
require 'nnx'
-
+
-- retrieve module + criterion at startup
parallel.yield()
-
+
module = parallel.parent:receive()
criterion = parallel.parent:receive()
optimizer = parallel.parent:receive()
-
+
-- retrieve optional prehook/posthook
prehook = parallel.parent:receive()
posthook = parallel.parent:receive()
@@ -161,34 +167,36 @@ function GenSGD:setup_mapreduce ()
check(tableGradParameters)
parameters = torch.Tensor():set(tableParameters[1]:storage())
gradParameters = torch.Tensor():set(tableGradParameters[1]:storage())
-
+
-- outer loop: mini-batches
while true do
-- sync
if parallel.yield() == 'break' then break end
-
+
-- receive new mini-batch
inputs = parallel.parent:receive()
targets = parallel.parent:receive()
options = parallel.parent:receive()
-
+
-- inner loop: evaluations
while true do
-- sync
if parallel.yield() == 'break' then break end
-
+
-- receive new set of parameters
parameters:copy(parallel.parent:receive())
-- receive the learning rate etc. parameters which are
-- tweaked for each thread
- optimization_parameters = parallel.parent:receive()
-
+ optimization_parameters = parallel.parent:receive()
+
-- evaluate gradients on inputs for this thread and perform
-- SGD on these inputs
- -- reset gradients
+ -- reset gradients
gradParameters:zero()
+
module.parameters = parameters
module.gradParameters = gradParameters
+
for i = 1,#inputs do
-- estimate f
local output = module:forward(inputs[i])
@@ -197,7 +205,7 @@ function GenSGD:setup_mapreduce ()
local df_do = criterion:backward(output, targets[i])
module:backward(inputs[i], df_do)
module:accGradParameters(inputs[i], df_do)
- optimizer(module,optimization_parameters)
+ optimizer(module,optimization_parameters)
end
-- we need the result averaged over all the samples _after_
-- the gradient steps so do one more loop to fprop through
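
The averaging the comment above refers to: after the per-sample SGD steps, the worker makes one more forward pass over the whole mini-batch so the reported f_x reflects the updated parameters. A sketch with a toy module and criterion (the real worker reuses the module, criterion, inputs and targets received from the parent process):

require 'nnx'
require 'lab'
local module    = nn.Linear(3, 1)
local criterion = nn.MSECriterion()
local inputs, targets = {}, {}
for i = 1, 4 do
   inputs[i], targets[i] = lab.randn(3), lab.randn(1)
end
-- ... per-sample gradient steps would happen here ...
local fx = 0
for i = 1, #inputs do
   fx = fx + criterion:forward(module:forward(inputs[i]), targets[i])
end
fx = fx / #inputs   -- averaged loss after the updates
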
@@ -231,7 +239,7 @@ function GenSGD:setup_mapreduce ()
-- (2) startup all workers
self.children = parallel.sfork(self.parallelize)
self.children:exec(worker_code)
-
+
-- (4) and send them the module + criterion architecture
self.children:join()
self.children:send(self.module)
@@ -241,4 +249,4 @@ function GenSGD:setup_mapreduce ()
local ok,err = pcall(setup)
if not ok then parallel.close() error(err) end
-end
\ No newline at end of file
+end
diff --git a/init.lua b/init.lua
index a0cab01..7f928f4 100644
--- a/init.lua
+++ b/init.lua
@@ -102,7 +102,7 @@ torch.include('nnx', 'Optimization.lua')
torch.include('nnx', 'BatchOptimization.lua')
torch.include('nnx', 'SGDOptimization.lua')
torch.include('nnx', 'LBFGSOptimization.lua')
-torch.include('nnx', 'GenSGDOptimization.lua')
+torch.include('nnx', 'GeneticSGDOptimization.lua')
-- trainers:
torch.include('nnx', 'Trainer.lua')
diff --git a/nnx-1.0-1.rockspec b/nnx-1.0-1.rockspec
index 8489dbf..deb5fc0 100644
--- a/nnx-1.0-1.rockspec
+++ b/nnx-1.0-1.rockspec
@@ -136,7 +136,7 @@ build = {
install_files(/lua/nnx Optimization.lua)
install_files(/lua/nnx LBFGSOptimization.lua)
install_files(/lua/nnx SGDOptimization.lua)
- install_files(/lua/nnx GenSGDOptimization.lua)
+ install_files(/lua/nnx GeneticSGDOptimization.lua)
install_files(/lua/nnx BatchOptimization.lua)
install_files(/lua/nnx BatchTrainer.lua)
add_subdirectory (test)