Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/clementfarabet/lua---nnx.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMarco Scoffier <github@metm.org>2011-09-28 01:20:52 +0400
committerMarco Scoffier <github@metm.org>2011-09-28 01:20:52 +0400
commit31b97b114f144aa286493c9a83acfd2b8ed32ff7 (patch)
treec5bc888232e743616edad2f5b38ac41c65bbf9e4
parent07a8194cf5d9c737af1b291d2b5b057a1f369437 (diff)
parent2f609a3e56dfb05f3b5246690b341cb44c32ba9e (diff)
Merge branch 'genetic'
Conflicts: BatchOptimization.lua nnx-1.0-1.rockspec
-rw-r--r--BatchOptimization.lua4
-rw-r--r--GeneticSGDOptimization.lua261
-rw-r--r--init.lua3
-rw-r--r--nnx-1.0-1.rockspec2
4 files changed, 268 insertions, 2 deletions
diff --git a/BatchOptimization.lua b/BatchOptimization.lua
index f25cdc9..5255e57 100644
--- a/BatchOptimization.lua
+++ b/BatchOptimization.lua
@@ -130,6 +130,10 @@ function Batch:forward_mapreduce(inputs, targets, options)
self.children[t]:send(targets)
self.children[t]:send(options)
end
+<<<<<<< HEAD
+=======
+
+>>>>>>> genetic
else
-- (0b) divide input/target batch into N batches, based on speed
-- of each worker
diff --git a/GeneticSGDOptimization.lua b/GeneticSGDOptimization.lua
new file mode 100644
index 0000000..412b7bf
--- /dev/null
+++ b/GeneticSGDOptimization.lua
@@ -0,0 +1,261 @@
+local GenSGD,parent = torch.class('nn.GeneticSGDOptimization',
+ 'nn.BatchOptimization')
+
+-- this module parallelizes SGD in a particular way. It sends out the
+-- same batch to each of several workers, each with a different learning
+-- rate. The workers run and the parameters from the best worker and
+-- its learning rate are kept for the next batch.
+
+function GenSGD:__init(...)
+ parent.__init(self,...)
+ xlua.unpack_class(self, {...},
+ 'GenSGDOptimization', nil,
+ {arg='maxIterations', type='number',
+ help='maximum nb of iterations per pass', default=1},
+ {arg='learningRate', type='number',
+ help='learning rate (W = W - rate*dE/dW)', default=1e-2},
+ {arg='learningRateDecay', type='number',
+ help='learning rate decay (lr_t = lr_0 / (1 + samplesSeen*lrDecay))',
+ default=0},
+ {arg='weightDecay', type='number',
+ help='amount of weight decay (W = W - decay*W)', default=0},
+ {arg='momentum', type='number',
+ help='amount of momentum on weights (dE/W = dE/dW*(1-momentum) + prev(dE/dW)*momentum)', default=0}
+ )
+ require 'lab'
+ if self.parallelize < 2 then
+ xerror('GenSGD needs to work on several processors: set parallelize',
+ 'nn.GenSGDOptimization')
+ end
+ -- change the mapper to send the same batch to each worker
+ self.copyBatch = true
+ -- create default parameter set which will be randomized for each worker
+ self.baseParameters = { momentum = self.momentum,
+ weightDecay = self.weightDecay,
+ learningRate = self.learningRate,
+ learningRateDecay = self.learningRateDecay,
+ sampleCounter = self.sampleCounter
+ }
+end
+
+function ldn(n,m,s)
+ -- pdf = lambda s,m,x: exp(-(log(x)-m)**2 / (2.*s**2)) / ( x*sqrt(2.*pi*s**2) )
+ local x = lab.rand(n)
+ local num = x:log():add(-1*m)
+ num = num:cmul(num):mul(-1):div(2 * s * s):exp():cdiv(x:mul(math.sqrt(2*math.pi*s*s)))
+ return num
+end
+
+-- we are changing the way we map and reduce. It would be nice to
+-- change gradParametersPartial to ParametersPartial, as the logic is
+-- different for this kind of parallelization.
+function GenSGD:map_hook()
+ local P = self.parallelize
+ -- transmit new parameters to all workers
+ self.children:join()
+ self.children:send(self.parameters)
+ -- randomize learning rate (could randomize other bits). Using a
+ -- log normal around the base rate.
+ -- local n = ldn(P,self.learningRate,5)
+ local n = lab.rand(P)*self.learningRate
+ -- n[1] = self.learningRate
+ self.baseParameters.sampleCounter = self.sampleCounter
+
+ for t = 1,P do
+ self.baseParameters.learningRate = n[t]
+ self.children[t]:send(self.baseParameters)
+ end
+ -- then wait for all workers to return their Parameters + outputs
+ -- should rename this to parametersParallel and optionsParallel
+ gradParametersPartial = self.children:receive()
+ outputsPartial = self.children:receive()
+ -- force cleanup
+ collectgarbage()
+end
+
+function GenSGD:reduce_hook()
+ local P = self.parallelize
+ local id = 0
+ local mx = 1e9
+ for t = 1,P do
+ if outputsPartial[t].f_x < mx then
+ id = t
+ mx = outputsPartial[t].f_x
+ end
+ end
+ if id == 0 then
+ xerror('diverging','nn.GenSGDOptimization')
+ else
+ self.baseParameters = outputsPartial[id]
+ self.learningRate = self.baseParameters.learningRate
+ self.output = self.baseParameters.f_x
+ print('Chose: '..self.learningRate..' b/c '..self.output)
+ -- in this case we get the parameters back directly
+ self.parameters:copy(gradParametersPartial[id])
+ end
+end
+
+function GenSGD:optimize()
+ self.evaluate()
+end
+
+-- optimization (could do others in this mode)
+GenSGD.optimizer =
+ function (module,params)
+ -- apply momentum (store in the module)
+ if params.momentum ~= 0 then
+ if not module.currentGradParameters then
+ module.currentGradParameters =
+ torch.Tensor():resizeAs(module.gradParameters):copy(module.gradParameters)
+ else
+ module.currentGradParameters:mul(params.momentum):add(1-params.momentum, module.gradParameters)
+ end
+ else
+ module.currentGradParameters = module.gradParameters
+ end
+
+ -- weight decay
+ if params.weightDecay ~= 0 then
+ module.parameters:add(-params.weightDecay, module.parameters)
+ end
+
+ -- update parameters
+ local learningRate =
+ params.learningRate / (1 + params.sampleCounter*params.learningRateDecay)
+ module.parameters:add(-learningRate, module.currentGradParameters)
+ -- keep track of the final learning rate actually used
+ params.learningRate = learningRate
+ end
+
+function GenSGD:setup_mapreduce ()
+ -- (0) startup parallel package
+ if not xrequire 'parallel' then
+ xerror('install parallel for Lua to enable parallel computing (luarocks install parallel)',
+ 'nn.GenSGDOptimization')
+ end
+ local worker_code =
+ function()
+ -- require packages
+ require 'nnx'
+
+ -- retrieve optional code to setup worker
+ precode = parallel.parent:receive()
+ if type(precode) == 'function' then precode() end
+
+ -- retrieve module + criterion + optimizer at startup
+ parallel.yield()
+
+ module = parallel.parent:receive()
+ criterion = parallel.parent:receive()
+ optimizer = parallel.parent:receive()
+
+ -- retrieve optional prehook/posthook
+ prehook = parallel.parent:receive()
+ posthook = parallel.parent:receive()
+ if type(prehook) ~= 'function' then prehook = nil end
+ if type(posthook) ~= 'function' then posthook = nil end
+
+ -- I don't understand this [MS]
+ -- get pointer to parameter and gradParameter vectors
+ -- (this assumes that parameters+gradParameters are already flat parameters:
+ -- it should be the case, as the parent process flattens them at __init)
+ function check(tocheck)
+ for i = 2,#tocheck do
+ if tocheck[i]:storage() ~= tocheck[i-1]:storage() then
+ print('<BatchOptimization> error: inconsistent parameter vector (not flat)')
+ return
+ end
+ end
+ end
+ tableParameters = nnx.getParameters(module)
+ tableGradParameters = nnx.getGradParameters(module)
+ check(tableParameters)
+ check(tableGradParameters)
+ parameters = torch.Tensor():set(tableParameters[1]:storage())
+ gradParameters = torch.Tensor():set(tableGradParameters[1]:storage())
+
+ -- outer loop: mini-batches
+ while true do
+ -- sync
+ if parallel.yield() == 'break' then break end
+
+ -- receive new mini-batch
+ inputs = parallel.parent:receive()
+ targets = parallel.parent:receive()
+ options = parallel.parent:receive()
+
+ -- inner loop: evaluations
+ while true do
+ -- sync
+ if parallel.yield() == 'break' then break end
+
+ -- receive new set of parameters
+ parameters:copy(parallel.parent:receive())
+ -- receive the learning rate etc. parameters which are
+ -- tweaked for each thread
+ optimization_parameters = parallel.parent:receive()
+
+ -- evaluate gradients on inputs for this thread and perform
+ -- SGD on these inputs
+ -- reset gradients
+ gradParameters:zero()
+
+ module.parameters = parameters
+ module.gradParameters = gradParameters
+
+ for i = 1,#inputs do
+ -- estimate f
+ local output = module:forward(inputs[i])
+ local err = criterion:forward(output, targets[i])
+ -- estimate df/dW
+ local df_do = criterion:backward(output, targets[i])
+ module:backward(inputs[i], df_do)
+ module:accGradParameters(inputs[i], df_do)
+ optimizer(module,optimization_parameters)
+ end
+ -- we need the result averaged over all the samples _after_
+ -- the gradient steps so do one more loop to fprop through
+ -- the samples and collect the error _after_ the optimization
+ local f_x = 0
+ for i = 1,#inputs do
+ -- estimate f
+ local output = module:forward(inputs[i])
+ local err = criterion:forward(output, targets[i])
+ f_x = f_x + err
+ end
+ -- in this case send back parameters themselves b/c they are
+ -- already optimized
+ parallel.parent:send(parameters)
+ -- need to make sure we keep track of what was used to
+ -- compute these params along with the outputs
+ optimization_parameters['f_x'] = f_x/#inputs
+ parallel.parent:send(optimization_parameters)
+ -- force cleanup
+ collectgarbage()
+ end
+ end
+ end
+
+ local setup = function()
+ -- (1) optional calibration
+ if parallel.remotes then
+ parallel.calibrate()
+ end
+
+ -- (2) startup all workers
+ self.children = parallel.sfork(self.parallelize)
+ self.children:exec(worker_code)
+
+ -- (3) send them optional config code
+ self.children:send(self.precode or '')
+
+ -- (4) and send them the module + criterion architecture
+ self.children:join()
+ self.children:send(self.module)
+ self.children:send(self.criterion)
+ self.children:send(self.optimizer)
+ end
+
+ local ok,err = pcall(setup)
+ if not ok then parallel.close() error(err) end
+end
diff --git a/init.lua b/init.lua
index 6bc3617..ef4b030 100644
--- a/init.lua
+++ b/init.lua
@@ -104,6 +104,7 @@ torch.include('nnx', 'Optimization.lua')
torch.include('nnx', 'BatchOptimization.lua')
torch.include('nnx', 'SGDOptimization.lua')
torch.include('nnx', 'LBFGSOptimization.lua')
+torch.include('nnx', 'GeneticSGDOptimization.lua')
-- trainers:
torch.include('nnx', 'Trainer.lua')
@@ -212,7 +213,7 @@ function nnx.flattenParameters(parameters)
if param:storage() == parameters[i]:storage() then
offsets[k] = offsets[i]
if storageOffsets[k] ~= storageOffsets[i] or elements[k] ~= elements[i] then
- error('<nnx.flattenParameters> canot flatten shared weights with different structures')
+ error('<nnx.flattenParameters> cannot flatten shared weights with different structures')
end
isView = true
break
diff --git a/nnx-1.0-1.rockspec b/nnx-1.0-1.rockspec
index e53e1cd..101d0cd 100644
--- a/nnx-1.0-1.rockspec
+++ b/nnx-1.0-1.rockspec
@@ -138,7 +138,7 @@ build = {
install_files(/lua/nnx Optimization.lua)
install_files(/lua/nnx LBFGSOptimization.lua)
install_files(/lua/nnx SGDOptimization.lua)
- install_files(/lua/nnx GenSGDOptimization.lua)
+ install_files(/lua/nnx GeneticSGDOptimization.lua)
install_files(/lua/nnx BatchOptimization.lua)
install_files(/lua/nnx BatchTrainer.lua)
add_subdirectory (test)