Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/clementfarabet/lua---nnx.git - Unnamed repository; edit this file 'description' to name the repository.
diff options
author Clement Farabet <clement.farabet@gmail.com> 2011-08-31 09:55:06 +0400
committer Clement Farabet <clement.farabet@gmail.com> 2011-08-31 09:55:06 +0400
commit cec2df99480f81c14b9e782ef84cb6440fd5044e (patch)
parent f97f610a34d2a323b56976186aeacf1a6c7bb6cb (diff)
parent 9d7cdf26db61f8537555fc5045732b5e5102e9ac (diff)
Merge branch 'master' of github.com:clementfarabet/lua---nnx
4 files changed, 409 insertions, 0 deletions
diff --git a/BatchOptimization.lua b/BatchOptimization.lua
new file mode 100644
index 0000000..0951ae4
--- /dev/null
+++ b/BatchOptimization.lua
@@ -0,0 +1,278 @@
+local Batch,parent = torch.class('nn.BatchOptimization', 'nn.Optimization')
+-- This is a generic class for any batch optimization, modeled after
+-- the LBFGS optimization. It builds self.parameters and self.gradParameters
+-- from self.module, and its forward() methods install a batch.evaluate()
+-- closure (in the global table 'batch') that evaluates the loss and gradients on a batch.
+function Batch:__init(...) -- constructor: unpacks the arguments below, flattens the module's parameters, optionally starts workers
+ parent.__init(self)
+ xlua.unpack_class(self, {...}, -- the fields read below (self.module, self.parallelize, ...) come from this argument spec
+ 'BatchOptimization', nil,
+ {arg='module', type='nn.Module', help='a module to train', req=true},
+ {arg='criterion', type='nn.Criterion',
+ help='a criterion to estimate the error', req=true},
+ {arg='maxIterations', type='number',
+ help='maximum nb of iterations per pass (0 = no max)', default=0},
+ {arg='maxLineSearch', type='number',
+ help='maximum nb of steps in line search', default=20},
+ {arg='sparsity', type='number',
+ help='sparsity coef (Orthantwise C)', default=0},
+ {arg='parallelize', type='number',
+ help='parallelize onto N cores (experimental!)', default=1},
+ {arg='verbose', type='number',
+ help='verbose level during training [0-2]', default=0}
+ )
+ self.parameters = nnx.flattenParameters(nnx.getParameters(self.module)) -- presumably one flat vector over all module parameters — TODO confirm against nnx
+ self.gradParameters = nnx.flattenParameters(nnx.getGradParameters(self.module)) -- same treatment for the gradient parameters
+ if self.parallelize > 1 then -- workers are only set up when more than one core is requested
+ self:setup_mapreduce()
+ end
+ batch = {} -- NOTE: global table (no 'local'); the forward_* methods store the batch.evaluate closure in it
+function Batch:forward(inputs, targets, options) -- dispatches to the map-reduce or the sequential implementation
+ options = options or {} -- options is optional; default to an empty table so options[i] can be indexed
+ if self.parallelize > 1 then
+ return self:forward_mapreduce(inputs, targets, options)
+ else
+ return self:forward_sequential(inputs, targets, options)
+ end
+function Batch:forward_sequential(inputs, targets, options) -- installs batch.evaluate for this batch; the closure is not invoked here
+ -- (1) construct a closure that computes f(inputs) + df/dW
+ -- after each call to that function:
+ -- + self.parameters contains the current X vector
+ -- + self.gradParameters contains the estimated dF/dX vector (averaged over the batch)
+ -- + self.output contains the summed F(X); the closure returns the average
+ batch.evaluate
+ = function()
+ -- reset gradients
+ self.gradParameters:zero()
+ -- self.output accumulates the criterion value over all samples
+ self.output = 0
+ -- given all inputs, evaluate gradients
+ for i = 1,#inputs do
+ -- user hook, called before the sample is evaluated
+ if self.prehook then
+ self.prehook(self, {inputs[i], targets[i], options[i]}) -- hook receives the optimizer and {input, target, option} of sample i
+ end
+ -- estimate f
+ local output = self.module:forward(inputs[i])
+ local err = self.criterion:forward(output, targets[i])
+ self.output = self.output + err
+ -- estimate df/dW
+ local df_do = self.criterion:backward(output, targets[i])
+ self.module:backward(inputs[i], df_do) -- presumably accumulates into self.gradParameters (zeroed above, divided below) — TODO confirm
+ -- user hook, called after the sample is evaluated
+ if self.posthook then
+ self.posthook(self, {inputs[i], targets[i], options[i]})
+ end
+ end
+ -- normalize gradients by the number of samples
+ self.gradParameters:div(#inputs)
+ -- return average f(X); self.output itself keeps the un-averaged sum
+ return self.output/#inputs
+ end
+ -- (3) return current self.output; NOTE: batch.evaluate() is not called in this function, so this is the value left by earlier calls
+ return self.output
+-- Parallel counterpart of forward_sequential: splits the batch across the
+-- self.parallelize workers, sends each worker its share, and installs the
+-- closures batch.evaluate, batch.evaluate_map and batch.evaluate_reduce.
+-- inputs/targets/options are tables indexed per sample (options may be empty).
+-- Returns self.output/#inputs, i.e. the value left by the last reduce.
+function Batch:forward_mapreduce(inputs, targets, options)
+ -- number of workers
+ local P = self.parallelize
+ -- transmit user hooks, if defined (done only once, guarded by self.hooksets);
+ -- exactly two messages are sent: the prehook then the posthook, '' meaning "none"
+ if not self.hooksets then
+ if self.prehook then
+ if type(self.prehook) == 'string' then
+ parallel.children:send(self.prehook)
+ else
+ print('\r<BatchOptimization> WARNING: when using para||el mode, hooks should be')
+ print('\r<BatchOptimization> WARNING: defined as strings. User prehook ignored.')
+ parallel.children:send('')
+ end
+ else
+ parallel.children:send('')
+ end
+ if self.posthook then
+ if type(self.posthook) == 'string' then
+ parallel.children:send(self.posthook)
+ else
+ print('\r<BatchOptimization> WARNING: when using para||el mode, hooks should be')
+ -- the carriage return goes before the tag, like the three other warnings
+ print('\r<BatchOptimization> WARNING: defined as strings. User posthook ignored.')
+ parallel.children:send('')
+ end
+ else
+ parallel.children:send('')
+ end
+ self.hooksets = true
+ end
+ -- (0a) per-worker partial outputs and gradParameters, filled by evaluate_map
+ local outputsPartial = {}
+ local gradParametersPartial = {}
+ -- (0b) divide input/target batch into N batches (round-robin: worker t
+ -- gets samples t, t+P, t+2P, ...)
+ local inputss = {}
+ local targetss = {}
+ local optionss = {}
+ for t = 1,P do
+ inputss[t] = {}
+ targetss[t] = {}
+ optionss[t] = {}
+ for i = t,#inputs,P do
+ table.insert(inputss[t], inputs[i])
+ table.insert(targetss[t], targets[i])
+ if options then table.insert(optionss[t], options[i]) end
+ end
+ end
+ -- (0c) send mini-batch to all workers
+ for t = 1,P do
+ parallel.children[t]:send(inputss[t])
+ parallel.children[t]:send(targetss[t])
+ parallel.children[t]:send(optionss[t])
+ end
+ -- (1) construct a closure that computes f(inputs) + df/dW
+ -- after each call to that function:
+ -- + self.parameters contains the current X vector
+ -- + self.gradParameters contains the estimated dF/dX vector
+ -- + self.output contains the summed F(X); the closure returns the average
+ batch.evaluate
+ = function()
+ batch.evaluate_map()
+ return batch.evaluate_reduce()
+ end
+ -- (1a) the map part of the evaluation: compute partial gradients
+ -- in separate threads
+ batch.evaluate_map
+ = function()
+ -- transmit new parameters to all workers
+ parallel.children:send(self.parameters)
+ -- then wait for all workers to return their partial gradParameters + outputs
+ -- (order matters: each worker sends gradParameters first, then its output)
+ for t = 1,P do
+ gradParametersPartial[t] = parallel.children[t]:receive()
+ outputsPartial[t] = parallel.children[t]:receive()
+ end
+ -- force cleanup
+ collectgarbage()
+ end
+ -- (1b) the reduce part of the evaluation: accumulate all
+ -- partial estimates of the gradients
+ batch.evaluate_reduce
+ = function()
+ -- accumulate partial gradients, and average over the whole batch
+ self.gradParameters:zero()
+ for t = 1,P do
+ self.gradParameters:add(gradParametersPartial[t])
+ end
+ self.gradParameters:div(#inputs)
+ -- sum the partial outputs and return average f(X)
+ self.output = 0
+ for t = 1,P do
+ self.output = self.output + outputsPartial[t]
+ end
+ return self.output/#inputs
+ end
+ -- (3) reset workers so they're ready for next mini-batch
+ -- NOTE(review): 'break' is sent right after the closures are defined and
+ -- before anything in this function calls batch.evaluate(); the worker code
+ -- treats 'break' as the end of its evaluation loop — confirm the intended ordering
+ parallel.children:send('break')
+ -- (4) return current output (as left by the last evaluate_reduce), averaged
+ return self.output/#inputs
+function Batch:setup_mapreduce () -- starts self.parallelize workers running worker_code and sends them the module and criterion
+ -- (0) startup parallel package; raise an error through xerror if it cannot be loaded
+ if not xrequire 'parallel' then
+ xerror('install parallel for Lua to enable parallel computing (luarocks install parallel)',
+ 'nn.BatchOptimization')
+ end
+ parallel.setSharedSize(4*1024*1024) -- presumably a size in bytes (4 MiB) — TODO confirm against the parallel package
+ local P = self.parallelize
+ -- (1) define code for workers: a Lua source string, passed to parallel.run below
+ local worker_code = [[
+ -- require packages
+ require 'nnx'
+ -- retrieve module + criterion at startup
+ module = parallel.parent:receive()
+ criterion = parallel.parent:receive()
+ -- create fake optimizer, for hooks
+ optimizer = {module=module, criterion=criterion}
+ -- retrieve optional prehook/posthook
+ prehook = parallel.parent:receive()
+ posthook = parallel.parent:receive()
+ if prehook ~= '' then loadstring(prehook)() else prehook = nil end
+ if posthook ~= '' then loadstring(posthook)() else posthook = nil end
+ -- get pointer to parameter and gradParameter vectors
+ parameters = nnx.flattenParameters(nnx.getParameters(module))
+ gradParameters = nnx.flattenParameters(nnx.getGradParameters(module))
+ -- outter loop: mini-batches
+ while true do
+ -- receive new mini-batch
+ inputs = parallel.parent:receive()
+ if type(inputs) == 'string' and inputs == 'break' then break end
+ targets = parallel.parent:receive()
+ options = parallel.parent:receive()
+ -- inner loop: evaluations
+ while true do
+ -- receive new set of parameters
+ newParameters = parallel.parent:receive()
+ if type(newParameters) == 'string' and newParameters == 'break' then break end
+ parameters:copy(newParameters)
+ -- reset gradients
+ gradParameters:zero()
+ -- f is the average of all criterions
+ local f_x = 0
+ -- evaluate gradients on inputs for this thread
+ for i = 1,#inputs do
+ -- user hook
+ if prehook then
+ prehook(optimizer, {inputs[i], targets[i], options[i]})
+ end
+ -- estimate f
+ local output = module:forward(inputs[i])
+ local err = criterion:forward(output, targets[i])
+ f_x = f_x + err
+ -- estimate df/dW
+ local df_do = criterion:backward(output, targets[i])
+ module:backward(inputs[i], df_do)
+ -- user hook
+ if posthook then
+ posthook(optimizer, {inputs[i], targets[i], options[i]})
+ end
+ end
+ -- now send back gradParameters + partial output
+ parallel.parent:send(gradParameters)
+ parallel.parent:send(f_x)
+ -- force cleanup
+ collectgarbage()
+ end
+ end
+ ]]
+ -- (2) startup all workers: one parallel.run(worker_code) call per worker
+ for t = 1,P do
+ parallel.run(worker_code)
+ end
+ -- (3) and send them the module + criterion architecture (the worker code receives them in this order)
+ parallel.children:send(self.module)
+ parallel.children:send(self.criterion)
diff --git a/BatchTrainer.lua b/BatchTrainer.lua
new file mode 100644
index 0000000..ab5ba92
--- /dev/null
+++ b/BatchTrainer.lua
@@ -0,0 +1,127 @@
+local BatchTrainer, parent = torch.class('nn.BatchTrainer', 'nn.OnlineTrainer')
+-- Essentially similar to the OnlineTrainer, but only uses the parts
+-- of the code which prepare the data and the tester. train() has been
+-- replaced by nextBatch(), which moves the trainer one batch further
+-- in the data. When the first epoch is finished, the batches are
+-- reused. Each call to optimizer:forward() in nextBatch() creates a
+-- closure with the current batch as input.
+function BatchTrainer:__init(...) -- constructor: forwards the arguments to the parent, unpacks them, and resets the counters
+ local args = {...}
+ parent.__init(self, args) -- NOTE(review): the packed table is passed as one argument rather than unpacked — confirm the parent accepts this
+ -- unpack args into fields of self (self.trainset, self.batchSize, ... are read by the other methods)
+ xlua.unpack_class(
+ self, args,
+ 'BatchTrainer',
+ 'A modified version of the general-purpose online trainer class.\n'
+ .. ' which only preps the input batch and calls optimizer to\n'
+ .. ' create a closure\n',
+ {arg='trainset', type='nn.DataList',
+ help='dataset from which to draw batches', req=true},
+ {arg='module', type='nn.Module', help='a module to train', req=true},
+ {arg='criterion', type='nn.Criterion',
+ help='a criterion to estimate the error'},
+ {arg='preprocessor', type='nn.Module',
+ help='a preprocessor to prime the data before the module'},
+ {arg='optimizer', type='nn.Optimization',
+ help='an optimization method'},
+ {arg='batchSize', type='number',
+ help='[mini] batch size', default=1},
+ {arg='maxEpoch', type='number',
+ help='maximum number of epochs', default=50},
+ {arg='dispProgress', type='boolean',
+ help='display a progress bar during training/testing', default=true},
+ {arg='save', type='string',
+ help='path to save networks and log training'},
+ {arg='timestamp', type='boolean',
+ help='if true, appends a timestamp to each network saved', default=false}
+ )
+ self.epoch = 1 -- epochs are counted from 1
+ self.batch = nil -- batch index within the epoch; set to 1 by the first call to next()
+ self.trainOffset = nil -- index of the first sample of the current batch; set to 1 by the first call to next()
+-- update the counters: advance the batch index and the dataset offset, starting a new epoch once the offset passes the end of the trainset
+function BatchTrainer:next()
+ if not self.batch then -- first call: start at batch 1
+ self.batch = 1
+ else
+ self.batch = self.batch + 1
+ end
+ if not self.trainOffset then -- first call: start at the first sample
+ self.trainOffset = 1
+ else
+ self.trainOffset = self.trainOffset + self.batchSize
+ if self.trainOffset > self.trainset:size() then -- ran past the end of the data: wrap around
+ self.trainOffset = 1
+ self.epoch = self.epoch + 1
+ self.batch = 1
+ if self.hookTrainEpoch then -- optional user hook, called once per finished epoch
+ self.hookTrainEpoch(self)
+ end
+ if self.save then self:log() end -- log only when a save path was given
+ end
+ end
+ -- disp progress: current offset out of the trainset size
+ if self.dispProgress then
+ xlua.progress(self.trainOffset, self.trainset:size())
+ end
+-- this function is called train() in the online trainer. It seems to
+-- make more sense to call it nextBatch() here as the training is
+-- done outside of this code.
+-- Advances the counters, builds (or fetches from cache) the mini-batch for
+-- the current batch index, and hands it to self.optimizer:forward().
+function BatchTrainer:nextBatch()
+ self:next()
+ local module = self.module
+ local criterion = self.criterion
+ local t = self.trainOffset
+ local ds = self.trainset:size()
+ local bs = self.batchSize
+ print('<trainer> on training set:')
+ print("<trainer> online epoch # " .. self.epoch
+ .. ' batch # '..self.batch
+ .. ' [batchSize = ' .. self.batchSize .. ']')
+ -- create mini batch; batches are cached per batch index in
+ -- self.inputs / self.targets so later epochs reuse them
+ self.inputs = self.inputs or {}
+ self.targets = self.targets or {}
+ local inputs = {}
+ local targets = {}
+ if not self.inputs[self.batch] then
+ self.inputs[self.batch] = {}
+ inputs = self.inputs[self.batch]
+ self.targets[self.batch] = {}
+ targets = self.targets[self.batch]
+ -- the last batch of an epoch may be shorter than batchSize
+ for i = t,math.min(t+bs-1,ds) do
+ -- load new sample: index with the loop variable i, so that each
+ -- sample of the range [t, t+bs-1] is loaded once (indexing with t
+ -- would fill the batch with copies of the first sample)
+ local sample = self.trainset[i]
+ local input = sample[1]
+ local target = sample[2]
+ -- optional preprocess (no learning is done for that guy)
+ if self.preprocessor then input = self.preprocessor:forward(input) end
+ -- store input/target
+ table.insert(inputs, input)
+ table.insert(targets, target)
+ end
+ else
+ -- get batch from cache
+ inputs = self.inputs[self.batch]
+ targets = self.targets[self.batch]
+ end
+ -- set up closure batch.evaluate() for optimizer
+ local error = self.optimizer:forward(inputs, targets)
diff --git a/init.lua b/init.lua
index ea44de0..6b09afc 100644
--- a/init.lua
+++ b/init.lua
@@ -102,10 +102,12 @@ torch.include('nnx', 'SpatialSparseCriterion.lua')
torch.include('nnx', 'Optimization.lua')
torch.include('nnx', 'SGDOptimization.lua')
torch.include('nnx', 'LBFGSOptimization.lua')
+torch.include('nnx', 'BatchOptimization.lua')
-- trainers:
torch.include('nnx', 'Trainer.lua')
torch.include('nnx', 'OnlineTrainer.lua')
+torch.include('nnx', 'BatchTrainer.lua')
-- datasets:
torch.include('nnx', 'DataSet.lua')
diff --git a/nnx-1.0-1.rockspec b/nnx-1.0-1.rockspec
index 3af08d0..dcb8d1b 100644
--- a/nnx-1.0-1.rockspec
+++ b/nnx-1.0-1.rockspec
@@ -103,6 +103,8 @@ build = {
install_files(/lua/nnx Optimization.lua)
install_files(/lua/nnx LBFGSOptimization.lua)
install_files(/lua/nnx SGDOptimization.lua)
+ install_files(/lua/nnx BatchOptimization.lua)
+ install_files(/lua/nnx BatchTrainer.lua)
add_subdirectory (test)
install_targets(/lib nnx)