diff options
author | Marc Scoffier <mps309@cassio.cs.nyu.edu> | 2011-08-31 06:35:25 +0400 |
---|---|---|
committer | Marc Scoffier <mps309@cassio.cs.nyu.edu> | 2011-08-31 06:35:25 +0400 |
commit | 879c7f87c2e2727b5479c9e97a97aed59edfab13 (patch) | |
tree | be565e772c374a6d9b9317b226f1e346692f1fbf | |
parent | 470b00f1bbcf4b8d4ef671b703b8e6fa3209344a (diff) | |
parent | 987894db868ed9b5ad0cd746a8c3569985acd71d (diff) |
preliminary Batch modules for matlab optimization
Conflicts:
OnlineTrainer.lua
-rw-r--r-- | BatchOptimization.lua | 277 | ||||
-rw-r--r-- | BatchTrainer.lua | 120 | ||||
-rw-r--r-- | FindTorch.cmake | 6 | ||||
-rw-r--r-- | OnlineTrainer.lua | 34 | ||||
-rw-r--r-- | Trainer.lua | 7 | ||||
-rw-r--r-- | init.lua | 2 | ||||
-rw-r--r-- | nnx-1.0-1.rockspec | 2 |
7 files changed, 406 insertions, 42 deletions
diff --git a/BatchOptimization.lua b/BatchOptimization.lua
new file mode 100644
index 0000000..93cbd2a
--- /dev/null
+++ b/BatchOptimization.lua
@@ -0,0 +1,277 @@
+local Batch,parent = torch.class('nn.BatchOptimization', 'nn.Optimization')
+
+-- Generic base class for batch optimizers, modeled after the LBFGS
+-- optimization. forward() installs a batch.evaluate() closure that fills
+-- self.gradParameters and self.output for the current self.parameters;
+-- both vectors are obtained by flattening self.module's parameters.
+
+function Batch:__init(...)
+   parent.__init(self)
+   xlua.unpack_class(self, {...},
+                     'BatchOptimization', nil,
+                     {arg='module', type='nn.Module', help='a module to train', req=true},
+                     {arg='criterion', type='nn.Criterion',
+                      help='a criterion to estimate the error', req=true},
+                     {arg='maxIterations', type='number',
+                      help='maximum nb of iterations per pass (0 = no max)', default=0},
+                     {arg='maxLineSearch', type='number',
+                      help='maximum nb of steps in line search', default=20},
+                     {arg='sparsity', type='number',
+                      help='sparsity coef (Orthantwise C)', default=0},
+                     {arg='parallelize', type='number',
+                      help='parallelize onto N cores (experimental!)', default=1},
+                     {arg='verbose', type='number',
+                      help='verbose level during training [0-2]', default=0}
+                  )
+   self.parameters = nnx.flattenParameters(nnx.getParameters(self.module))
+   self.gradParameters = nnx.flattenParameters(nnx.getGradParameters(self.module))
+   if self.parallelize > 1 then
+      self:setup_mapreduce()
+   end
+end
+
+function Batch:forward(inputs, targets, options) -- dispatches on self.parallelize
+   options = options or {} -- options is optional; per-sample entries are passed to the hooks
+   if self.parallelize > 1 then
+      return self:forward_mapreduce(inputs, targets, options)
+   else
+      return self:forward_sequential(inputs, targets, options)
+   end
+end
+
+function Batch:forward_sequential(inputs, targets, options)
+   -- (1) construct a closure that computes f(inputs) + df/dW
+   --     after each call to that function:
+   --     + self.parameters contains the current X vector
+   --     + self.gradParameters contains the estimated (average) dF/dX vector
+   --     + self.output contains the summed F(X); the closure returns the average
+   batch.evaluate -- NOTE(review): 'batch' is a global table that is not defined in the code shown here; confirm it exists before forward() runs
+      = function()
+           -- reset gradients
+           self.gradParameters:zero()
+           -- self.output accumulates the criterion over all samples
+           self.output = 0
+           -- given all inputs, evaluate gradients
+           for i = 1,#inputs do
+              -- user hook
+              if self.prehook then
+                 self.prehook(self, {inputs[i], targets[i], options[i]})
+              end
+              -- estimate f
+              local output = self.module:forward(inputs[i])
+              local err = self.criterion:forward(output, targets[i])
+              self.output = self.output + err
+              -- estimate df/dW
+              local df_do = self.criterion:backward(output, targets[i])
+              self.module:backward(inputs[i], df_do)
+              -- user hook
+              if self.posthook then
+                 self.posthook(self, {inputs[i], targets[i], options[i]})
+              end
+           end
+           -- normalize gradients
+           self.gradParameters:div(#inputs)
+           -- return average f(X)
+           return self.output/#inputs
+        end
+
+   -- (3) no optimization is run here: return self.output as left by the most recent batch.evaluate() call
+   return self.output
+end
+
+function Batch:forward_mapreduce(inputs, targets, options)
+   -- number of workers
+   local P = self.parallelize
+
+   -- transmit user hooks, if defined (done once; each worker always receives two strings)
+   if not self.hooksets then
+      if self.prehook then
+         if type(self.prehook) == 'string' then
+            parallel.children:send(self.prehook)
+         else
+            print('\r<BatchOptimization> WARNING: when using para||el mode, hooks should be')
+            print('\r<BatchOptimization> WARNING: defined as strings. User prehook ignored.')
+            parallel.children:send('')
+         end
+      else
+         parallel.children:send('')
+      end
+      if self.posthook then
+         if type(self.posthook) == 'string' then
+            parallel.children:send(self.posthook)
+         else
+            print('\r<BatchOptimization> WARNING: when using para||el mode, hooks should be')
+            print('\r<BatchOptimization> WARNING: defined as strings. User posthook ignored.')
+            parallel.children:send('')
+         end
+      else
+         parallel.children:send('')
+      end
+      self.hooksets = true
+   end
+
+   -- (0a) per-worker partial outputs and gradParameters, filled by evaluate_map()
+   local outputsPartial = {}
+   local gradParametersPartial = {}
+
+   -- (0b) divide input/target batch into P interleaved sub-batches (worker t gets samples t, t+P, ...)
+   local inputss = {}
+   local targetss = {}
+   local optionss = {}
+   for t = 1,P do
+      inputss[t] = {}
+      targetss[t] = {}
+      optionss[t] = {}
+      for i = t,#inputs,P do
+         table.insert(inputss[t], inputs[i])
+         table.insert(targetss[t], targets[i])
+         if options then table.insert(optionss[t], options[i]) end
+      end
+   end
+
+   -- (0c) send mini-batch to all workers
+   for t = 1,P do
+      parallel.children[t]:send(inputss[t])
+      parallel.children[t]:send(targetss[t])
+      parallel.children[t]:send(optionss[t])
+   end
+
+   -- (1) construct a closure that computes f(inputs) + df/dW
+   --     after each call to that function:
+   --     + self.parameters contains the current X vector
+   --     + self.gradParameters contains the estimated (average) dF/dX vector
+   --     + self.output contains the summed F(X); the closure returns the average
+   batch.evaluate
+      = function()
+           batch.evaluate_map()
+           return batch.evaluate_reduce()
+        end
+
+   -- (1a) the map part of the evaluation: compute partial gradients
+   --      in the worker processes
+   batch.evaluate_map
+      = function()
+           -- transmit new parameters to all workers
+           parallel.children:send(self.parameters)
+           -- then wait for all workers to return their partial gradParameters + outputs
+           for t = 1,P do
+              gradParametersPartial[t] = parallel.children[t]:receive()
+              outputsPartial[t] = parallel.children[t]:receive()
+           end
+           -- force cleanup
+           collectgarbage()
+        end
+
+   -- (1b) the reduce part of the evaluation: accumulate all
+   --      partial estimates of the gradients
+   batch.evaluate_reduce
+      = function()
+           -- accumulate partial gradients, and average
+           self.gradParameters:zero()
+           for t = 1,P do
+              self.gradParameters:add(gradParametersPartial[t])
+           end
+           self.gradParameters:div(#inputs)
+           -- sum the partial outputs, return average f(X)
+           self.output = 0
+           for t = 1,P do
+              self.output = self.output + outputsPartial[t]
+           end
+           return self.output/#inputs
+        end
+
+   -- (3) reset workers for the next mini-batch. NOTE(review): sent before the caller can invoke batch.evaluate() -- confirm workers need not stay in their evaluation loop
+   parallel.children:send('break')
+
+   -- (4) return the average of self.output as left by the most recent batch.evaluate() call
+   return self.output/#inputs
+end
+
+function Batch:setup_mapreduce ()
+   -- (0) startup parallel package
+   if not xrequire 'parallel' then
+      xerror('install parallel for Lua to enable parallel computing (luarocks install parallel)',
+             'nn.BatchOptimization')
+   end
+   parallel.setSharedSize(4*1024*1024)
+   local P = self.parallelize
+
+   -- (1) define code for workers
+   local worker_code = [[
+      -- require packages
+      require 'nnx'
+
+      -- retrieve module + criterion at startup
+      module = parallel.parent:receive()
+      criterion = parallel.parent:receive()
+
+      -- create fake optimizer, for hooks
+      optimizer = {module=module, criterion=criterion}
+
+      -- retrieve optional prehook/posthook ('' means no hook)
+      prehook = parallel.parent:receive()
+      posthook = parallel.parent:receive()
+      if prehook ~= '' then loadstring(prehook)() else prehook = nil end
+      if posthook ~= '' then loadstring(posthook)() else posthook = nil end
+
+      -- get pointer to parameter and gradParameter vectors
+      parameters = nnx.flattenParameters(nnx.getParameters(module))
+      gradParameters = nnx.flattenParameters(nnx.getGradParameters(module))
+
+      -- outer loop: mini-batches
+      while true do
+         -- receive new mini-batch
+         inputs = parallel.parent:receive()
+         if type(inputs) == 'string' and inputs == 'break' then break end
+         targets = parallel.parent:receive()
+         options = parallel.parent:receive()
+
+         -- inner loop: evaluations
+         while true do
+            -- receive new set of parameters
+            newParameters = parallel.parent:receive()
+            if type(newParameters) == 'string' and newParameters == 'break' then break end
+            parameters:copy(newParameters)
+
+            -- reset gradients
+            gradParameters:zero()
+            -- f_x accumulates the criterion over this worker's samples
+            local f_x = 0
+            -- evaluate gradients on inputs for this thread
+            for i = 1,#inputs do
+               -- user hook
+               if prehook then
+                  prehook(optimizer, {inputs[i], targets[i], options[i]})
+               end
+               -- estimate f
+               local output = module:forward(inputs[i])
+               local err = criterion:forward(output, targets[i])
+               f_x = f_x + err
+               -- estimate df/dW
+               local df_do = criterion:backward(output, targets[i])
+               module:backward(inputs[i], df_do)
+               -- user hook
+               if posthook then
+                  posthook(optimizer, {inputs[i], targets[i], options[i]})
+               end
+            end
+
+            -- now send back gradParameters + partial output
+            parallel.parent:send(gradParameters)
+            parallel.parent:send(f_x)
+
+            -- force cleanup
+            collectgarbage()
+         end
+      end
+   ]]
+
+   -- (2) startup all workers
+   for t = 1,P do
+      parallel.run(worker_code)
+   end
+
+   -- (3) and send them the module + criterion architecture
+   parallel.children:send(self.module)
+   parallel.children:send(self.criterion)
+end
diff --git a/BatchTrainer.lua b/BatchTrainer.lua
new file mode 100644
index 0000000..b774a9b
--- /dev/null
+++ b/BatchTrainer.lua
@@ -0,0 +1,120 @@
+local BatchTrainer, parent = torch.class('nn.BatchTrainer', 'nn.OnlineTrainer')
+
+-- Essentially similar to the OnlineTrainer, but only uses the parts
+-- of the code which prepare the data and the tester. train() has been
+-- replaced by next_batch() which moves the trainer one batch further
+-- in the data. When the first epoch is finished then the batches are
+-- reused. Each call to optimizer:forward() in next_batch() creates a
+-- closure with the current batch as input.
+
+function BatchTrainer:__init(...)
+   -- parent.__init(self)
+   -- unpack args
+   xlua.unpack_class(self, {...},
+      'BatchTrainer',
+
+      'A general-purpose online trainer class.\n'
+         .. 'Provides 4 user hooks to perform extra work after each sample, or each epoch:\n'
+         .. '> trainer = nn.BatchTrainer(...) \n'
+         .. '> trainer.hookTrainSample = function(trainer, sample) ... end \n'
+         .. '> trainer.hookTrainEpoch = function(trainer) ... end \n'
+         .. '> trainer.hookTestSample = function(trainer, sample) ... end \n'
+         .. '> trainer.hookTestEpoch = function(trainer) ... end \n'
+         .. '> ',
+
+      {arg='trainset', type='nn.DataList',
+       help='dataset to split into batches for closures',req=true},
+      {arg='module', type='nn.Module', help='a module to train', req=true},
+      {arg='criterion', type='nn.Criterion',
+       help='a criterion to estimate the error'},
+      {arg='preprocessor', type='nn.Module',
+       help='a preprocessor to prime the data before the module'},
+      {arg='optimizer', type='nn.Optimization',
+       help='an optimization method'},
+      {arg='batchSize', type='number',
+       help='[mini] batch size', default=1},
+      {arg='maxEpoch', type='number',
+       help='maximum number of epochs', default=50},
+      {arg='dispProgress', type='boolean',
+       help='display a progress bar during training/testing', default=true},
+      {arg='save', type='string',
+       help='path to save networks and log training'},
+      {arg='timestamp', type='boolean',
+       help='if true, appends a timestamp to each network saved', default=false}
+   )
+   -- private params (trainOffset starts one batch early so the first next() yields offset 0)
+   self.trainOffset = -self.batchSize
+   self.testOffset = 0
+
+   -- counters
+   self.epoch = 1
+   self.batch = 0
+end
+
+-- update the counters: batch index, train offset and, on wrap-around, the epoch
+function BatchTrainer:next()
+   self.batch = self.batch + 1
+   self.trainOffset = self.trainOffset + self.batchSize
+   if self.trainOffset > self.trainset:size()-1 then
+      self.trainOffset = 0 -- restart where the first epoch started, so every epoch has the same batches
+      self.epoch = self.epoch + 1
+      self.batch = 1
+   end
+   -- disp progress
+   if self.dispProgress then
+      xlua.progress(self.trainOffset, self.trainset:size())
+   end
+
+end
+
+-- this function is called train() in the online trainer. It seems to
+-- make more sense to call it next_batch() here as the training is
+-- done outside of this code.
+
+function BatchTrainer:next_batch()
+   self:next()
+   -- t: offset of this batch in the training set, as advanced by next()
+   -- (0 for the first batch); ds: dataset size; bs: requested batch size
+   local t = self.trainOffset
+   local ds = self.trainset:size()
+   local bs = self.batchSize
+
+   print('<trainer> on training set:')
+   print("<trainer> online epoch # " .. self.epoch .. ' batch # '..self.batch.. '[batchSize = ' .. self.batchSize .. ']')
+
+   -- create mini batch (built once per batch index, then served from the cache)
+   self.inputs = self.inputs or {}
+   self.targets = self.targets or {}
+   local inputs = {}
+   local targets = {}
+   if not self.inputs[self.batch] then
+
+      self.inputs[self.batch] = {}
+      inputs = self.inputs[self.batch]
+      self.targets[self.batch] = {}
+      targets = self.targets[self.batch]
+
+      for i = t+1,math.min(t+bs,ds) do
+         -- load new sample (i is the 1-based dataset index; the last batch may be short)
+         local sample = self.trainset[i]
+         local input = sample[1]
+         local target = sample[2]
+
+         -- optional preprocess (no learning is done for that guy)
+         if self.preprocessor then input = self.preprocessor:forward(input) end
+
+         -- store input/target
+         table.insert(inputs, input)
+         table.insert(targets, target)
+      end
+   else
+      -- get batch from cache
+      inputs = self.inputs[self.batch]
+      targets = self.targets[self.batch]
+   end
+
+   -- set up closure batch.evaluate() for optimizer, and hand its return value to the caller
+   return self.optimizer:forward(inputs, targets)
+end
+
+
diff --git a/FindTorch.cmake b/FindTorch.cmake
index 8ada8cc..6658d42 100644
--- a/FindTorch.cmake
+++ b/FindTorch.cmake
@@ -13,9 +13,9 @@ if (TORCH_EXECUTABLE)
   get_filename_component (TORCH_BIN_DIR ${TORCH_EXECUTABLE} PATH)
 endif (TORCH_EXECUTABLE)
 
-find_library (TORCH_TH TH ${TORCH_BIN_DIR}/../lib)
-find_library (TORCH_luaT luaT ${TORCH_BIN_DIR}/../lib)
-find_library (TORCH_lua lua ${TORCH_BIN_DIR}/../lib)
+find_library (TORCH_TH TH ${TORCH_BIN_DIR}/../lib NO_DEFAULT_PATH)
+find_library (TORCH_luaT luaT ${TORCH_BIN_DIR}/../lib NO_DEFAULT_PATH)
+find_library (TORCH_lua lua ${TORCH_BIN_DIR}/../lib NO_DEFAULT_PATH)
 
 set (TORCH_LIBRARIES ${TORCH_TH} ${TORCH_luaT} ${TORCH_lua})
 
diff --git a/OnlineTrainer.lua
b/OnlineTrainer.lua index f213924..dc6e860 100644 --- a/OnlineTrainer.lua +++ b/OnlineTrainer.lua @@ -33,9 +33,6 @@ function OnlineTrainer:__init(...) {arg='timestamp', type='boolean', help='if true, appends a timestamp to each network saved', default=false} ) - -- private params - self.trainOffset = 0 - self.testOffset = 0 end function OnlineTrainer:log() @@ -63,15 +60,6 @@ function OnlineTrainer:train(dataset) local criterion = self.criterion self.trainset = dataset - local shuffledIndices = {} - if not self.shuffleIndices then - for t = 1,dataset:size() do - shuffledIndices[t] = t - end - else - shuffledIndices = lab.randperm(dataset:size()) - end - while true do print('<trainer> on training set:') print("<trainer> online epoch # " .. self.epoch .. ' [batchSize = ' .. self.batchSize .. ']') @@ -89,7 +77,7 @@ function OnlineTrainer:train(dataset) local targets = {} for i = t,math.min(t+self.batchSize-1,dataset:size()) do -- load new sample - local sample = dataset[shuffledIndices[self.trainOffset + i]] + local sample = dataset[i] local input = sample[1] local target = sample[2] @@ -128,10 +116,6 @@ function OnlineTrainer:train(dataset) self.epoch = self.epoch + 1 - if dataset.infiniteSet then - self.trainOffset = self.trainOffset + dataset:size() - end - if self.maxEpoch > 0 and self.epoch > self.maxEpoch then print("<trainer> you have reached the maximum number of epochs") break @@ -144,20 +128,10 @@ function OnlineTrainer:test(dataset) print('<trainer> on testing Set:') local module = self.module - local shuffledIndices = {} local criterion = self.criterion self.currentError = 0 self.testset = dataset - local shuffledIndices = {} - if not self.shuffleIndices then - for t = 1,dataset:size() do - shuffledIndices[t] = t - end - else - shuffledIndices = lab.randperm(dataset:size()) - end - self.time = sys.clock() for t = 1,dataset:size() do -- disp progress @@ -166,7 +140,7 @@ function OnlineTrainer:test(dataset) end -- get new sample - local sample = 
dataset[shuffledIndices[self.testOffset + t]] + local sample = dataset[t] local input = sample[1] local target = sample[2] @@ -197,10 +171,6 @@ function OnlineTrainer:test(dataset) self.hookTestEpoch(self) end - if dataset.infiniteSet then - self.testOffset = self.testOffset + dataset:size() - end - return self.currentError end diff --git a/Trainer.lua b/Trainer.lua index 3388ef7..b7da770 100644 --- a/Trainer.lua +++ b/Trainer.lua @@ -4,7 +4,6 @@ function Trainer:__init() self.learningRate = 0.01 self.learningRateDecay = 0 self.maxIteration = 25 - self.shuffleIndices = true end function Trainer:train(dataset) @@ -14,14 +13,12 @@ function Trainer:write(file) file:writeDouble(self.learningRate) file:writeDouble(self.learningRateDecay) file:writeInt(self.maxIteration) - file:writeBool(self.shuffleIndices) end function Trainer:read(file) self.learningRate = file:readDouble() self.learningRateDecay = file:readDouble() self.maxIteration = file:readInt() - self.shuffleIndices = file:readBool() end function Trainer:share(mlp, ...) @@ -30,10 +27,6 @@ function Trainer:share(mlp, ...) end end -function Trainer:setShuffle(bool) - self.shuffleIndices = bool -end - function Trainer:clone(...) 
local f = torch.MemoryFile("rw"):binary() f:writeObject(self) @@ -102,10 +102,12 @@ torch.include('nnx', 'SpatialSparseCriterion.lua') torch.include('nnx', 'Optimization.lua') torch.include('nnx', 'SGDOptimization.lua') torch.include('nnx', 'LBFGSOptimization.lua') +torch.include('nnx', 'BatchOptimization.lua') -- trainers: torch.include('nnx', 'Trainer.lua') torch.include('nnx', 'OnlineTrainer.lua') +torch.include('nnx', 'BatchTrainer.lua') -- datasets: torch.include('nnx', 'DataSet.lua') diff --git a/nnx-1.0-1.rockspec b/nnx-1.0-1.rockspec index 3af08d0..dcb8d1b 100644 --- a/nnx-1.0-1.rockspec +++ b/nnx-1.0-1.rockspec @@ -103,6 +103,8 @@ build = { install_files(/lua/nnx Optimization.lua) install_files(/lua/nnx LBFGSOptimization.lua) install_files(/lua/nnx SGDOptimization.lua) + install_files(/lua/nnx BatchOptimization.lua) + install_files(/lua/nnx BatchTrainer.lua) add_subdirectory (test) install_targets(/lib nnx) ]], |