| author | Marco Scoffier <github@metm.org> | 2011-09-05 23:39:02 +0400 |
|---|---|---|
| committer | Marco Scoffier <github@metm.org> | 2011-09-05 23:39:02 +0400 |
| commit | 44f44def4ec8f31516c0b4085c342a23a4e7a5f5 (patch) | |
| tree | 68d0ee3b323b66c4d895df7f74b592eb4e997bf2 | |
| parent | 58a8750d6abaef6c5eb0961f81b1460349ab53fe (diff) | |
| parent | 3804abd138d4898ca5576e17cac504ff6fa8d459 (diff) | |
Merge branch 'master' into cleanDataSet
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | BatchOptimization.lua | 320 |
| -rw-r--r-- | BatchTrainer.lua | 170 |
| -rw-r--r-- | ConfusionMatrix.lua | 22 |
| -rw-r--r-- | DataList.lua | 19 |
| -rw-r--r-- | DataSetLabelMe.lua | 12 |
| -rw-r--r-- | DistNLLCriterion.lua | 81 |
| -rw-r--r-- | FindTorch.cmake | 6 |
| -rw-r--r-- | LBFGSOptimization.lua | 87 |
| -rw-r--r-- | OnlineTrainer.lua | 61 |
| -rw-r--r-- | Optimization.lua | 49 |
| -rw-r--r-- | README.md | 67 |
| -rw-r--r-- | README.txt | 13 |
| -rw-r--r-- | SGDOptimization.lua | 103 |
| -rw-r--r-- | StochasticTrainer.lua | 265 |
| -rw-r--r-- | Trainer.lua | 7 |
| -rw-r--r-- | init.lua | 30 |
| -rw-r--r-- | lbfgs.c | 72 |
| -rw-r--r-- | nnx-1.0-1.rockspec | 4 |
| -rw-r--r-- | test/test-all.lua | 87 |
19 files changed, 838 insertions, 637 deletions
diff --git a/BatchOptimization.lua b/BatchOptimization.lua new file mode 100644 index 0000000..f5feb3a --- /dev/null +++ b/BatchOptimization.lua @@ -0,0 +1,320 @@ +local Batch,parent = torch.class('nn.BatchOptimization', 'nn.Optimization') + +-- this is a generic class for any batch optimization modeled after +-- the LBFGS optimization. It simply provides a batch.evaluate() method +-- which creates a self.parameters and self.gradParameters from your +-- self.model + +function Batch:__init(...) + parent.__init(self) + xlua.unpack_class(self, {...}, + 'BatchOptimization', nil, + {arg='module', type='nn.Module', help='a module to train', req=true}, + {arg='criterion', type='nn.Criterion', + help='a criterion to estimate the error', req=true}, + {arg='parallelize', type='number', + help='parallelize onto N cores (experimental!)', default=1}, + {arg='verbose', type='number', + help='verbose level during training [0-2]', default=0} + ) + self.parameters = nnx.flattenParameters(nnx.getParameters(self.module)) + self.gradParameters = nnx.flattenParameters(nnx.getGradParameters(self.module)) + self.evalCounter = 0 + self.sampleCounter = 0 + if self.parallelize > 1 then + self:setup_mapreduce() + end + self.P = self.parallelize +end + +function Batch:forward(inputs, targets, options) + options = options or {} + if self.P > 1 then + return self:forward_mapreduce(inputs, targets, options) + else + return self:forward_sequential(inputs, targets, options) + end +end + +function Batch:forward_sequential(inputs, targets, options) + -- (1) construct a closure that compute f(inputs) + df/dW + -- after each call to that function: + -- + self.parameters contains the current X vector + -- + self.gradParameters contains the estimated dF/dX vector + -- + self.output contains the estimated (average) F(X) + self.evaluate + = function() + -- verbose + if self.verbose >= 2 then + print('<BatchOptimization> evaluating f(X) + df/dX') + end + local _t_ = sys.clock() + -- reset gradients + self.gradParameters:zero() + -- f is the average of all criterions + self.output = 0 + -- given all inputs, evaluate gradients + for i = 1,#inputs do + -- user hook + if self.prehook then + self.prehook(self, {inputs[i], targets[i], options[i]}) + end + -- estimate f + local output = self.module:forward(inputs[i]) + local err = self.criterion:forward(output, targets[i]) + self.output = self.output + err + -- estimate df/dW + local df_do = self.criterion:backward(output, targets[i]) + self.module:backward(inputs[i], df_do) + -- user hook + if self.posthook then + self.posthook(self, {inputs[i], targets[i], options[i]}) + end + end + -- update evaluation counter + self.evalCounter = self.evalCounter + 1 + -- normalize gradients + self.gradParameters:div(#inputs) + -- verbose + if self.verbose >= 2 then + print('<BatchOptimization> ' .. self.evalCounter .. 'th evaluation took ' .. (sys.clock() - _t_) .. 
' sec') + end + -- return average f(X) + self.output = self.output/#inputs + return self.output + end + + -- (2) optimization callback + if self.optimize then + self:optimize() + end + + -- (3) update sample counter + self.sampleCounter = self.sampleCounter + #inputs + + -- (4) return current output after optimization + return self.output +end + +function Batch:forward_mapreduce(inputs, targets, options) + -- parameters + local P = self.P + + -- transmit user hooks, if defined + if not self.hooksets then + if self.prehook then + if type(self.prehook) == 'string' then + parallel.children:send(self.prehook) + else + print('\r<BatchOptimization> WARNING: when using para||el mode,'.. + ' hooks should be defined as strings. User prehook ignored.') + parallel.children:send('') + end + else + parallel.children:send('') + end + if self.posthook then + if type(self.posthook) == 'string' then + parallel.children:send(self.posthook) + else + print('\r<BatchOptimization> WARNING: when using para||el mode,'.. + ' hooks should be defined as strings. User posthook ignored.') + parallel.children:send('') + end + else + parallel.children:send('') + end + self.hooksets = true + end + + -- (0a) replicate output and gradParameters + local outputsPartial = {} + local gradParametersPartial = {} + + -- (0b) divide input/target batch into N batches + local inputss = {} + local targetss = {} + local optionss = {} + for t = 1,P do + inputss[t] = {} + targetss[t] = {} + optionss[t] = {} + for i = t,#inputs,P do + table.insert(inputss[t], inputs[i]) + table.insert(targetss[t], targets[i]) + if options then table.insert(optionss[t], options[i]) end + end + end + + -- (0c) send mini-batch to all workers + for t = 1,P do + parallel.children[t]:join() + parallel.children[t]:send(inputss[t]) + parallel.children[t]:send(targetss[t]) + parallel.children[t]:send(optionss[t]) + end + + -- (1) construct a closure that compute f(inputs) + df/dW + -- after each call to that function: + -- + self.parameters contains the current X vector + -- + self.gradParameters contains the estimated dF/dX vector + -- + self.output contains the estimated (average) F(X) + self.evaluate + = function() + -- verbose + if self.verbose >= 2 then + print('<BatchOptimization> evaluating f(X) + df/dX') + end + local _t_ = sys.clock() + -- do map/reduce + self.evaluate_map() + self.evaluate_reduce() + -- update evaluation counter + self.evalCounter = self.evalCounter + 1 + -- verbose + if self.verbose >= 2 then + print('<BatchOptimization> ' .. self.evalCounter .. 'th evaluation took ' .. (sys.clock() - _t_) .. 
' sec') + end + return self.output + end + + -- (1a) the map part of the evaluation: compute partial gradients + -- in separate threads + self.evaluate_map + = function() + -- transmit new parameters to all workers + parallel.children:join() + parallel.children:send(self.parameters) + -- then wait for all workers to return their partial gradParameters + outputs + gradParametersPartial = parallel.children:receive() + outputsPartial = parallel.children:receive() + -- force cleanup + collectgarbage() + end + + -- (1b) the reduce part of the evaluation: accumulate all + -- partial estimates of the gradients + self.evaluate_reduce + = function() + -- accumulate partial gradients, and average + self.gradParameters:zero() + for t = 1,P do + self.gradParameters:add(gradParametersPartial[t]) + end + self.gradParameters:div(#inputs) + -- return average f(X) + self.output = 0 + for t = 1,P do + self.output = self.output + outputsPartial[t] + end + self.output = self.output/#inputs + end + + if self.optimize then + -- (2) optimization callback + self:optimize() + + -- (3) reset workers so they're ready for next mini-batch + -- only do this when we have an optimization hook + parallel.children:join('break') + end + + -- (4) update sample counter + self.sampleCounter = self.sampleCounter + #inputs + + -- (5) return current output after optimization + return self.output +end + +function Batch:setup_mapreduce () + -- (0) startup parallel package + if not xrequire 'parallel' then + xerror('install parallel for Lua to enable parallel computing (luarocks install parallel)', + 'nn.BatchOptimization') + end + + -- (1) define code for workers + local worker_code = [[ + -- require packages + require 'nnx' + + -- retrieve module + criterion at startup + parallel.yield() + module = parallel.parent:receive() + criterion = parallel.parent:receive() + + -- create fake optimizer, for hooks + optimizer = {module=module, criterion=criterion} + + -- retrieve optional prehook/posthook + prehook = parallel.parent:receive() + posthook = parallel.parent:receive() + if prehook ~= '' then loadstring(prehook)() else prehook = nil end + if posthook ~= '' then loadstring(posthook)() else posthook = nil end + + -- get pointer to parameter and gradParameter vectors + parameters = nnx.flattenParameters(nnx.getParameters(module)) + gradParameters = nnx.flattenParameters(nnx.getGradParameters(module)) + + -- outter loop: mini-batches + while true do + -- sync + if parallel.yield() == 'break' then break end + + -- receive new mini-batch + inputs = parallel.parent:receive() + targets = parallel.parent:receive() + options = parallel.parent:receive() + + -- inner loop: evaluations + while true do + -- sync + if parallel.yield() == 'break' then break end + + -- receive new set of parameters + parameters:copy(parallel.parent:receive()) + + -- reset gradients + gradParameters:zero() + -- f is the average of all criterions + local f_x = 0 + -- evaluate gradients on inputs for this thread + for i = 1,#inputs do + -- user hook + if prehook then + prehook(optimizer, {inputs[i], targets[i], options[i]}) + end + -- estimate f + local output = module:forward(inputs[i]) + local err = criterion:forward(output, targets[i]) + f_x = f_x + err + -- estimate df/dW + local df_do = criterion:backward(output, targets[i]) + module:backward(inputs[i], df_do) + -- user hook + if posthook then + posthook(optimizer, {inputs[i], targets[i], options[i]}) + end + end + -- now send back gradParameters + partial output + parallel.parent:send(gradParameters) + 
parallel.parent:send(f_x) + -- force cleanup + collectgarbage() + end + end + ]] + + local setup = function() + -- (2) startup all workers + parallel.sfork(self.parallelize) + parallel.children:exec(worker_code) + + -- (3) and send them the module + criterion architecture + parallel.children:join() + parallel.children:send(self.module) + parallel.children:send(self.criterion) + end + local ok,err = pcall(setup) + if not ok then parallel.close() error(err) end +end diff --git a/BatchTrainer.lua b/BatchTrainer.lua new file mode 100644 index 0000000..a5b135d --- /dev/null +++ b/BatchTrainer.lua @@ -0,0 +1,170 @@ +local BatchTrainer, parent = torch.class('nn.BatchTrainer', 'nn.OnlineTrainer') + +-- Essentially simialar to the OnlineTrainer but only used the parts +-- of the code which prepare the data and the tester. train() has been +-- replaced by nextBatch() which moves the trainer one batch further +-- in the data. When the first epoch is finished then the batches are +-- reused. Each call to optimizer.forward() in nextBatch() creates a +-- closure with the current batch as input. + +function BatchTrainer:__init(...) + local args = {...} + parent.__init(self, args) + -- unpack args + xlua.unpack_class( + self, args, + 'BatchTrainer', + 'A modified version of the general-purpose online trainer class.\n' + .. ' which only preps the input batch and calls optimizer to\n' + .. ' create a closure\n', + {arg='trainset', type='nn.DataList', + help='dataset from which to draw batches', req=true}, + {arg='module', type='nn.Module', help='a module to train', req=true}, + {arg='criterion', type='nn.Criterion', + help='a criterion to estimate the error'}, + {arg='preprocessor', type='nn.Module', + help='a preprocessor to prime the data before the module'}, + {arg='optimizer', type='nn.Optimization', + help='an optimization method'}, + {arg='batchSize', type='number', + help='[mini] batch size', default=1}, + {arg='maxEpoch', type='number', + help='maximum number of epochs', default=50}, + {arg='dispProgress', type='boolean', + help='display a progress bar during training/testing', default=true}, + {arg='save', type='string', + help='path to save networks and log training'}, + {arg='timestamp', type='boolean', + help='if true, appends a timestamp to each network saved', default=false} + ) + self.epoch = 1 + self.batch = nil + self.trainOffset = nil +end + +-- update the counters +function BatchTrainer:next() + if not self.batch or not self.trainOffset then + -- initialize + self.batch = 1 + self.trainOffset = 1 + else + -- hook to run something on the current batch + -- (for eg. if you want to run a test on this batch before + -- switching to the next) + if self.hookTrainBatch then + self.hookTrainBatch(self) + end + + -- simple batch increment + self.batch = self.batch + 1 + self.trainOffset = self.trainOffset + self.batchSize + + -- test for new epoch + if self.trainOffset > self.trainset:size() then + + -- hook to run on current epoch before switching to next + if self.hookTrainEpoch then + self.hookTrainEpoch(self) + end + + if self.save then self:log() end + + self.trainOffset = 1 + self.epoch = self.epoch + 1 + self.batch = 1 + end + + -- on all but the first batch we need to reset the children + if optimizer.parallelize > 1 then + parallel.children:send('break') + end + + end + -- disp progress + if self.dispProgress then + xlua.progress(self.trainOffset, self.trainset:size()) + end + +end + +-- this function is called train() in the online trainer. 
I seems to +-- make more sense to call it next_batch() here as the training is +-- done outside of this code. + +function BatchTrainer:nextBatch() + self:next() + local module = self.module + local criterion = self.criterion + local t = self.trainOffset + local ds = self.trainset:size() + local bs = self.batchSize + + print('<trainer> on training set:') + print("<trainer> online epoch # " .. self.epoch + .. ' batch # '..self.batch + .. ' [batchSize = ' .. self.batchSize .. ']') + + -- create mini batch + self.inputs = self.inputs or {} + self.targets = self.targets or {} + local inputs = {} + local targets = {} + if not self.inputs[self.batch] then + + self.inputs[self.batch] = {} + inputs = self.inputs[self.batch] + self.targets[self.batch] = {} + targets = self.targets[self.batch] + + for i = t,math.min(t+bs-1,ds) do + -- load new sample + local sample = self.trainset[i] + local input = sample[1] + local target = sample[2] + + -- optional preprocess (no learning is done for that guy) + if self.preprocessor then input = self.preprocessor:forward(input) end + + -- store input/target + table.insert(inputs, input) + table.insert(targets, target) + end + else + -- get batch from cache + inputs = self.inputs[self.batch] + targets = self.targets[self.batch] + end + + -- set up closure batch.evaluate() for optimizer + local error = self.optimizer:forward(inputs, targets) + +end + +-- special test to just get results of current batch +function BatchTrainer:testBatch() + local criterion = self.criterion + local module = self.module + + local inputs = self.inputs[self.batch] + local targets = self.targets[self.batch] + + self.currentError = 0 + + for i = 1,#inputs do + local input = inputs[i] + local target = targets[i] + if criterion then + self.currentError = self.currentError + + criterion:forward(module:forward(input), target) + else + local _,error = module:forward(input, target) + self.currentError = self.currentError + error + end + -- user hook + if self.hookTestSample then + self.hookTestSample(self, {input, target}) + end + end +end + diff --git a/ConfusionMatrix.lua b/ConfusionMatrix.lua index 96c9aa4..ed3f000 100644 --- a/ConfusionMatrix.lua +++ b/ConfusionMatrix.lua @@ -11,7 +11,7 @@ function ConfusionMatrix:__init(nclasses, classes) self.nclasses = nclasses self.totalValid = 0 self.averageValid = 0 - self.classes = classes + self.classes = classes or {} end function ConfusionMatrix:add(prediction, target) @@ -74,7 +74,7 @@ function ConfusionMatrix:__tostring__() for p = 1,nclasses do str = str .. '' .. string.format('%8d', self.mat[t][p]) end - if self.classes then + if self.classes and self.classes[1] then if t == nclasses then str = str .. ']] ' .. pclass .. '% \t[class: ' .. (self.classes[t] or '') .. ']\n' else @@ -92,3 +92,21 @@ function ConfusionMatrix:__tostring__() str = str .. ' + global correct: ' .. (self.totalValid*100) .. 
'%' return str end + +function ConfusionMatrix:write(file) + file:writeObject(self.mat) + file:writeObject(self.valids) + file:writeInt(self.nclasses) + file:writeInt(self.totalValid) + file:writeInt(self.averageValid) + file:writeObject(self.classes) +end + +function ConfusionMatrix:read(file) + self.mat = file:readObject() + self.valids = file:readObject() + self.nclasses = file:readInt() + self.totalValid = file:readInt() + self.averageValid = file:readInt() + self.classes = file:readObject() +end diff --git a/DataList.lua b/DataList.lua index 99b117a..4922e8b 100644 --- a/DataList.lua +++ b/DataList.lua @@ -13,6 +13,8 @@ function DataList:__init() self.nbClass = 0 self.ClassName = {} self.nbSamples = 0 + self.targetIsProbability = false + self.spatialTarget = false end function DataList:__tostring__() @@ -30,8 +32,21 @@ function DataList:__index__(key) elmt = ((elmt-1) % classSize) + 1 -- create target vector on the fly - self.datasets[class][elmt][2] = torch.Tensor(1,1,self.nbClass):fill(-1) - self.datasets[class][elmt][2][1][1][class] = 1 + if self.spatialTarget then + if self.targetIsProbability then + self.datasets[class][elmt][2] = torch.Tensor(self.nbClass,1,1):zero() + else + self.datasets[class][elmt][2] = torch.Tensor(self.nbClass,1,1):fill(-1) + end + self.datasets[class][elmt][2][class][1][1] = 1 + else + if self.targetIsProbability then + self.datasets[class][elmt][2] = torch.Tensor(self.nbClass):zero() + else + self.datasets[class][elmt][2] = torch.Tensor(self.nbClass):fill(-1) + end + self.datasets[class][elmt][2][class] = 1 + end -- apply hook on sample local sample = self.datasets[class][elmt] diff --git a/DataSetLabelMe.lua b/DataSetLabelMe.lua index 6a9e4cf..629561a 100644 --- a/DataSetLabelMe.lua +++ b/DataSetLabelMe.lua @@ -275,15 +275,9 @@ end function DataSetLabelMe:loadSample(index) if self.preloadedDone then if index ~= self.currentIndex then - -- clean up - self.currentSample = nil - self.currentMask = nil - collectgarbage() -- load new sample - self.currentSample = torch.Tensor(self.preloaded.samples[index]:size()) - self.currentSample:copy(self.preloaded.samples[index]):mul(1/255) - self.currentMask = torch.Tensor(self.preloaded.masks[index]:size()) - self.currentMask:copy(self.preloaded.masks[index]) + self.currentSample = self.preloaded.samples[index] + self.currentMask = self.preloaded.masks[index] -- remember index self.currentIndex = index end @@ -385,7 +379,7 @@ function DataSetLabelMe:preload(saveFile) xlua.progress(i,self.nbRawSamples) -- load samples, and store them in raw byte tensors (min memory footprint) self:loadSample(i) - local rawTensor = torch.Tensor(self.currentSample:size()):copy(self.currentSample:mul(255)) + local rawTensor = torch.Tensor(self.currentSample:size()):copy(self.currentSample) local rawMask = torch.Tensor(self.currentMask:size()):copy(self.currentMask) -- insert them in our list table.insert(self.preloaded.samples, rawTensor) diff --git a/DistNLLCriterion.lua b/DistNLLCriterion.lua new file mode 100644 index 0000000..fedda1b --- /dev/null +++ b/DistNLLCriterion.lua @@ -0,0 +1,81 @@ +local DistNLLCriterion, parent = torch.class('nn.DistNLLCriterion', 'nn.Criterion') + +function DistNLLCriterion:__init() + parent.__init(self) + -- user options + self.inputIsProbability = false + self.inputIsLogProbability = false + self.targetIsProbability = false + -- internal + self.targetSoftMax = nn.SoftMax() + self.inputLogSoftMax = nn.LogSoftMax() + self.gradLogInput = torch.Tensor() +end + +function DistNLLCriterion:normalize(input, 
target) + -- normalize target + if not self.targetIsProbability then + self.probTarget = self.targetSoftMax:forward(target) + else + self.probTarget = target + end + + -- normalize input + if not self.inputIsLogProbability and not self.inputIsProbability then + self.logProbInput = self.inputLogSoftMax:forward(input) + elseif not self.inputIsLogProbability then + print('TODO: implement nn.Log()') + else + self.logProbInput = input + end +end + +function DistNLLCriterion:denormalize(input) + -- denormalize gradients + if not self.inputIsLogProbability and not self.inputIsProbability then + self.gradInput = self.inputLogSoftMax:backward(input, self.gradLogInput) + elseif not self.inputIsLogProbability then + print('TODO: implement nn.Log()') + else + self.gradInput = self.gradLogInput + end +end + +function DistNLLCriterion:forward(input, target) + self:normalize(input, target) + self.output = 0 + for i = 1,input:size(1) do + self.output = self.output - self.logProbInput[i] * self.probTarget[i] + end + return self.output +end + +function DistNLLCriterion:backward(input, target) + self:normalize(input, target) + self.gradLogInput:resizeAs(input) + for i = 1,input:size(1) do + self.gradLogInput[i] = -self.probTarget[i] + end + self:denormalize(input) + return self.gradInput +end + +function DistNLLCriterion:write(file) + parent.write(self, file) + file:writeBool(self.inputIsProbability) + file:writeBool(self.inputIsLogProbability) + file:writeBool(self.targetIsProbability) + file:writeObject(self.targetSoftMax) + file:writeObject(self.inputLogSoftMax) + file:writeObject(self.gradLogInput) +end + +function DistNLLCriterion:read(file) + parent.read(self, file) + self.inputIsProbability = file:readBool() + self.inputIsLogProbability = file:readBool() + self.targetIsProbability = file:readBool() + self.targetSoftMax = file:readObject() + self.inputLogSoftMax = file:readObject() + self.gradLogInput = file:readObject() +end diff --git a/FindTorch.cmake b/FindTorch.cmake index 8ada8cc..6658d42 100644 --- a/FindTorch.cmake +++ b/FindTorch.cmake @@ -13,9 +13,9 @@ if (TORCH_EXECUTABLE) get_filename_component (TORCH_BIN_DIR ${TORCH_EXECUTABLE} PATH) endif (TORCH_EXECUTABLE) -find_library (TORCH_TH TH ${TORCH_BIN_DIR}/../lib) -find_library (TORCH_luaT luaT ${TORCH_BIN_DIR}/../lib) -find_library (TORCH_lua lua ${TORCH_BIN_DIR}/../lib) +find_library (TORCH_TH TH ${TORCH_BIN_DIR}/../lib NO_DEFAULT_PATH) +find_library (TORCH_luaT luaT ${TORCH_BIN_DIR}/../lib NO_DEFAULT_PATH) +find_library (TORCH_lua lua ${TORCH_BIN_DIR}/../lib NO_DEFAULT_PATH) set (TORCH_LIBRARIES ${TORCH_TH} ${TORCH_luaT} ${TORCH_lua}) diff --git a/LBFGSOptimization.lua b/LBFGSOptimization.lua index 83393d9..ad98139 100644 --- a/LBFGSOptimization.lua +++ b/LBFGSOptimization.lua @@ -1,75 +1,30 @@ -local LBFGS,parent = torch.class('nn.LBFGSOptimization', 'nn.Optimization') +local LBFGS,parent = torch.class('nn.LBFGSOptimization', 'nn.BatchOptimization') function LBFGS:__init(...) require 'liblbfgs' - parent.__init(self) + parent.__init(self, ...) 
xlua.unpack_class(self, {...}, 'LBFGSOptimization', nil, - {arg='module', type='nn.Module', help='a module to train', req=true}, - {arg='criterion', type='nn.Criterion', help='a criterion to estimate the error', req=true}, - {arg='maxIterations', type='number', help='maximum nb of iterations per pass (0 = no max)', default=0}, - {arg='maxLineSearch', type='number', help='maximum nb of steps in line search', default=20}, - {arg='sparsity', type='number', help='sparsity coef (Orthantwise C)', default=0}, - {arg='verbose', type='number', help='verbose level during training [0-2]', default=0} + {arg='maxEvaluation', type='number', + help='maximum nb of function evaluations per pass (0 = no max)', default=0}, + {arg='maxIterations', type='number', + help='maximum nb of iterations per pass (0 = no max)', default=0}, + {arg='maxLineSearch', type='number', + help='maximum nb of steps in line search', default=20}, + {arg='sparsity', type='number', + help='sparsity coef (Orthantwise C)', default=0}, + {arg='parallelize', type='number', + help='parallelize onto N cores (experimental!)', default=1} ) - self.parametersT = nnx.getParameters(self.module) - self.gradParametersT = nnx.getGradParameters(self.module) - lbfgs.verbose = self.verbose + self.parameters = nnx.flattenParameters(nnx.getParameters(self.module)) + self.gradParameters = nnx.flattenParameters(nnx.getGradParameters(self.module)) end -function LBFGS:forward(inputs, targets, options) - options = options or {} - -- (1) construct a closure that compute f(inputs) + df/dW - -- after each call to that function: - -- + self.parameters contains the current X vector - -- + self.gradParameters contains the estimated dF/dX vector - -- + self.output contains the estimated (average) F(X) - lbfgs.evaluate - = function() - -- set parameters from current state - self:unflatten(self.parametersT, self.gradParametersT) - -- reset gradients - self.module:zeroGradParameters() - -- f is the average of all criterions - self.output = 0 - -- given all inputs, evaluate gradients - for i = 1,#inputs do - -- user hook - if self.prehook then - self.prehook(self, {inputs[i], targets[i], options[i]}) - end - -- estimate f - local output = self.module:forward(inputs[i]) - local err = self.criterion:forward(output, targets[i]) - self.output = self.output + err - -- estimate df/dW - local df_do = self.criterion:backward(output, targets[i]) - self.module:backward(inputs[i], df_do) - -- user hook - if self.posthook then - self.posthook(self, {inputs[i], targets[i], options[i]}) - end - end - -- update state from computed parameters - self:flatten(self.parametersT, self.gradParametersT) - -- normalize gradients - self.gradParameters:div(#inputs) - -- return average f(X) - return self.output/#inputs - end - - -- (2) store current parameters/gradParameters - self:flatten(self.parametersT, self.gradParametersT) - - -- (3) the magic function: will update the parameter vector - -- according to the l-BFGS method - self.output = lbfgs.run(self.parameters, self.gradParameters, - self.maxIterations, self.maxLineSearch, - self.sparsity) - - -- (4) last: read parameters back into the model - self:unflatten(self.parametersT, self.gradParametersT) - - -- (5) return current output after optimization - return self.output +function LBFGS:optimize() + lbfgs.evaluate = self.evaluate + -- the magic function: will update the parameter vector + -- according to the l-BFGS method + self.output = lbfgs.run(self.parameters, self.gradParameters, + self.maxEvaluation, self.maxIterations, 
self.maxLineSearch, + self.sparsity, self.verbose) end diff --git a/OnlineTrainer.lua b/OnlineTrainer.lua index 2b7f2b5..dc6e860 100644 --- a/OnlineTrainer.lua +++ b/OnlineTrainer.lua @@ -16,19 +16,23 @@ function OnlineTrainer:__init(...) .. '> ', {arg='module', type='nn.Module', help='a module to train', req=true}, - {arg='criterion', type='nn.Criterion', help='a criterion to estimate the error'}, - {arg='preprocessor', type='nn.Module', help='a preprocessor to prime the data before the module'}, - {arg='optimizer', type='nn.Optimization', help='an optimization method'}, - - {arg='batchSize', type='number', help='[mini] batch size', default=1}, - {arg='maxEpoch', type='number', help='maximum number of epochs', default=50}, - {arg='dispProgress', type='boolean', help='display a progress bar during training/testing', default=true}, - {arg='save', type='string', help='path to save networks and log training'}, - {arg='timestamp', type='boolean', help='if true, appends a timestamp to each network saved', default=false} + {arg='criterion', type='nn.Criterion', + help='a criterion to estimate the error'}, + {arg='preprocessor', type='nn.Module', + help='a preprocessor to prime the data before the module'}, + {arg='optimizer', type='nn.Optimization', + help='an optimization method'}, + {arg='batchSize', type='number', + help='[mini] batch size', default=1}, + {arg='maxEpoch', type='number', + help='maximum number of epochs', default=50}, + {arg='dispProgress', type='boolean', + help='display a progress bar during training/testing', default=true}, + {arg='save', type='string', + help='path to save networks and log training'}, + {arg='timestamp', type='boolean', + help='if true, appends a timestamp to each network saved', default=false} ) - -- private params - self.trainOffset = 0 - self.testOffset = 0 end function OnlineTrainer:log() @@ -56,18 +60,9 @@ function OnlineTrainer:train(dataset) local criterion = self.criterion self.trainset = dataset - local shuffledIndices = {} - if not self.shuffleIndices then - for t = 1,dataset:size() do - shuffledIndices[t] = t - end - else - shuffledIndices = lab.randperm(dataset:size()) - end - while true do print('<trainer> on training set:') - print("<trainer> online epoch # " .. self.epoch .. '[batchSize = ' .. self.batchSize .. ']') + print("<trainer> online epoch # " .. self.epoch .. ' [batchSize = ' .. self.batchSize .. 
']') self.time = sys.clock() self.currentError = 0 @@ -82,7 +77,7 @@ function OnlineTrainer:train(dataset) local targets = {} for i = t,math.min(t+self.batchSize-1,dataset:size()) do -- load new sample - local sample = dataset[self.trainOffset + shuffledIndices[i]] + local sample = dataset[i] local input = sample[1] local target = sample[2] @@ -121,10 +116,6 @@ function OnlineTrainer:train(dataset) self.epoch = self.epoch + 1 - if dataset.infiniteSet then - self.trainOffset = self.trainOffset + dataset:size() - end - if self.maxEpoch > 0 and self.epoch > self.maxEpoch then print("<trainer> you have reached the maximum number of epochs") break @@ -137,20 +128,10 @@ function OnlineTrainer:test(dataset) print('<trainer> on testing Set:') local module = self.module - local shuffledIndices = {} local criterion = self.criterion self.currentError = 0 self.testset = dataset - local shuffledIndices = {} - if not self.shuffleIndices then - for t = 1,dataset:size() do - shuffledIndices[t] = t - end - else - shuffledIndices = lab.randperm(dataset:size()) - end - self.time = sys.clock() for t = 1,dataset:size() do -- disp progress @@ -159,7 +140,7 @@ function OnlineTrainer:test(dataset) end -- get new sample - local sample = dataset[self.testOffset + shuffledIndices[t]] + local sample = dataset[t] local input = sample[1] local target = sample[2] @@ -190,10 +171,6 @@ function OnlineTrainer:test(dataset) self.hookTestEpoch(self) end - if dataset.infiniteSet then - self.testOffset = self.testOffset + dataset:size() - end - return self.currentError end diff --git a/Optimization.lua b/Optimization.lua index f18c635..daf0a8d 100644 --- a/Optimization.lua +++ b/Optimization.lua @@ -1,56 +1,11 @@ local Optimization = torch.class('nn.Optimization') function Optimization:__init() + self.output = 0 end function Optimization:forward(inputs, targets) - self:flatten(parameters, gradParameters) self.output = 0 - self:unflatten(parameters, gradParameters) + print('<Optimization> WARNING: this is a virtual function, please overload !') return self.output end - -function Optimization:flatten(parameters, gradParameters) - if type(parameters) == 'table' then - -- create flat parameters - self.parameters = self.parameters or torch.Tensor() - self.gradParameters = self.gradParameters or torch.Tensor() - -- assuming that the parameters won't change their size, - -- we compute offsets once - if not self.offsets then - self.nParameters = 0 - self.offsets = {} - for _,param in ipairs(parameters) do - table.insert(self.offsets, self.nParameters+1) - self.nParameters = self.nParameters + param:nElement() - end - self.parameters:resize(self.nParameters) - self.gradParameters:resize(self.nParameters) - end - -- copy all params in flat array - for i = 1,#parameters do - local nElement = parameters[i]:nElement() - self.parameters:narrow(1,self.offsets[i],nElement):copy(parameters[i]) - self.gradParameters:narrow(1,self.offsets[i],nElement):copy(gradParameters[i]) - end - else - self.parameters = parameters - self.gradParameters = gradParameters - end -end - -function Optimization:unflatten(parameters, gradParameters) - if type(parameters) == 'table' then - -- copy all params into unflat arrays - local offset = 1 - for i = 1,#parameters do - local nElement = parameters[i]:nElement() - parameters[i]:copy(self.parameters:narrow(1,offset,nElement)) - gradParameters[i]:copy(self.gradParameters:narrow(1,offset,nElement)) - offset = offset + nElement - end - else - parameters = self.parameters - gradParameters = self.gradParameters - end 
-end diff --git a/README.md b/README.md new file mode 100644 index 0000000..f30f5b9 --- /dev/null +++ b/README.md @@ -0,0 +1,67 @@ +# nnx: an Xperimental package for neural network modules + optimizations + +The original neural network from Torch7, 'nn', contains stable and widely +used modules. 'nnx' contains more experimental, unproven modules, and +optimizations. Eventually, modules that become stable enough will make +their way into 'nn' (some already have). + +## Install dependencies + +1/ third-party libraries: + +On Linux (Ubuntu > 9.04): + +``` sh +$ apt-get install gcc g++ git libreadline5-dev cmake wget +``` + +On Mac OS (Leopard, or more), using [Homebrew](http://mxcl.github.com/homebrew/): + +``` sh +$ brew install git readline cmake wget +``` + +2/ Lua 5.1 + Luarocks + xLua: + +``` sh +$ git clone https://github.com/clementfarabet/lua4torch +$ cd lua4torch +$ make install PREFIX=/usr/local +``` + +3/ nnx: + +Note: this automatically installs Torch7+nn, and other Lua dependencies. + +``` sh +$ luarocks install nnx +``` + +## Use the library + +First run xlua, and load nnx: + +``` sh +$ xlua +``` + +``` lua +> require 'nnx' +``` + +Once loaded, tab-completion will help you navigate through the +library (note that most function are added directly to nn): + +``` lua +> nnx. + TAB +... +> nn. + TAB +``` + +In particular, it's good to verify that all modules provided pass their +tests: + +``` lua +> nnx.test_all() +> nnx.test_omp() +``` diff --git a/README.txt b/README.txt deleted file mode 100644 index 6f183e9..0000000 --- a/README.txt +++ /dev/null @@ -1,13 +0,0 @@ - -INSTALL: -$ luarocks --from=http://data.neuflow.org/lua/rocks install nnx - -USE: -> require 'nnx' -> n1 = nn.SpatialLinear(16,4) - --- run tests: -> nnx.test_all() -... -> nnx.test_omp() -... diff --git a/SGDOptimization.lua b/SGDOptimization.lua index 8bfe9a5..ddbf220 100644 --- a/SGDOptimization.lua +++ b/SGDOptimization.lua @@ -1,81 +1,48 @@ -local SGD,parent = torch.class('nn.SGDOptimization', 'nn.Optimization') +local SGD,parent = torch.class('nn.SGDOptimization', 'nn.BatchOptimization') function SGD:__init(...) - parent.__init(self) + parent.__init(self,...) 
xlua.unpack_class(self, {...}, 'SGDOptimization', nil, - {arg='module', type='nn.Module', help='a module to train', req=true}, - {arg='criterion', type='nn.Criterion', help='a criterion to estimate the error', req=true}, - {arg='learningRate', type='number', help='learning rate (W = W - rate*dE/dW)', default=1e-2}, - {arg='weightDecay', type='number', help='amount of weight decay (W = W - decay*W)', default=0}, - {arg='momentum', type='number', help='amount of momentum on weights (dE/W = dE/dW*(1-momentum) + prev(dE/dW)*momentum)', default=0} + {arg='maxIterations', type='number', + help='maximum nb of iterations per pass', default=1}, + {arg='learningRate', type='number', + help='learning rate (W = W - rate*dE/dW)', default=1e-2}, + {arg='learningRateDecay', type='number', + help='learning rate decay (lr_t = lr_0 / (1 + samplesSeen*lrDecay))', default=0}, + {arg='weightDecay', type='number', + help='amount of weight decay (W = W - decay*W)', default=0}, + {arg='momentum', type='number', + help='amount of momentum on weights (dE/W = dE/dW*(1-momentum) + prev(dE/dW)*momentum)', default=0} ) - self.parametersT = nnx.getParameters(self.module) - self.gradParametersT = nnx.getGradParameters(self.module) + self.parameters = nnx.flattenParameters(nnx.getParameters(self.module)) + self.gradParameters = nnx.flattenParameters(nnx.getGradParameters(self.module)) end -function SGD:forward(inputs, targets, options) - options = options or {} - - -- reset gradients - self.module:zeroGradParameters() - - -- f is the average of all criterions - self.output = 0 - - -- given all inputs, evaluate gradients - for i = 1,#inputs do - -- user hook - if self.prehook then - self.prehook(self, {inputs[i], targets[i], options[i]}) +function SGD:optimize() + -- optimize N times + for i = 1,self.maxIterations do + -- evaluate f(X) + df/dX + self.evaluate() + + -- apply momentum + if self.momentum ~= 0 then + if not self.currentGradParameters then + self.currentGradParameters = torch.Tensor():resizeAs(self.gradParameters):copy(self.gradParameters) + else + self.currentGradParameters:mul(self.momentum):add(1-self.momentum, self.gradParameters) + end + else + self.currentGradParameters = self.gradParameters end - -- estimate f - local output = self.module:forward(inputs[i]) - local err = self.criterion:forward(output, targets[i]) - self.output = self.output + err - - -- estimate df/dW - local df_do = self.criterion:backward(output, targets[i]) - self.module:backward(inputs[i], df_do) - - -- user hook - if self.posthook then - self.posthook(self, {inputs[i], targets[i], options[i]}) + -- weight decay + if self.weightDecay ~= 0 then + self.parameters:add(-self.weightDecay, self.parameters) end - end - -- renorm f - self.output = self.output / #inputs - - -- update state from computed parameters - self:flatten(self.parametersT, self.gradParametersT) - - -- normalize gradients - self.gradParameters:div(#inputs) - - -- apply momentum - if self.momentum ~= 0 then - if not self.currentGradParameters then - self.currentGradParameters = torch.Tensor():resizeAs(self.gradParameters):copy(self.gradParameters) - else - self.currentGradParameters:mul(self.momentum):add(1-self.momentum, self.gradParameters) - end - else - self.currentGradParameters = self.gradParameters - end - - -- weight decay - if self.weightDecay ~= 0 then - self.parameters:add(-self.weightDecay, self.parameters) + -- update parameters + local learningRate = self.learningRate / (1 + self.sampleCounter*self.learningRateDecay) + self.parameters:add(-learningRate, 
self.currentGradParameters) end - - -- update parameters - self.parameters:add(-self.learningRate, self.currentGradParameters) - - -- write compute parameters back in place - self:unflatten(self.parametersT, self.gradParametersT) - - -- return current output - return self.output end diff --git a/StochasticTrainer.lua b/StochasticTrainer.lua deleted file mode 100644 index 62fb670..0000000 --- a/StochasticTrainer.lua +++ /dev/null @@ -1,265 +0,0 @@ -local StochasticTrainer, parent = torch.class('nn.StochasticTrainer','nn.Trainer') - -function StochasticTrainer:__init(...) - parent.__init(self) - -- unpack args - xlua.unpack_class(self, {...}, - 'StochasticTrainer', - - 'A general-purpose stochastic trainer class.\n' - .. 'Provides 4 user hooks to perform extra work after each sample, or each epoch:\n' - .. '> trainer = nn.StochasticTrainer(...) \n' - .. '> trainer.hookTrainSample = function(trainer, sample) ... end \n' - .. '> trainer.hookTrainEpoch = function(trainer) ... end \n' - .. '> trainer.hookTestSample = function(trainer, sample) ... end \n' - .. '> trainer.hookTestEpoch = function(trainer) ... end \n' - .. '> ', - - {arg='module', type='nn.Module', help='a module to train', req=true}, - {arg='criterion', type='nn.Module', help='a criterion to estimate the error'}, - {arg='preprocessor', type='nn.Module', help='a preprocessor to prime the data before the module'}, - - {arg='learningRate', type='number', help='learning rate (W = W - rate*dE/dW)', default=1e-2}, - {arg='learningRateDecay', type='number', help='learning rate decay (rate = rate * (1-decay), at each epoch)', default=0}, - {arg='weightDecay', type='number', help='amount of weight decay (W = W - decay*W)', default=0}, - {arg='momentum', type='number', help='amount of momentum on weights (dE/W = dE/dW + momentum*prev(dE/dW))', default=0}, - {arg='maxEpoch', type='number', help='maximum number of epochs', default=50}, - - {arg='maxTarget', type='boolean', help='replaces an CxHxW target map by a HxN target of max values (for NLL criterions)', default=false}, - {arg='dispProgress', type='boolean', help='display a progress bar during training/testing', default=true}, - {arg='skipUniformTargets', type='boolean', help='skip uniform (flat) targets during training', default=false}, - - {arg='save', type='string', help='path to save networks and log training'}, - {arg='timestamp', type='boolean', help='if true, appends a timestamp to each network saved', default=false} - ) - -- instantiate SGD optimization module - self.optimizer = nn.SGDOptimization(self.learningRate, self.weightDecay, self.momentum) - -- private params - self.errorArray = self.skipUniformTargets - self.trainOffset = 0 - self.testOffset = 0 -end - -function StochasticTrainer:log() - -- save network - local filename = self.save - os.execute('mkdir -p ' .. sys.dirname(filename)) - if self.timestamp then - -- use a timestamp to store all networks uniquely - filename = filename .. '-' .. os.date("%Y_%m_%d_%X") - else - -- if no timestamp, just store the previous one - if sys.filep(filename) then - os.execute('mv ' .. filename .. ' ' .. filename .. 
'.old') - end - end - print('<trainer> saving network to '..filename) - local file = torch.DiskFile(filename,'w') - self.module:write(file) - file:close() -end - -function StochasticTrainer:train(dataset) - self.epoch = self.epoch or 1 - local currentLearningRate = self.learningRate - local module = self.module - local criterion = self.criterion - self.trainset = dataset - - local shuffledIndices = {} - if not self.shuffleIndices then - for t = 1,dataset:size() do - shuffledIndices[t] = t - end - else - shuffledIndices = lab.randperm(dataset:size()) - end - - local parameters = nnx.getParameters(module) - local gradParameters = nnx.getGradParameters(module) - - while true do - print('<trainer> on training set:') - print("<trainer> stochastic gradient descent epoch # " .. self.epoch) - - module:zeroGradParameters() - - self.time = sys.clock() - self.currentError = 0 - for t = 1,dataset:size() do - -- disp progress - if self.dispProgress then - xlua.progress(t, dataset:size()) - end - - -- load new sample - local sample = dataset[self.trainOffset + shuffledIndices[t]] - local input = sample[1] - local target = sample[2] - local sample_x = sample.x - local sample_y = sample.y - - -- get max of target ? - if self.maxTarget then - target = torch.Tensor(target:nElement()):copy(target) - _,target = lab.max(target) - target = target[1] - end - - -- is target uniform ? - local isUniform = false - if self.errorArray and target:min() == target:max() then - isUniform = true - end - - -- perform SGD step - if not (self.skipUniformTargets and isUniform) then - -- optional preprocess - if self.preprocessor then input = self.preprocessor:forward(input) end - - -- forward through model and criterion - -- (if no criterion, it is assumed to be contained in the model) - local modelOut, error - if criterion then - modelOut = module:forward(input) - error = criterion:forward(modelOut, target) - else - modelOut, error = module:forward(input, target, sample_x, sample_y) - end - - -- accumulate error - self.currentError = self.currentError + error - - -- reset gradients - module:zeroGradParameters() - - -- backward through model - -- (if no criterion, it is assumed that derror is internally generated) - if criterion then - local derror = criterion:backward(module.output, target) - module:backward(input, derror) - else - module:backward(input) - end - - -- update parameters in the model - self.optimizer:forward(parameters, gradParameters) - end - - -- call user hook, if any - if self.hookTrainSample then - self.hookTrainSample(self, sample) - end - end - - self.currentError = self.currentError / dataset:size() - print("<trainer> current error = " .. self.currentError) - - self.time = sys.clock() - self.time - self.time = self.time / dataset:size() - print("<trainer> time to learn 1 sample = " .. (self.time*1000) .. 
'ms') - - if self.hookTrainEpoch then - self.hookTrainEpoch(self) - end - - if self.save then self:log() end - - self.epoch = self.epoch + 1 - currentLearningRate = self.learningRate/(1+self.epoch*self.learningRateDecay) - self.optimizer.learningRate = currentLearningRate - - if dataset.infiniteSet then - self.trainOffset = self.trainOffset + dataset:size() - end - - if self.maxEpoch > 0 and self.epoch > self.maxEpoch then - print("<trainer> you have reached the maximum number of epochs") - break - end - end -end - - -function StochasticTrainer:test(dataset) - print('<trainer> on testing Set:') - - local module = self.module - local shuffledIndices = {} - local criterion = self.criterion - self.currentError = 0 - self.testset = dataset - - local shuffledIndices = {} - if not self.shuffleIndices then - for t = 1,dataset:size() do - shuffledIndices[t] = t - end - else - shuffledIndices = lab.randperm(dataset:size()) - end - - self.time = sys.clock() - for t = 1,dataset:size() do - -- disp progress - if self.dispProgress then - xlua.progress(t, dataset:size()) - end - - -- get new sample - local sample = dataset[self.testOffset + shuffledIndices[t]] - local input = sample[1] - local target = sample[2] - - -- max target ? - if self.maxTarget then - target = torch.Tensor(target:nElement()):copy(target) - _,target = lab.max(target) - target = target[1] - end - - -- test sample through current model - if self.preprocessor then input = self.preprocessor:forward(input) end - if criterion then - self.currentError = self.currentError + - criterion:forward(module:forward(input), target) - else - local _,error = module:forward(input, target) - self.currentError = self.currentError + error - end - - -- user hook - if self.hookTestSample then - self.hookTestSample(self, sample) - end - end - - self.currentError = self.currentError / dataset:size() - print("<trainer> test current error = " .. self.currentError) - - self.time = sys.clock() - self.time - self.time = self.time / dataset:size() - print("<trainer> time to test 1 sample = " .. (self.time*1000) .. 'ms') - - if self.hookTestEpoch then - self.hookTestEpoch(self) - end - - if dataset.infiniteSet then - self.testOffset = self.testOffset + dataset:size() - end - - return self.currentError -end - -function StochasticTrainer:write(file) - parent.write(self,file) - file:writeObject(self.module) - file:writeObject(self.criterion) -end - -function StochasticTrainer:read(file) - parent.read(self,file) - self.module = file:readObject() - self.criterion = file:readObject() -end diff --git a/Trainer.lua b/Trainer.lua index 3388ef7..b7da770 100644 --- a/Trainer.lua +++ b/Trainer.lua @@ -4,7 +4,6 @@ function Trainer:__init() self.learningRate = 0.01 self.learningRateDecay = 0 self.maxIteration = 25 - self.shuffleIndices = true end function Trainer:train(dataset) @@ -14,14 +13,12 @@ function Trainer:write(file) file:writeDouble(self.learningRate) file:writeDouble(self.learningRateDecay) file:writeInt(self.maxIteration) - file:writeBool(self.shuffleIndices) end function Trainer:read(file) self.learningRate = file:readDouble() self.learningRateDecay = file:readDouble() self.maxIteration = file:readInt() - self.shuffleIndices = file:readBool() end function Trainer:share(mlp, ...) @@ -30,10 +27,6 @@ function Trainer:share(mlp, ...) end end -function Trainer:setShuffle(bool) - self.shuffleIndices = bool -end - function Trainer:clone(...) 
local f = torch.MemoryFile("rw"):binary() f:writeObject(self) @@ -93,19 +93,21 @@ torch.include('nnx', 'SpatialColorTransform.lua') -- criterions: torch.include('nnx', 'SuperCriterion.lua') torch.include('nnx', 'SparseCriterion.lua') +torch.include('nnx', 'DistNLLCriterion.lua') torch.include('nnx', 'SpatialMSECriterion.lua') torch.include('nnx', 'SpatialClassNLLCriterion.lua') torch.include('nnx', 'SpatialSparseCriterion.lua') -- optimizations: torch.include('nnx', 'Optimization.lua') +torch.include('nnx', 'BatchOptimization.lua') torch.include('nnx', 'SGDOptimization.lua') torch.include('nnx', 'LBFGSOptimization.lua') -- trainers: torch.include('nnx', 'Trainer.lua') torch.include('nnx', 'OnlineTrainer.lua') -torch.include('nnx', 'StochasticTrainer.lua') +torch.include('nnx', 'BatchTrainer.lua') -- datasets: torch.include('nnx', 'DataSet.lua') @@ -185,3 +187,29 @@ function nnx.getGradParameters(...) -- return all parameters found return holder end + +function nnx.flattenParameters(parameters) + -- compute offsets of each parameter + local offsets = {} + local dimensions = {} + local elements = {} + local nParameters = 0 + for _,param in ipairs(parameters) do + table.insert(offsets, nParameters+1) + table.insert(dimensions, param:size()) + table.insert(elements, param:nElement()) + nParameters = nParameters + param:nElement() + end + -- create flat vector + local flatParameters = torch.Tensor(nParameters) + local storage = flatParameters:storage() + -- reallocate all parameters in flat vector + for i = 1,#parameters do + local data = parameters[i]:clone() + parameters[i]:set(storage, offsets[i], elements[i]):resize(dimensions[i]):copy(data) + end + -- cleanup + collectgarbage() + -- return new flat vector that contains all discrete parameters + return flatParameters +end @@ -81,6 +81,12 @@ #define max2(a, b) ((a) >= (b) ? (a) : (b)) #define max3(a, b, c) max2(max2((a), (b)), (c)); +// extra globals +static int nEvaluation = 0; +static int maxEval = 0; // maximum number of function evaluations +static int nIteration = 0; +static int verbose = 0; + struct tag_callback_data { int n; void *instance; @@ -415,7 +421,8 @@ int lbfgs( fx += xnorm * param.orthantwise_c; owlqn_pseudo_gradient( pg, x, g, n, - param.orthantwise_c, param.orthantwise_start, param.orthantwise_end + param.orthantwise_c, + param.orthantwise_start, param.orthantwise_end ); } @@ -468,7 +475,8 @@ int lbfgs( ls = linesearch(n, x, &fx, g, d, &step, xp, pg, w, &cd, ¶m); owlqn_pseudo_gradient( pg, x, g, n, - param.orthantwise_c, param.orthantwise_start, param.orthantwise_end + param.orthantwise_c, + param.orthantwise_start, param.orthantwise_end ); } if (ls < 0) { @@ -476,6 +484,9 @@ int lbfgs( veccpy(x, xp, n); veccpy(g, gp, n); ret = ls; + if (verbose > 1){ + printf("Stopping b/c ls (%d) < 0\n", ls); + } goto lbfgs_exit; } @@ -490,10 +501,20 @@ int lbfgs( /* Report the progress. */ if (cd.proc_progress) { if ((ret = cd.proc_progress(cd.instance, x, g, fx, xnorm, gnorm, step, cd.n, k, ls))) { - goto lbfgs_exit; + if (verbose > 1){ + printf("Stopping b/c cd.proc_progress (%d)\n", ret); + } + goto lbfgs_exit; } } + /* Count number of function evaluations */ + if ((maxEval != 0)&&(nEvaluation > maxEval)) { + if (verbose > 1){ + printf("Stopping b/c exceeded max number of function evaluations\n"); + } + goto lbfgs_exit; + } /* Convergence test. 
The criterion is given by the following formula: @@ -501,6 +522,10 @@ int lbfgs( */ if (xnorm < 1.0) xnorm = 1.0; if (gnorm / xnorm <= param.epsilon) { + if (verbose > 1){ + printf("Stopping b/c gnorm(%f)/xnorm(%f) <= param.epsilon (%f)\n", + gnorm, xnorm, param.epsilon); + } /* Convergence. */ ret = LBFGS_SUCCESS; break; @@ -519,6 +544,10 @@ int lbfgs( /* The stopping criterion. */ if (rate < param.delta) { + if (verbose > 1){ + printf("Stopping b/c rate (%f) < param.delta (%f)\n", + rate, param.delta); + } ret = LBFGS_STOP; break; } @@ -529,6 +558,10 @@ int lbfgs( } if (param.max_iterations != 0 && param.max_iterations < k+1) { + if (verbose > 1){ + printf("Stopping b/c param.max_iterations (%d) < k+1 (%d)\n", + param.max_iterations, k+1); + } /* Maximum number of iterations. */ ret = LBFGSERR_MAXIMUMITERATION; break; @@ -1375,9 +1408,6 @@ static THDoubleTensor *gradParameters = NULL; static int nParameter = 0; static lua_State *GL = NULL; static lbfgs_parameter_t lbfgs_param; -static int nEvaluation = 0; -static int nIteration = 0; -static int verbose = 0; static lbfgsfloatval_t evaluate(void *instance, const lbfgsfloatval_t *x, @@ -1417,12 +1447,11 @@ static int progress(void *instance, int ls) { nIteration = k; - if (verbose == 2) { - printf("\n<LBFGSOptimization> iteration %d:\n", nIteration); - printf(" + fx = %f\n", fx); + if (verbose > 1) { + printf("<LBFGSOptimization> iteration %d:\n", nIteration); + printf(" + f(X) = %f\n", fx); printf(" + xnorm = %f, gnorm = %f, step = %f\n", xnorm, gnorm, step); printf(" + nb evaluations = %d\n", nEvaluation); - printf("\n"); } return 0; } @@ -1433,27 +1462,22 @@ int lbfgs_run(lua_State *L) { parameters = luaT_checkudata(L, 1, torch_DoubleTensor_id); gradParameters = luaT_checkudata(L, 2, torch_DoubleTensor_id); nParameter = THDoubleTensor_nElement(parameters); - // parameters for algorithm nEvaluation = 0; lbfgsfloatval_t fx; lbfgsfloatval_t *x = lbfgs_malloc(nParameter); - // get verbose level - lua_getfield(GL, LUA_GLOBALSINDEX, "lbfgs"); // push lbfgs on top of stack - lua_getfield(GL, -1, "verbose"); // push lbfgs.verbose on top of stack - verbose = lua_tonumber(GL, -1); // verbose = lbfgs.verbose - lua_pop(GL, 2); // pop last two entries - - // initialize vector x <- parameters memcpy(x, THDoubleTensor_data(parameters), sizeof(double)*nParameter); // initialize the parameters for the L-BFGS optimization lbfgs_parameter_init(&lbfgs_param); - lbfgs_param.max_iterations = lua_tonumber(L, 3); - lbfgs_param.max_linesearch = lua_tonumber(L, 4); + maxEval = lua_tonumber(L,3); + lbfgs_param.max_iterations = lua_tonumber(L, 4); + lbfgs_param.max_linesearch = lua_tonumber(L, 5); lbfgs_param.linesearch = LBFGS_LINESEARCH_BACKTRACKING; - lbfgs_param.orthantwise_c = lua_tonumber(L, 5); + lbfgs_param.orthantwise_c = lua_tonumber(L, 6); + // get verbose level + verbose = lua_tonumber(L,7); // Start the L-BFGS optimization; this will invoke the callback functions // evaluate() and progress() when necessary. @@ -1461,10 +1485,10 @@ int lbfgs_run(lua_State *L) { // verbose if (verbose) { - printf("\n<LBFGSOptimization> batch optimized after %d iterations\n", nIteration); - printf(" + fx = %f\n", fx); + printf("<LBFGSOptimization> batch optimized after %d iterations\n", nIteration); + printf(" + f(X) = %f\n", fx); + printf(" + X = [%f , ... 
%f]\n",x[0],x[nParameter-1]); printf(" + nb evaluations = %d\n", nEvaluation); - printf("\n"); } // cleanup diff --git a/nnx-1.0-1.rockspec b/nnx-1.0-1.rockspec index 4529d24..dcb8d1b 100644 --- a/nnx-1.0-1.rockspec +++ b/nnx-1.0-1.rockspec @@ -62,6 +62,7 @@ build = { install_files(/lua/nnx init.lua) install_files(/lua/nnx Abs.lua) install_files(/lua/nnx ConfusionMatrix.lua) + install_files(/lua/nnx DistNLLCriterion.lua) install_files(/lua/nnx Logger.lua) install_files(/lua/nnx Probe.lua) install_files(/lua/nnx HardShrink.lua) @@ -83,7 +84,6 @@ build = { install_files(/lua/nnx SpatialCriterion.lua) install_files(/lua/nnx Trainer.lua) install_files(/lua/nnx OnlineTrainer.lua) - install_files(/lua/nnx StochasticTrainer.lua) install_files(/lua/nnx DataSet.lua) install_files(/lua/nnx DataList.lua) install_files(/lua/nnx DataSetLabelMe.lua) @@ -103,6 +103,8 @@ build = { install_files(/lua/nnx Optimization.lua) install_files(/lua/nnx LBFGSOptimization.lua) install_files(/lua/nnx SGDOptimization.lua) + install_files(/lua/nnx BatchOptimization.lua) + install_files(/lua/nnx BatchTrainer.lua) add_subdirectory (test) install_targets(/lib nnx) ]], diff --git a/test/test-all.lua b/test/test-all.lua index 148e860..f7e591a 100644 --- a/test/test-all.lua +++ b/test/test-all.lua @@ -301,93 +301,6 @@ function nnxtest.SpatialConvolution() mytester:asserteq(berr, 0, torch.typename(module) .. ' - i/o backward err ') end -function nnxtest.SpatialConvolutionSparse_1() - local from = math.random(1,10) - local to = math.random(1,10) - local ini = math.random(10,20) - local inj = math.random(10,20) - local ki = math.random(1,10) - local kj = math.random(1,10) - local si = math.random(1,1) - local sj = math.random(1,1) - - local ct = nn.tables.full(from,to) - local module = nn.SpatialConvolutionSparse(ct, ki, kj, si, sj) - local input = torch.Tensor(from, inj, ini):zero() - module:reset() - - local err = nn.Jacobian.testJacobian(module, input) - mytester:assertlt(err, precision, 'error on state ') - - local err = nn.Jacobian.testJacobianParameters(module, input, module.weight, module.gradWeight) - mytester:assertlt(err, precision, 'error on weight ') - - local err = nn.Jacobian.testJacobianParameters(module, input, module.bias, module.gradBias) - mytester:assertlt(err, precision, 'error on bias ') - - local ferr, berr = nn.Jacobian.testIO(module, input) - mytester:asserteq(ferr, 0, torch.typename(module) .. ' - i/o forward err ') - mytester:asserteq(berr, 0, torch.typename(module) .. ' - i/o backward err ') -end - -function nnxtest.SpatialConvolutionSparse_2() - local from = math.random(1,10) - local to = math.random(1,10) - local ini = math.random(10,20) - local inj = math.random(10,20) - local ki = math.random(1,10) - local kj = math.random(1,10) - local si = math.random(1,1) - local sj = math.random(1,1) - - local ct = nn.tables.oneToOne(from) - local module = nn.SpatialConvolutionSparse(ct, ki, kj, si, sj) - local input = torch.Tensor(from, inj, ini):zero() - module:reset() - - local err = nn.Jacobian.testJacobian(module, input) - mytester:assertlt(err, precision, 'error on state ') - - local err = nn.Jacobian.testJacobianParameters(module, input, module.weight, module.gradWeight) - mytester:assertlt(err, precision, 'error on weight ') - - local err = nn.Jacobian.testJacobianParameters(module, input, module.bias, module.gradBias) - mytester:assertlt(err, precision, 'error on bias ') - - local ferr, berr = nn.Jacobian.testIO(module, input) - mytester:asserteq(ferr, 0, torch.typename(module) .. 
' - i/o forward err ') - mytester:asserteq(berr, 0, torch.typename(module) .. ' - i/o backward err ') -end - -function nnxtest.SpatialConvolutionSparse_3() - local from = math.random(2,6) - local to = math.random(4,8) - local ini = math.random(10,20) - local inj = math.random(10,20) - local ki = math.random(1,10) - local kj = math.random(1,10) - local si = math.random(1,1) - local sj = math.random(1,1) - - local ct = nn.tables.random(from,to,from-1) - local module = nn.SpatialConvolutionSparse(ct, ki, kj, si, sj) - local input = torch.Tensor(from, inj, ini):zero() - module:reset() - - local err = nn.Jacobian.testJacobian(module, input) - mytester:assertlt(err, precision, 'error on state ') - - local err = nn.Jacobian.testJacobianParameters(module, input, module.weight, module.gradWeight) - mytester:assertlt(err, precision, 'error on weight ') - - local err = nn.Jacobian.testJacobianParameters(module, input, module.bias, module.gradBias) - mytester:assertlt(err, precision, 'error on bias ') - - local ferr, berr = nn.Jacobian.testIO(module, input) - mytester:asserteq(ferr, 0, torch.typename(module) .. ' - i/o forward err ') - mytester:asserteq(berr, 0, torch.typename(module) .. ' - i/o backward err ') -end - function nnxtest.SpatialNormalization_Gaussian2D() local inputSize = math.random(11,20) local kersize = 9 |
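For orientation, below is a minimal, hedged sketch of how the classes reworked in this merge (nn.BatchTrainer, nn.SGDOptimization on top of the new nn.BatchOptimization, nn.DistNLLCriterion) appear intended to fit together. It is not part of the commit: the 100-input / 10-class model and the `trainData` DataList are hypothetical stand-ins, and the constructor argument names simply follow the `xlua.unpack_class` declarations visible in the diff above.

``` lua
-- hedged usage sketch, meant for the xlua prompt; `trainData` is an
-- nn.DataList assumed to already exist
require 'nnx'

-- a small hypothetical classifier producing raw scores
-- (DistNLLCriterion applies its own LogSoftMax by default)
model = nn.Sequential()
model:add(nn.Linear(100, 10))
criterion = nn.DistNLLCriterion()

-- SGDOptimization now derives from BatchOptimization: it flattens the
-- module's parameters/gradParameters once and optimize() consumes the
-- evaluate() closure built by BatchOptimization:forward()
-- (kept global because BatchTrainer:next() reads the global `optimizer`
-- when deciding whether to reset parallel workers)
optimizer = nn.SGDOptimization{module = model,
                               criterion = criterion,
                               learningRate = 1e-2,
                               momentum = 0.9}

-- BatchTrainer replaces StochasticTrainer: training is driven from the
-- outside, one mini-batch per call to nextBatch()
trainer = nn.BatchTrainer{trainset = trainData,
                          module = model,
                          criterion = criterion,
                          optimizer = optimizer,
                          batchSize = 32}

for i = 1, 100 do
   trainer:nextBatch()
end
```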