github.com/clementfarabet/lua---nnx.git
author    Marco Scoffier <github@metm.org>  2011-09-05 23:39:02 +0400
committer Marco Scoffier <github@metm.org>  2011-09-05 23:39:02 +0400
commit    44f44def4ec8f31516c0b4085c342a23a4e7a5f5 (patch)
tree      68d0ee3b323b66c4d895df7f74b592eb4e997bf2
parent    58a8750d6abaef6c5eb0961f81b1460349ab53fe (diff)
parent    3804abd138d4898ca5576e17cac504ff6fa8d459 (diff)
Merge branch 'master' into cleanDataSet
-rw-r--r--  BatchOptimization.lua    320
-rw-r--r--  BatchTrainer.lua         170
-rw-r--r--  ConfusionMatrix.lua       22
-rw-r--r--  DataList.lua              19
-rw-r--r--  DataSetLabelMe.lua        12
-rw-r--r--  DistNLLCriterion.lua      81
-rw-r--r--  FindTorch.cmake            6
-rw-r--r--  LBFGSOptimization.lua     87
-rw-r--r--  OnlineTrainer.lua         61
-rw-r--r--  Optimization.lua          49
-rw-r--r--  README.md                 67
-rw-r--r--  README.txt                13
-rw-r--r--  SGDOptimization.lua      103
-rw-r--r--  StochasticTrainer.lua    265
-rw-r--r--  Trainer.lua                7
-rw-r--r--  init.lua                  30
-rw-r--r--  lbfgs.c                   72
-rw-r--r--  nnx-1.0-1.rockspec         4
-rw-r--r--  test/test-all.lua         87
19 files changed, 838 insertions, 637 deletions
diff --git a/BatchOptimization.lua b/BatchOptimization.lua
new file mode 100644
index 0000000..f5feb3a
--- /dev/null
+++ b/BatchOptimization.lua
@@ -0,0 +1,320 @@
+local Batch,parent = torch.class('nn.BatchOptimization', 'nn.Optimization')
+
+-- This is a generic class for any batch optimization, modeled after
+-- the L-BFGS optimization. It provides a batch.evaluate() closure,
+-- and creates flat self.parameters and self.gradParameters vectors
+-- from your self.module.
+
+function Batch:__init(...)
+ parent.__init(self)
+ xlua.unpack_class(self, {...},
+ 'BatchOptimization', nil,
+ {arg='module', type='nn.Module', help='a module to train', req=true},
+ {arg='criterion', type='nn.Criterion',
+ help='a criterion to estimate the error', req=true},
+ {arg='parallelize', type='number',
+ help='parallelize onto N cores (experimental!)', default=1},
+ {arg='verbose', type='number',
+ help='verbose level during training [0-2]', default=0}
+ )
+ self.parameters = nnx.flattenParameters(nnx.getParameters(self.module))
+ self.gradParameters = nnx.flattenParameters(nnx.getGradParameters(self.module))
+ self.evalCounter = 0
+ self.sampleCounter = 0
+ if self.parallelize > 1 then
+ self:setup_mapreduce()
+ end
+ self.P = self.parallelize
+end
+
+function Batch:forward(inputs, targets, options)
+ options = options or {}
+ if self.P > 1 then
+ return self:forward_mapreduce(inputs, targets, options)
+ else
+ return self:forward_sequential(inputs, targets, options)
+ end
+end
+
+function Batch:forward_sequential(inputs, targets, options)
+ -- (1) construct a closure that computes f(inputs) + df/dW
+ -- after each call to that function:
+ -- + self.parameters contains the current X vector
+ -- + self.gradParameters contains the estimated dF/dX vector
+ -- + self.output contains the estimated (average) F(X)
+ self.evaluate
+ = function()
+ -- verbose
+ if self.verbose >= 2 then
+ print('<BatchOptimization> evaluating f(X) + df/dX')
+ end
+ local _t_ = sys.clock()
+ -- reset gradients
+ self.gradParameters:zero()
+ -- f is the average of all criterions
+ self.output = 0
+ -- given all inputs, evaluate gradients
+ for i = 1,#inputs do
+ -- user hook
+ if self.prehook then
+ self.prehook(self, {inputs[i], targets[i], options[i]})
+ end
+ -- estimate f
+ local output = self.module:forward(inputs[i])
+ local err = self.criterion:forward(output, targets[i])
+ self.output = self.output + err
+ -- estimate df/dW
+ local df_do = self.criterion:backward(output, targets[i])
+ self.module:backward(inputs[i], df_do)
+ -- user hook
+ if self.posthook then
+ self.posthook(self, {inputs[i], targets[i], options[i]})
+ end
+ end
+ -- update evaluation counter
+ self.evalCounter = self.evalCounter + 1
+ -- normalize gradients
+ self.gradParameters:div(#inputs)
+ -- verbose
+ if self.verbose >= 2 then
+ print('<BatchOptimization> ' .. self.evalCounter .. 'th evaluation took ' .. (sys.clock() - _t_) .. ' sec')
+ end
+ -- return average f(X)
+ self.output = self.output/#inputs
+ return self.output
+ end
+
+ -- (2) optimization callback
+ if self.optimize then
+ self:optimize()
+ end
+
+ -- (3) update sample counter
+ self.sampleCounter = self.sampleCounter + #inputs
+
+ -- (4) return current output after optimization
+ return self.output
+end
+
+function Batch:forward_mapreduce(inputs, targets, options)
+ -- parameters
+ local P = self.P
+
+ -- transmit user hooks, if defined
+ if not self.hooksets then
+ if self.prehook then
+ if type(self.prehook) == 'string' then
+ parallel.children:send(self.prehook)
+ else
+ print('\r<BatchOptimization> WARNING: when using para||el mode,'..
+ ' hooks should be defined as strings. User prehook ignored.')
+ parallel.children:send('')
+ end
+ else
+ parallel.children:send('')
+ end
+ if self.posthook then
+ if type(self.posthook) == 'string' then
+ parallel.children:send(self.posthook)
+ else
+ print('\r<BatchOptimization> WARNING: when using para||el mode,'..
+ ' hooks should be defined as strings. User posthook ignored.')
+ parallel.children:send('')
+ end
+ else
+ parallel.children:send('')
+ end
+ self.hooksets = true
+ end
+
+ -- (0a) replicate output and gradParameters
+ local outputsPartial = {}
+ local gradParametersPartial = {}
+
+ -- (0b) divide input/target batch into N batches
+ local inputss = {}
+ local targetss = {}
+ local optionss = {}
+ for t = 1,P do
+ inputss[t] = {}
+ targetss[t] = {}
+ optionss[t] = {}
+ for i = t,#inputs,P do
+ table.insert(inputss[t], inputs[i])
+ table.insert(targetss[t], targets[i])
+ if options then table.insert(optionss[t], options[i]) end
+ end
+ end
+
+ -- (0c) send mini-batch to all workers
+ for t = 1,P do
+ parallel.children[t]:join()
+ parallel.children[t]:send(inputss[t])
+ parallel.children[t]:send(targetss[t])
+ parallel.children[t]:send(optionss[t])
+ end
+
+ -- (1) construct a closure that computes f(inputs) + df/dW
+ -- after each call to that function:
+ -- + self.parameters contains the current X vector
+ -- + self.gradParameters contains the estimated dF/dX vector
+ -- + self.output contains the estimated (average) F(X)
+ self.evaluate
+ = function()
+ -- verbose
+ if self.verbose >= 2 then
+ print('<BatchOptimization> evaluating f(X) + df/dX')
+ end
+ local _t_ = sys.clock()
+ -- do map/reduce
+ self.evaluate_map()
+ self.evaluate_reduce()
+ -- update evaluation counter
+ self.evalCounter = self.evalCounter + 1
+ -- verbose
+ if self.verbose >= 2 then
+ print('<BatchOptimization> ' .. self.evalCounter .. 'th evaluation took ' .. (sys.clock() - _t_) .. ' sec')
+ end
+ return self.output
+ end
+
+ -- (1a) the map part of the evaluation: compute partial gradients
+ -- in separate threads
+ self.evaluate_map
+ = function()
+ -- transmit new parameters to all workers
+ parallel.children:join()
+ parallel.children:send(self.parameters)
+ -- then wait for all workers to return their partial gradParameters + outputs
+ gradParametersPartial = parallel.children:receive()
+ outputsPartial = parallel.children:receive()
+ -- force cleanup
+ collectgarbage()
+ end
+
+ -- (1b) the reduce part of the evaluation: accumulate all
+ -- partial estimates of the gradients
+ self.evaluate_reduce
+ = function()
+ -- accumulate partial gradients, and average
+ self.gradParameters:zero()
+ for t = 1,P do
+ self.gradParameters:add(gradParametersPartial[t])
+ end
+ self.gradParameters:div(#inputs)
+ -- return average f(X)
+ self.output = 0
+ for t = 1,P do
+ self.output = self.output + outputsPartial[t]
+ end
+ self.output = self.output/#inputs
+ end
+
+ if self.optimize then
+ -- (2) optimization callback
+ self:optimize()
+
+ -- (3) reset workers so they're ready for next mini-batch
+ -- only do this when we have an optimization hook
+ parallel.children:join('break')
+ end
+
+ -- (4) update sample counter
+ self.sampleCounter = self.sampleCounter + #inputs
+
+ -- (5) return current output after optimization
+ return self.output
+end
+
+function Batch:setup_mapreduce ()
+ -- (0) startup parallel package
+ if not xrequire 'parallel' then
+ xerror('install parallel for Lua to enable parallel computing (luarocks install parallel)',
+ 'nn.BatchOptimization')
+ end
+
+ -- (1) define code for workers
+ local worker_code = [[
+ -- require packages
+ require 'nnx'
+
+ -- retrieve module + criterion at startup
+ parallel.yield()
+ module = parallel.parent:receive()
+ criterion = parallel.parent:receive()
+
+ -- create fake optimizer, for hooks
+ optimizer = {module=module, criterion=criterion}
+
+ -- retrieve optional prehook/posthook
+ prehook = parallel.parent:receive()
+ posthook = parallel.parent:receive()
+ if prehook ~= '' then loadstring(prehook)() else prehook = nil end
+ if posthook ~= '' then loadstring(posthook)() else posthook = nil end
+
+ -- get pointer to parameter and gradParameter vectors
+ parameters = nnx.flattenParameters(nnx.getParameters(module))
+ gradParameters = nnx.flattenParameters(nnx.getGradParameters(module))
+
+ -- outer loop: mini-batches
+ while true do
+ -- sync
+ if parallel.yield() == 'break' then break end
+
+ -- receive new mini-batch
+ inputs = parallel.parent:receive()
+ targets = parallel.parent:receive()
+ options = parallel.parent:receive()
+
+ -- inner loop: evaluations
+ while true do
+ -- sync
+ if parallel.yield() == 'break' then break end
+
+ -- receive new set of parameters
+ parameters:copy(parallel.parent:receive())
+
+ -- reset gradients
+ gradParameters:zero()
+ -- f is the average of all criterions
+ local f_x = 0
+ -- evaluate gradients on inputs for this thread
+ for i = 1,#inputs do
+ -- user hook
+ if prehook then
+ prehook(optimizer, {inputs[i], targets[i], options[i]})
+ end
+ -- estimate f
+ local output = module:forward(inputs[i])
+ local err = criterion:forward(output, targets[i])
+ f_x = f_x + err
+ -- estimate df/dW
+ local df_do = criterion:backward(output, targets[i])
+ module:backward(inputs[i], df_do)
+ -- user hook
+ if posthook then
+ posthook(optimizer, {inputs[i], targets[i], options[i]})
+ end
+ end
+ -- now send back gradParameters + partial output
+ parallel.parent:send(gradParameters)
+ parallel.parent:send(f_x)
+ -- force cleanup
+ collectgarbage()
+ end
+ end
+ ]]
+
+ local setup = function()
+ -- (2) startup all workers
+ parallel.sfork(self.parallelize)
+ parallel.children:exec(worker_code)
+
+ -- (3) and send them the module + criterion architecture
+ parallel.children:join()
+ parallel.children:send(self.module)
+ parallel.children:send(self.criterion)
+ end
+ local ok,err = pcall(setup)
+ if not ok then parallel.close() error(err) end
+end
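
A minimal usage sketch of the batch-optimization API introduced above (not part of the patch): it assumes a small `mlp` module and uses `nn.SGDOptimization`, which, per the changes further down in this patch, now derives from `nn.BatchOptimization`. `forward()` builds the `self.evaluate()` closure over the whole mini-batch and then calls the subclass's `optimize()`.

``` lua
-- sketch only: a tiny model driven through a BatchOptimization subclass
require 'nnx'

local mlp = nn.Sequential()
mlp:add(nn.Linear(10, 2))

local optimizer = nn.SGDOptimization{
   module       = mlp,
   criterion    = nn.MSECriterion(),
   learningRate = 1e-2
}

-- inputs/targets are plain Lua tables of samples, as in forward_sequential()
local inputs, targets = {}, {}
for i = 1,4 do
   inputs[i]  = torch.Tensor(10):fill(i)
   targets[i] = torch.Tensor(2):fill(1)
end

-- builds self.evaluate() over the mini-batch, then calls self:optimize()
local err = optimizer:forward(inputs, targets)
print('average f(X) after this pass: ' .. err)
```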
diff --git a/BatchTrainer.lua b/BatchTrainer.lua
new file mode 100644
index 0000000..a5b135d
--- /dev/null
+++ b/BatchTrainer.lua
@@ -0,0 +1,170 @@
+local BatchTrainer, parent = torch.class('nn.BatchTrainer', 'nn.OnlineTrainer')
+
+-- Essentially similar to the OnlineTrainer, but it only uses the parts
+-- of the code which prepare the data and the tester. train() has been
+-- replaced by nextBatch(), which moves the trainer one batch further
+-- through the data. When the first epoch is finished, the batches are
+-- reused. Each call to optimizer:forward() in nextBatch() creates a
+-- closure with the current batch as input.
+
+function BatchTrainer:__init(...)
+ local args = {...}
+ parent.__init(self, args)
+ -- unpack args
+ xlua.unpack_class(
+ self, args,
+ 'BatchTrainer',
+ 'A modified version of the general-purpose online trainer class,\n'
+ .. ' which only preps the input batch and calls the optimizer to\n'
+ .. ' create a closure.\n',
+ {arg='trainset', type='nn.DataList',
+ help='dataset from which to draw batches', req=true},
+ {arg='module', type='nn.Module', help='a module to train', req=true},
+ {arg='criterion', type='nn.Criterion',
+ help='a criterion to estimate the error'},
+ {arg='preprocessor', type='nn.Module',
+ help='a preprocessor to prime the data before the module'},
+ {arg='optimizer', type='nn.Optimization',
+ help='an optimization method'},
+ {arg='batchSize', type='number',
+ help='[mini] batch size', default=1},
+ {arg='maxEpoch', type='number',
+ help='maximum number of epochs', default=50},
+ {arg='dispProgress', type='boolean',
+ help='display a progress bar during training/testing', default=true},
+ {arg='save', type='string',
+ help='path to save networks and log training'},
+ {arg='timestamp', type='boolean',
+ help='if true, appends a timestamp to each network saved', default=false}
+ )
+ self.epoch = 1
+ self.batch = nil
+ self.trainOffset = nil
+end
+
+-- update the counters
+function BatchTrainer:next()
+ if not self.batch or not self.trainOffset then
+ -- initialize
+ self.batch = 1
+ self.trainOffset = 1
+ else
+ -- hook to run something on the current batch
+ -- (e.g. if you want to run a test on this batch before
+ -- switching to the next one)
+ if self.hookTrainBatch then
+ self.hookTrainBatch(self)
+ end
+
+ -- simple batch increment
+ self.batch = self.batch + 1
+ self.trainOffset = self.trainOffset + self.batchSize
+
+ -- test for new epoch
+ if self.trainOffset > self.trainset:size() then
+
+ -- hook to run on current epoch before switching to next
+ if self.hookTrainEpoch then
+ self.hookTrainEpoch(self)
+ end
+
+ if self.save then self:log() end
+
+ self.trainOffset = 1
+ self.epoch = self.epoch + 1
+ self.batch = 1
+ end
+
+ -- on all but the first batch we need to reset the children
+ if self.optimizer.parallelize > 1 then
+ parallel.children:send('break')
+ end
+
+ end
+ -- disp progress
+ if self.dispProgress then
+ xlua.progress(self.trainOffset, self.trainset:size())
+ end
+
+end
+
+-- This function is called train() in the online trainer. It seems to
+-- make more sense to call it nextBatch() here, as the training is
+-- done outside of this code.
+
+function BatchTrainer:nextBatch()
+ self:next()
+ local module = self.module
+ local criterion = self.criterion
+ local t = self.trainOffset
+ local ds = self.trainset:size()
+ local bs = self.batchSize
+
+ print('<trainer> on training set:')
+ print("<trainer> online epoch # " .. self.epoch
+ .. ' batch # '..self.batch
+ .. ' [batchSize = ' .. self.batchSize .. ']')
+
+ -- create mini batch
+ self.inputs = self.inputs or {}
+ self.targets = self.targets or {}
+ local inputs = {}
+ local targets = {}
+ if not self.inputs[self.batch] then
+
+ self.inputs[self.batch] = {}
+ inputs = self.inputs[self.batch]
+ self.targets[self.batch] = {}
+ targets = self.targets[self.batch]
+
+ for i = t,math.min(t+bs-1,ds) do
+ -- load new sample
+ local sample = self.trainset[i]
+ local input = sample[1]
+ local target = sample[2]
+
+ -- optional preprocess (no learning is done for the preprocessor)
+ if self.preprocessor then input = self.preprocessor:forward(input) end
+
+ -- store input/target
+ table.insert(inputs, input)
+ table.insert(targets, target)
+ end
+ else
+ -- get batch from cache
+ inputs = self.inputs[self.batch]
+ targets = self.targets[self.batch]
+ end
+
+ -- set up closure batch.evaluate() for optimizer
+ local error = self.optimizer:forward(inputs, targets)
+
+end
+
+-- special test to just get results of current batch
+function BatchTrainer:testBatch()
+ local criterion = self.criterion
+ local module = self.module
+
+ local inputs = self.inputs[self.batch]
+ local targets = self.targets[self.batch]
+
+ self.currentError = 0
+
+ for i = 1,#inputs do
+ local input = inputs[i]
+ local target = targets[i]
+ if criterion then
+ self.currentError = self.currentError +
+ criterion:forward(module:forward(input), target)
+ else
+ local _,error = module:forward(input, target)
+ self.currentError = self.currentError + error
+ end
+ -- user hook
+ if self.hookTestSample then
+ self.hookTestSample(self, {input, target})
+ end
+ end
+end
+
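
A hedged sketch (not part of the patch) of how `nextBatch()` above might be driven from user code; `trainData` (an `nn.DataList`), `mlp`, and `optimizer` are assumed to have been built elsewhere, e.g. as in the sketch after BatchOptimization.lua.

``` lua
-- sketch only: 'trainData', 'mlp' and 'optimizer' are assumed to exist
local trainer = nn.BatchTrainer{
   trainset  = trainData,
   module    = mlp,
   criterion = nn.MSECriterion(),
   optimizer = optimizer,
   batchSize = 16
}

-- each call prepares (or re-uses) one mini-batch and hands it to
-- optimizer:forward(), which builds the batch.evaluate() closure
for i = 1,100 do
   trainer:nextBatch()
end
```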
diff --git a/ConfusionMatrix.lua b/ConfusionMatrix.lua
index 96c9aa4..ed3f000 100644
--- a/ConfusionMatrix.lua
+++ b/ConfusionMatrix.lua
@@ -11,7 +11,7 @@ function ConfusionMatrix:__init(nclasses, classes)
self.nclasses = nclasses
self.totalValid = 0
self.averageValid = 0
- self.classes = classes
+ self.classes = classes or {}
end
function ConfusionMatrix:add(prediction, target)
@@ -74,7 +74,7 @@ function ConfusionMatrix:__tostring__()
for p = 1,nclasses do
str = str .. '' .. string.format('%8d', self.mat[t][p])
end
- if self.classes then
+ if self.classes and self.classes[1] then
if t == nclasses then
str = str .. ']] ' .. pclass .. '% \t[class: ' .. (self.classes[t] or '') .. ']\n'
else
@@ -92,3 +92,21 @@ function ConfusionMatrix:__tostring__()
str = str .. ' + global correct: ' .. (self.totalValid*100) .. '%'
return str
end
+
+function ConfusionMatrix:write(file)
+ file:writeObject(self.mat)
+ file:writeObject(self.valids)
+ file:writeInt(self.nclasses)
+ file:writeInt(self.totalValid)
+ file:writeInt(self.averageValid)
+ file:writeObject(self.classes)
+end
+
+function ConfusionMatrix:read(file)
+ self.mat = file:readObject()
+ self.valids = file:readObject()
+ self.nclasses = file:readInt()
+ self.totalValid = file:readInt()
+ self.averageValid = file:readInt()
+ self.classes = file:readObject()
+end
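
The two methods added above make the confusion matrix serializable. A small sketch (not part of the patch) of round-tripping one through `torch.DiskFile`; the filename is arbitrary.

``` lua
-- sketch only: persist and restore a confusion matrix with the new methods
local cm = nn.ConfusionMatrix(3, {'cat','dog','bird'})

local f = torch.DiskFile('confusion.dat', 'w')
cm:write(f)
f:close()

local cm2 = nn.ConfusionMatrix(3)
local g = torch.DiskFile('confusion.dat', 'r')
cm2:read(g)
g:close()
```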
diff --git a/DataList.lua b/DataList.lua
index 99b117a..4922e8b 100644
--- a/DataList.lua
+++ b/DataList.lua
@@ -13,6 +13,8 @@ function DataList:__init()
self.nbClass = 0
self.ClassName = {}
self.nbSamples = 0
+ self.targetIsProbability = false
+ self.spatialTarget = false
end
function DataList:__tostring__()
@@ -30,8 +32,21 @@ function DataList:__index__(key)
elmt = ((elmt-1) % classSize) + 1
-- create target vector on the fly
- self.datasets[class][elmt][2] = torch.Tensor(1,1,self.nbClass):fill(-1)
- self.datasets[class][elmt][2][1][1][class] = 1
+ if self.spatialTarget then
+ if self.targetIsProbability then
+ self.datasets[class][elmt][2] = torch.Tensor(self.nbClass,1,1):zero()
+ else
+ self.datasets[class][elmt][2] = torch.Tensor(self.nbClass,1,1):fill(-1)
+ end
+ self.datasets[class][elmt][2][class][1][1] = 1
+ else
+ if self.targetIsProbability then
+ self.datasets[class][elmt][2] = torch.Tensor(self.nbClass):zero()
+ else
+ self.datasets[class][elmt][2] = torch.Tensor(self.nbClass):fill(-1)
+ end
+ self.datasets[class][elmt][2][class] = 1
+ end
-- apply hook on sample
local sample = self.datasets[class][elmt]
diff --git a/DataSetLabelMe.lua b/DataSetLabelMe.lua
index 6a9e4cf..629561a 100644
--- a/DataSetLabelMe.lua
+++ b/DataSetLabelMe.lua
@@ -275,15 +275,9 @@ end
function DataSetLabelMe:loadSample(index)
if self.preloadedDone then
if index ~= self.currentIndex then
- -- clean up
- self.currentSample = nil
- self.currentMask = nil
- collectgarbage()
-- load new sample
- self.currentSample = torch.Tensor(self.preloaded.samples[index]:size())
- self.currentSample:copy(self.preloaded.samples[index]):mul(1/255)
- self.currentMask = torch.Tensor(self.preloaded.masks[index]:size())
- self.currentMask:copy(self.preloaded.masks[index])
+ self.currentSample = self.preloaded.samples[index]
+ self.currentMask = self.preloaded.masks[index]
-- remember index
self.currentIndex = index
end
@@ -385,7 +379,7 @@ function DataSetLabelMe:preload(saveFile)
xlua.progress(i,self.nbRawSamples)
-- load samples, and store them in raw byte tensors (min memory footprint)
self:loadSample(i)
- local rawTensor = torch.Tensor(self.currentSample:size()):copy(self.currentSample:mul(255))
+ local rawTensor = torch.Tensor(self.currentSample:size()):copy(self.currentSample)
local rawMask = torch.Tensor(self.currentMask:size()):copy(self.currentMask)
-- insert them in our list
table.insert(self.preloaded.samples, rawTensor)
diff --git a/DistNLLCriterion.lua b/DistNLLCriterion.lua
new file mode 100644
index 0000000..fedda1b
--- /dev/null
+++ b/DistNLLCriterion.lua
@@ -0,0 +1,81 @@
+local DistNLLCriterion, parent = torch.class('nn.DistNLLCriterion', 'nn.Criterion')
+
+function DistNLLCriterion:__init()
+ parent.__init(self)
+ -- user options
+ self.inputIsProbability = false
+ self.inputIsLogProbability = false
+ self.targetIsProbability = false
+ -- internal
+ self.targetSoftMax = nn.SoftMax()
+ self.inputLogSoftMax = nn.LogSoftMax()
+ self.gradLogInput = torch.Tensor()
+end
+
+function DistNLLCriterion:normalize(input, target)
+ -- normalize target
+ if not self.targetIsProbability then
+ self.probTarget = self.targetSoftMax:forward(target)
+ else
+ self.probTarget = target
+ end
+
+ -- normalize input
+ if not self.inputIsLogProbability and not self.inputIsProbability then
+ self.logProbInput = self.inputLogSoftMax:forward(input)
+ elseif not self.inputIsLogProbability then
+ print('TODO: implement nn.Log()')
+ else
+ self.logProbInput = input
+ end
+end
+
+function DistNLLCriterion:denormalize(input)
+ -- denormalize gradients
+ if not self.inputIsLogProbability and not self.inputIsProbability then
+ self.gradInput = self.inputLogSoftMax:backward(input, self.gradLogInput)
+ elseif not self.inputIsLogProbability then
+ print('TODO: implement nn.Log()')
+ else
+ self.gradInput = self.gradLogInput
+ end
+end
+
+function DistNLLCriterion:forward(input, target)
+ self:normalize(input, target)
+ self.output = 0
+ for i = 1,input:size(1) do
+ self.output = self.output - self.logProbInput[i] * self.probTarget[i]
+ end
+ return self.output
+end
+
+function DistNLLCriterion:backward(input, target)
+ self:normalize(input, target)
+ self.gradLogInput:resizeAs(input)
+ for i = 1,input:size(1) do
+ self.gradLogInput[i] = -self.probTarget[i]
+ end
+ self:denormalize(input)
+ return self.gradInput
+end
+
+function DistNLLCriterion:write(file)
+ parent.write(self, file)
+ file:writeBool(self.inputIsProbability)
+ file:writeBool(self.inputIsLogProbability)
+ file:writeBool(self.targetIsProbability)
+ file:writeObject(self.targetSoftMax)
+ file:writeObject(self.inputLogSoftMax)
+ file:writeObject(self.gradLogInput)
+end
+
+function DistNLLCriterion:read(file)
+ parent.read(self, file)
+ self.inputIsProbability = file:readBool()
+ self.inputIsLogProbability = file:readBool()
+ self.targetIsProbability = file:readBool()
+ self.targetSoftMax = file:readObject()
+ self.inputLogSoftMax = file:readObject()
+ self.gradLogInput = file:readObject()
+end
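
A small sketch (not part of the patch) of the intended use of the new criterion: unlike a class-index NLL criterion, the target here is a distribution over classes. The flags and values below are illustrative only.

``` lua
-- sketch only: soft (distribution) targets with DistNLLCriterion
local crit = nn.DistNLLCriterion()
crit.targetIsProbability = true          -- the target below is already normalized

local input  = torch.Tensor{1.0, 2.0, 0.5, 0.1}   -- raw scores; LogSoftMax is applied internally
local target = torch.Tensor{0.1, 0.7, 0.1, 0.1}

local err  = crit:forward(input, target)
local grad = crit:backward(input, target)
```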
diff --git a/FindTorch.cmake b/FindTorch.cmake
index 8ada8cc..6658d42 100644
--- a/FindTorch.cmake
+++ b/FindTorch.cmake
@@ -13,9 +13,9 @@ if (TORCH_EXECUTABLE)
get_filename_component (TORCH_BIN_DIR ${TORCH_EXECUTABLE} PATH)
endif (TORCH_EXECUTABLE)
-find_library (TORCH_TH TH ${TORCH_BIN_DIR}/../lib)
-find_library (TORCH_luaT luaT ${TORCH_BIN_DIR}/../lib)
-find_library (TORCH_lua lua ${TORCH_BIN_DIR}/../lib)
+find_library (TORCH_TH TH ${TORCH_BIN_DIR}/../lib NO_DEFAULT_PATH)
+find_library (TORCH_luaT luaT ${TORCH_BIN_DIR}/../lib NO_DEFAULT_PATH)
+find_library (TORCH_lua lua ${TORCH_BIN_DIR}/../lib NO_DEFAULT_PATH)
set (TORCH_LIBRARIES ${TORCH_TH} ${TORCH_luaT} ${TORCH_lua})
diff --git a/LBFGSOptimization.lua b/LBFGSOptimization.lua
index 83393d9..ad98139 100644
--- a/LBFGSOptimization.lua
+++ b/LBFGSOptimization.lua
@@ -1,75 +1,30 @@
-local LBFGS,parent = torch.class('nn.LBFGSOptimization', 'nn.Optimization')
+local LBFGS,parent = torch.class('nn.LBFGSOptimization', 'nn.BatchOptimization')
function LBFGS:__init(...)
require 'liblbfgs'
- parent.__init(self)
+ parent.__init(self, ...)
xlua.unpack_class(self, {...},
'LBFGSOptimization', nil,
- {arg='module', type='nn.Module', help='a module to train', req=true},
- {arg='criterion', type='nn.Criterion', help='a criterion to estimate the error', req=true},
- {arg='maxIterations', type='number', help='maximum nb of iterations per pass (0 = no max)', default=0},
- {arg='maxLineSearch', type='number', help='maximum nb of steps in line search', default=20},
- {arg='sparsity', type='number', help='sparsity coef (Orthantwise C)', default=0},
- {arg='verbose', type='number', help='verbose level during training [0-2]', default=0}
+ {arg='maxEvaluation', type='number',
+ help='maximum nb of function evaluations per pass (0 = no max)', default=0},
+ {arg='maxIterations', type='number',
+ help='maximum nb of iterations per pass (0 = no max)', default=0},
+ {arg='maxLineSearch', type='number',
+ help='maximum nb of steps in line search', default=20},
+ {arg='sparsity', type='number',
+ help='sparsity coef (Orthantwise C)', default=0},
+ {arg='parallelize', type='number',
+ help='parallelize onto N cores (experimental!)', default=1}
)
- self.parametersT = nnx.getParameters(self.module)
- self.gradParametersT = nnx.getGradParameters(self.module)
- lbfgs.verbose = self.verbose
+ self.parameters = nnx.flattenParameters(nnx.getParameters(self.module))
+ self.gradParameters = nnx.flattenParameters(nnx.getGradParameters(self.module))
end
-function LBFGS:forward(inputs, targets, options)
- options = options or {}
- -- (1) construct a closure that compute f(inputs) + df/dW
- -- after each call to that function:
- -- + self.parameters contains the current X vector
- -- + self.gradParameters contains the estimated dF/dX vector
- -- + self.output contains the estimated (average) F(X)
- lbfgs.evaluate
- = function()
- -- set parameters from current state
- self:unflatten(self.parametersT, self.gradParametersT)
- -- reset gradients
- self.module:zeroGradParameters()
- -- f is the average of all criterions
- self.output = 0
- -- given all inputs, evaluate gradients
- for i = 1,#inputs do
- -- user hook
- if self.prehook then
- self.prehook(self, {inputs[i], targets[i], options[i]})
- end
- -- estimate f
- local output = self.module:forward(inputs[i])
- local err = self.criterion:forward(output, targets[i])
- self.output = self.output + err
- -- estimate df/dW
- local df_do = self.criterion:backward(output, targets[i])
- self.module:backward(inputs[i], df_do)
- -- user hook
- if self.posthook then
- self.posthook(self, {inputs[i], targets[i], options[i]})
- end
- end
- -- update state from computed parameters
- self:flatten(self.parametersT, self.gradParametersT)
- -- normalize gradients
- self.gradParameters:div(#inputs)
- -- return average f(X)
- return self.output/#inputs
- end
-
- -- (2) store current parameters/gradParameters
- self:flatten(self.parametersT, self.gradParametersT)
-
- -- (3) the magic function: will update the parameter vector
- -- according to the l-BFGS method
- self.output = lbfgs.run(self.parameters, self.gradParameters,
- self.maxIterations, self.maxLineSearch,
- self.sparsity)
-
- -- (4) last: read parameters back into the model
- self:unflatten(self.parametersT, self.gradParametersT)
-
- -- (5) return current output after optimization
- return self.output
+function LBFGS:optimize()
+ lbfgs.evaluate = self.evaluate
+ -- the magic function: will update the parameter vector
+ -- according to the l-BFGS method
+ self.output = lbfgs.run(self.parameters, self.gradParameters,
+ self.maxEvaluation, self.maxIterations, self.maxLineSearch,
+ self.sparsity, self.verbose)
end
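
With the refactoring above, the L-BFGS optimizer only overrides `optimize()`; batching and the `evaluate` closure come from the parent class, and construction gains a `maxEvaluation` cap. A hedged sketch (`mlp`, `inputs`, `targets` assumed defined as in the earlier sketches):

``` lua
-- sketch only: the refactored optimizer is driven exactly like SGD
local lbfgsOpt = nn.LBFGSOptimization{
   module        = mlp,
   criterion     = nn.MSECriterion(),
   maxEvaluation = 10,    -- new argument: cap on f(X)/df/dX evaluations per pass
   maxIterations = 5,
   maxLineSearch = 20
}
local err = lbfgsOpt:forward(inputs, targets)
```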
diff --git a/OnlineTrainer.lua b/OnlineTrainer.lua
index 2b7f2b5..dc6e860 100644
--- a/OnlineTrainer.lua
+++ b/OnlineTrainer.lua
@@ -16,19 +16,23 @@ function OnlineTrainer:__init(...)
.. '> ',
{arg='module', type='nn.Module', help='a module to train', req=true},
- {arg='criterion', type='nn.Criterion', help='a criterion to estimate the error'},
- {arg='preprocessor', type='nn.Module', help='a preprocessor to prime the data before the module'},
- {arg='optimizer', type='nn.Optimization', help='an optimization method'},
-
- {arg='batchSize', type='number', help='[mini] batch size', default=1},
- {arg='maxEpoch', type='number', help='maximum number of epochs', default=50},
- {arg='dispProgress', type='boolean', help='display a progress bar during training/testing', default=true},
- {arg='save', type='string', help='path to save networks and log training'},
- {arg='timestamp', type='boolean', help='if true, appends a timestamp to each network saved', default=false}
+ {arg='criterion', type='nn.Criterion',
+ help='a criterion to estimate the error'},
+ {arg='preprocessor', type='nn.Module',
+ help='a preprocessor to prime the data before the module'},
+ {arg='optimizer', type='nn.Optimization',
+ help='an optimization method'},
+ {arg='batchSize', type='number',
+ help='[mini] batch size', default=1},
+ {arg='maxEpoch', type='number',
+ help='maximum number of epochs', default=50},
+ {arg='dispProgress', type='boolean',
+ help='display a progress bar during training/testing', default=true},
+ {arg='save', type='string',
+ help='path to save networks and log training'},
+ {arg='timestamp', type='boolean',
+ help='if true, appends a timestamp to each network saved', default=false}
)
- -- private params
- self.trainOffset = 0
- self.testOffset = 0
end
function OnlineTrainer:log()
@@ -56,18 +60,9 @@ function OnlineTrainer:train(dataset)
local criterion = self.criterion
self.trainset = dataset
- local shuffledIndices = {}
- if not self.shuffleIndices then
- for t = 1,dataset:size() do
- shuffledIndices[t] = t
- end
- else
- shuffledIndices = lab.randperm(dataset:size())
- end
-
while true do
print('<trainer> on training set:')
- print("<trainer> online epoch # " .. self.epoch .. '[batchSize = ' .. self.batchSize .. ']')
+ print("<trainer> online epoch # " .. self.epoch .. ' [batchSize = ' .. self.batchSize .. ']')
self.time = sys.clock()
self.currentError = 0
@@ -82,7 +77,7 @@ function OnlineTrainer:train(dataset)
local targets = {}
for i = t,math.min(t+self.batchSize-1,dataset:size()) do
-- load new sample
- local sample = dataset[self.trainOffset + shuffledIndices[i]]
+ local sample = dataset[i]
local input = sample[1]
local target = sample[2]
@@ -121,10 +116,6 @@ function OnlineTrainer:train(dataset)
self.epoch = self.epoch + 1
- if dataset.infiniteSet then
- self.trainOffset = self.trainOffset + dataset:size()
- end
-
if self.maxEpoch > 0 and self.epoch > self.maxEpoch then
print("<trainer> you have reached the maximum number of epochs")
break
@@ -137,20 +128,10 @@ function OnlineTrainer:test(dataset)
print('<trainer> on testing Set:')
local module = self.module
- local shuffledIndices = {}
local criterion = self.criterion
self.currentError = 0
self.testset = dataset
- local shuffledIndices = {}
- if not self.shuffleIndices then
- for t = 1,dataset:size() do
- shuffledIndices[t] = t
- end
- else
- shuffledIndices = lab.randperm(dataset:size())
- end
-
self.time = sys.clock()
for t = 1,dataset:size() do
-- disp progress
@@ -159,7 +140,7 @@ function OnlineTrainer:test(dataset)
end
-- get new sample
- local sample = dataset[self.testOffset + shuffledIndices[t]]
+ local sample = dataset[t]
local input = sample[1]
local target = sample[2]
@@ -190,10 +171,6 @@ function OnlineTrainer:test(dataset)
self.hookTestEpoch(self)
end
- if dataset.infiniteSet then
- self.testOffset = self.testOffset + dataset:size()
- end
-
return self.currentError
end
diff --git a/Optimization.lua b/Optimization.lua
index f18c635..daf0a8d 100644
--- a/Optimization.lua
+++ b/Optimization.lua
@@ -1,56 +1,11 @@
local Optimization = torch.class('nn.Optimization')
function Optimization:__init()
+ self.output = 0
end
function Optimization:forward(inputs, targets)
- self:flatten(parameters, gradParameters)
self.output = 0
- self:unflatten(parameters, gradParameters)
+ print('<Optimization> WARNING: this is a virtual function, please overload !')
return self.output
end
-
-function Optimization:flatten(parameters, gradParameters)
- if type(parameters) == 'table' then
- -- create flat parameters
- self.parameters = self.parameters or torch.Tensor()
- self.gradParameters = self.gradParameters or torch.Tensor()
- -- assuming that the parameters won't change their size,
- -- we compute offsets once
- if not self.offsets then
- self.nParameters = 0
- self.offsets = {}
- for _,param in ipairs(parameters) do
- table.insert(self.offsets, self.nParameters+1)
- self.nParameters = self.nParameters + param:nElement()
- end
- self.parameters:resize(self.nParameters)
- self.gradParameters:resize(self.nParameters)
- end
- -- copy all params in flat array
- for i = 1,#parameters do
- local nElement = parameters[i]:nElement()
- self.parameters:narrow(1,self.offsets[i],nElement):copy(parameters[i])
- self.gradParameters:narrow(1,self.offsets[i],nElement):copy(gradParameters[i])
- end
- else
- self.parameters = parameters
- self.gradParameters = gradParameters
- end
-end
-
-function Optimization:unflatten(parameters, gradParameters)
- if type(parameters) == 'table' then
- -- copy all params into unflat arrays
- local offset = 1
- for i = 1,#parameters do
- local nElement = parameters[i]:nElement()
- parameters[i]:copy(self.parameters:narrow(1,offset,nElement))
- gradParameters[i]:copy(self.gradParameters:narrow(1,offset,nElement))
- offset = offset + nElement
- end
- else
- parameters = self.parameters
- gradParameters = self.gradParameters
- end
-end
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..f30f5b9
--- /dev/null
+++ b/README.md
@@ -0,0 +1,67 @@
+# nnx: an Xperimental package for neural network modules + optimizations
+
+The original neural network package from Torch7, 'nn', contains stable and widely
+used modules. 'nnx' contains more experimental, unproven modules, and
+optimizations. Eventually, modules that become stable enough will make
+their way into 'nn' (some already have).
+
+## Install dependencies
+
+1/ third-party libraries:
+
+On Linux (Ubuntu > 9.04):
+
+``` sh
+$ apt-get install gcc g++ git libreadline5-dev cmake wget
+```
+
+On Mac OS (Leopard or later), using [Homebrew](http://mxcl.github.com/homebrew/):
+
+``` sh
+$ brew install git readline cmake wget
+```
+
+2/ Lua 5.1 + Luarocks + xLua:
+
+``` sh
+$ git clone https://github.com/clementfarabet/lua4torch
+$ cd lua4torch
+$ make install PREFIX=/usr/local
+```
+
+3/ nnx:
+
+Note: this automatically installs Torch7+nn, and other Lua dependencies.
+
+``` sh
+$ luarocks install nnx
+```
+
+## Use the library
+
+First run xlua, and load nnx:
+
+``` sh
+$ xlua
+```
+
+``` lua
+> require 'nnx'
+```
+
+Once loaded, tab-completion will help you navigate through the
+library (note that most functions are added directly to nn):
+
+``` lua
+> nnx. + TAB
+...
+> nn. + TAB
+```
+
+In particular, it's a good idea to verify that all the provided modules pass
+their tests:
+
+``` lua
+> nnx.test_all()
+> nnx.test_omp()
+```
diff --git a/README.txt b/README.txt
deleted file mode 100644
index 6f183e9..0000000
--- a/README.txt
+++ /dev/null
@@ -1,13 +0,0 @@
-
-INSTALL:
-$ luarocks --from=http://data.neuflow.org/lua/rocks install nnx
-
-USE:
-> require 'nnx'
-> n1 = nn.SpatialLinear(16,4)
-
--- run tests:
-> nnx.test_all()
-...
-> nnx.test_omp()
-...
diff --git a/SGDOptimization.lua b/SGDOptimization.lua
index 8bfe9a5..ddbf220 100644
--- a/SGDOptimization.lua
+++ b/SGDOptimization.lua
@@ -1,81 +1,48 @@
-local SGD,parent = torch.class('nn.SGDOptimization', 'nn.Optimization')
+local SGD,parent = torch.class('nn.SGDOptimization', 'nn.BatchOptimization')
function SGD:__init(...)
- parent.__init(self)
+ parent.__init(self,...)
xlua.unpack_class(self, {...},
'SGDOptimization', nil,
- {arg='module', type='nn.Module', help='a module to train', req=true},
- {arg='criterion', type='nn.Criterion', help='a criterion to estimate the error', req=true},
- {arg='learningRate', type='number', help='learning rate (W = W - rate*dE/dW)', default=1e-2},
- {arg='weightDecay', type='number', help='amount of weight decay (W = W - decay*W)', default=0},
- {arg='momentum', type='number', help='amount of momentum on weights (dE/W = dE/dW*(1-momentum) + prev(dE/dW)*momentum)', default=0}
+ {arg='maxIterations', type='number',
+ help='maximum nb of iterations per pass', default=1},
+ {arg='learningRate', type='number',
+ help='learning rate (W = W - rate*dE/dW)', default=1e-2},
+ {arg='learningRateDecay', type='number',
+ help='learning rate decay (lr_t = lr_0 / (1 + samplesSeen*lrDecay))', default=0},
+ {arg='weightDecay', type='number',
+ help='amount of weight decay (W = W - decay*W)', default=0},
+ {arg='momentum', type='number',
+ help='amount of momentum on weights (dE/W = dE/dW*(1-momentum) + prev(dE/dW)*momentum)', default=0}
)
- self.parametersT = nnx.getParameters(self.module)
- self.gradParametersT = nnx.getGradParameters(self.module)
+ self.parameters = nnx.flattenParameters(nnx.getParameters(self.module))
+ self.gradParameters = nnx.flattenParameters(nnx.getGradParameters(self.module))
end
-function SGD:forward(inputs, targets, options)
- options = options or {}
-
- -- reset gradients
- self.module:zeroGradParameters()
-
- -- f is the average of all criterions
- self.output = 0
-
- -- given all inputs, evaluate gradients
- for i = 1,#inputs do
- -- user hook
- if self.prehook then
- self.prehook(self, {inputs[i], targets[i], options[i]})
+function SGD:optimize()
+ -- optimize N times
+ for i = 1,self.maxIterations do
+ -- evaluate f(X) + df/dX
+ self.evaluate()
+
+ -- apply momentum
+ if self.momentum ~= 0 then
+ if not self.currentGradParameters then
+ self.currentGradParameters = torch.Tensor():resizeAs(self.gradParameters):copy(self.gradParameters)
+ else
+ self.currentGradParameters:mul(self.momentum):add(1-self.momentum, self.gradParameters)
+ end
+ else
+ self.currentGradParameters = self.gradParameters
end
- -- estimate f
- local output = self.module:forward(inputs[i])
- local err = self.criterion:forward(output, targets[i])
- self.output = self.output + err
-
- -- estimate df/dW
- local df_do = self.criterion:backward(output, targets[i])
- self.module:backward(inputs[i], df_do)
-
- -- user hook
- if self.posthook then
- self.posthook(self, {inputs[i], targets[i], options[i]})
+ -- weight decay
+ if self.weightDecay ~= 0 then
+ self.parameters:add(-self.weightDecay, self.parameters)
end
- end
- -- renorm f
- self.output = self.output / #inputs
-
- -- update state from computed parameters
- self:flatten(self.parametersT, self.gradParametersT)
-
- -- normalize gradients
- self.gradParameters:div(#inputs)
-
- -- apply momentum
- if self.momentum ~= 0 then
- if not self.currentGradParameters then
- self.currentGradParameters = torch.Tensor():resizeAs(self.gradParameters):copy(self.gradParameters)
- else
- self.currentGradParameters:mul(self.momentum):add(1-self.momentum, self.gradParameters)
- end
- else
- self.currentGradParameters = self.gradParameters
- end
-
- -- weight decay
- if self.weightDecay ~= 0 then
- self.parameters:add(-self.weightDecay, self.parameters)
+ -- update parameters
+ local learningRate = self.learningRate / (1 + self.sampleCounter*self.learningRateDecay)
+ self.parameters:add(-learningRate, self.currentGradParameters)
end
-
- -- update parameters
- self.parameters:add(-self.learningRate, self.currentGradParameters)
-
- -- write compute parameters back in place
- self:unflatten(self.parametersT, self.gradParametersT)
-
- -- return current output
- return self.output
end
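
The new `learningRateDecay` argument implements the annealing schedule stated in its help string above, lr_t = lr_0 / (1 + samplesSeen * lrDecay). A quick numeric check (not part of the patch) of what that schedule does:

``` lua
-- sketch only: effective learning rate as samples are seen
local lr0, decay = 1e-2, 1e-4
for _,seen in ipairs{0, 1000, 10000} do
   print(seen, lr0 / (1 + seen*decay))
end
-- 0      0.01
-- 1000   ~0.00909
-- 10000  0.005
```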
diff --git a/StochasticTrainer.lua b/StochasticTrainer.lua
deleted file mode 100644
index 62fb670..0000000
--- a/StochasticTrainer.lua
+++ /dev/null
@@ -1,265 +0,0 @@
-local StochasticTrainer, parent = torch.class('nn.StochasticTrainer','nn.Trainer')
-
-function StochasticTrainer:__init(...)
- parent.__init(self)
- -- unpack args
- xlua.unpack_class(self, {...},
- 'StochasticTrainer',
-
- 'A general-purpose stochastic trainer class.\n'
- .. 'Provides 4 user hooks to perform extra work after each sample, or each epoch:\n'
- .. '> trainer = nn.StochasticTrainer(...) \n'
- .. '> trainer.hookTrainSample = function(trainer, sample) ... end \n'
- .. '> trainer.hookTrainEpoch = function(trainer) ... end \n'
- .. '> trainer.hookTestSample = function(trainer, sample) ... end \n'
- .. '> trainer.hookTestEpoch = function(trainer) ... end \n'
- .. '> ',
-
- {arg='module', type='nn.Module', help='a module to train', req=true},
- {arg='criterion', type='nn.Module', help='a criterion to estimate the error'},
- {arg='preprocessor', type='nn.Module', help='a preprocessor to prime the data before the module'},
-
- {arg='learningRate', type='number', help='learning rate (W = W - rate*dE/dW)', default=1e-2},
- {arg='learningRateDecay', type='number', help='learning rate decay (rate = rate * (1-decay), at each epoch)', default=0},
- {arg='weightDecay', type='number', help='amount of weight decay (W = W - decay*W)', default=0},
- {arg='momentum', type='number', help='amount of momentum on weights (dE/W = dE/dW + momentum*prev(dE/dW))', default=0},
- {arg='maxEpoch', type='number', help='maximum number of epochs', default=50},
-
- {arg='maxTarget', type='boolean', help='replaces an CxHxW target map by a HxN target of max values (for NLL criterions)', default=false},
- {arg='dispProgress', type='boolean', help='display a progress bar during training/testing', default=true},
- {arg='skipUniformTargets', type='boolean', help='skip uniform (flat) targets during training', default=false},
-
- {arg='save', type='string', help='path to save networks and log training'},
- {arg='timestamp', type='boolean', help='if true, appends a timestamp to each network saved', default=false}
- )
- -- instantiate SGD optimization module
- self.optimizer = nn.SGDOptimization(self.learningRate, self.weightDecay, self.momentum)
- -- private params
- self.errorArray = self.skipUniformTargets
- self.trainOffset = 0
- self.testOffset = 0
-end
-
-function StochasticTrainer:log()
- -- save network
- local filename = self.save
- os.execute('mkdir -p ' .. sys.dirname(filename))
- if self.timestamp then
- -- use a timestamp to store all networks uniquely
- filename = filename .. '-' .. os.date("%Y_%m_%d_%X")
- else
- -- if no timestamp, just store the previous one
- if sys.filep(filename) then
- os.execute('mv ' .. filename .. ' ' .. filename .. '.old')
- end
- end
- print('<trainer> saving network to '..filename)
- local file = torch.DiskFile(filename,'w')
- self.module:write(file)
- file:close()
-end
-
-function StochasticTrainer:train(dataset)
- self.epoch = self.epoch or 1
- local currentLearningRate = self.learningRate
- local module = self.module
- local criterion = self.criterion
- self.trainset = dataset
-
- local shuffledIndices = {}
- if not self.shuffleIndices then
- for t = 1,dataset:size() do
- shuffledIndices[t] = t
- end
- else
- shuffledIndices = lab.randperm(dataset:size())
- end
-
- local parameters = nnx.getParameters(module)
- local gradParameters = nnx.getGradParameters(module)
-
- while true do
- print('<trainer> on training set:')
- print("<trainer> stochastic gradient descent epoch # " .. self.epoch)
-
- module:zeroGradParameters()
-
- self.time = sys.clock()
- self.currentError = 0
- for t = 1,dataset:size() do
- -- disp progress
- if self.dispProgress then
- xlua.progress(t, dataset:size())
- end
-
- -- load new sample
- local sample = dataset[self.trainOffset + shuffledIndices[t]]
- local input = sample[1]
- local target = sample[2]
- local sample_x = sample.x
- local sample_y = sample.y
-
- -- get max of target ?
- if self.maxTarget then
- target = torch.Tensor(target:nElement()):copy(target)
- _,target = lab.max(target)
- target = target[1]
- end
-
- -- is target uniform ?
- local isUniform = false
- if self.errorArray and target:min() == target:max() then
- isUniform = true
- end
-
- -- perform SGD step
- if not (self.skipUniformTargets and isUniform) then
- -- optional preprocess
- if self.preprocessor then input = self.preprocessor:forward(input) end
-
- -- forward through model and criterion
- -- (if no criterion, it is assumed to be contained in the model)
- local modelOut, error
- if criterion then
- modelOut = module:forward(input)
- error = criterion:forward(modelOut, target)
- else
- modelOut, error = module:forward(input, target, sample_x, sample_y)
- end
-
- -- accumulate error
- self.currentError = self.currentError + error
-
- -- reset gradients
- module:zeroGradParameters()
-
- -- backward through model
- -- (if no criterion, it is assumed that derror is internally generated)
- if criterion then
- local derror = criterion:backward(module.output, target)
- module:backward(input, derror)
- else
- module:backward(input)
- end
-
- -- update parameters in the model
- self.optimizer:forward(parameters, gradParameters)
- end
-
- -- call user hook, if any
- if self.hookTrainSample then
- self.hookTrainSample(self, sample)
- end
- end
-
- self.currentError = self.currentError / dataset:size()
- print("<trainer> current error = " .. self.currentError)
-
- self.time = sys.clock() - self.time
- self.time = self.time / dataset:size()
- print("<trainer> time to learn 1 sample = " .. (self.time*1000) .. 'ms')
-
- if self.hookTrainEpoch then
- self.hookTrainEpoch(self)
- end
-
- if self.save then self:log() end
-
- self.epoch = self.epoch + 1
- currentLearningRate = self.learningRate/(1+self.epoch*self.learningRateDecay)
- self.optimizer.learningRate = currentLearningRate
-
- if dataset.infiniteSet then
- self.trainOffset = self.trainOffset + dataset:size()
- end
-
- if self.maxEpoch > 0 and self.epoch > self.maxEpoch then
- print("<trainer> you have reached the maximum number of epochs")
- break
- end
- end
-end
-
-
-function StochasticTrainer:test(dataset)
- print('<trainer> on testing Set:')
-
- local module = self.module
- local shuffledIndices = {}
- local criterion = self.criterion
- self.currentError = 0
- self.testset = dataset
-
- local shuffledIndices = {}
- if not self.shuffleIndices then
- for t = 1,dataset:size() do
- shuffledIndices[t] = t
- end
- else
- shuffledIndices = lab.randperm(dataset:size())
- end
-
- self.time = sys.clock()
- for t = 1,dataset:size() do
- -- disp progress
- if self.dispProgress then
- xlua.progress(t, dataset:size())
- end
-
- -- get new sample
- local sample = dataset[self.testOffset + shuffledIndices[t]]
- local input = sample[1]
- local target = sample[2]
-
- -- max target ?
- if self.maxTarget then
- target = torch.Tensor(target:nElement()):copy(target)
- _,target = lab.max(target)
- target = target[1]
- end
-
- -- test sample through current model
- if self.preprocessor then input = self.preprocessor:forward(input) end
- if criterion then
- self.currentError = self.currentError +
- criterion:forward(module:forward(input), target)
- else
- local _,error = module:forward(input, target)
- self.currentError = self.currentError + error
- end
-
- -- user hook
- if self.hookTestSample then
- self.hookTestSample(self, sample)
- end
- end
-
- self.currentError = self.currentError / dataset:size()
- print("<trainer> test current error = " .. self.currentError)
-
- self.time = sys.clock() - self.time
- self.time = self.time / dataset:size()
- print("<trainer> time to test 1 sample = " .. (self.time*1000) .. 'ms')
-
- if self.hookTestEpoch then
- self.hookTestEpoch(self)
- end
-
- if dataset.infiniteSet then
- self.testOffset = self.testOffset + dataset:size()
- end
-
- return self.currentError
-end
-
-function StochasticTrainer:write(file)
- parent.write(self,file)
- file:writeObject(self.module)
- file:writeObject(self.criterion)
-end
-
-function StochasticTrainer:read(file)
- parent.read(self,file)
- self.module = file:readObject()
- self.criterion = file:readObject()
-end
diff --git a/Trainer.lua b/Trainer.lua
index 3388ef7..b7da770 100644
--- a/Trainer.lua
+++ b/Trainer.lua
@@ -4,7 +4,6 @@ function Trainer:__init()
self.learningRate = 0.01
self.learningRateDecay = 0
self.maxIteration = 25
- self.shuffleIndices = true
end
function Trainer:train(dataset)
@@ -14,14 +13,12 @@ function Trainer:write(file)
file:writeDouble(self.learningRate)
file:writeDouble(self.learningRateDecay)
file:writeInt(self.maxIteration)
- file:writeBool(self.shuffleIndices)
end
function Trainer:read(file)
self.learningRate = file:readDouble()
self.learningRateDecay = file:readDouble()
self.maxIteration = file:readInt()
- self.shuffleIndices = file:readBool()
end
function Trainer:share(mlp, ...)
@@ -30,10 +27,6 @@ function Trainer:share(mlp, ...)
end
end
-function Trainer:setShuffle(bool)
- self.shuffleIndices = bool
-end
-
function Trainer:clone(...)
local f = torch.MemoryFile("rw"):binary()
f:writeObject(self)
diff --git a/init.lua b/init.lua
index 6b8b7e5..3519294 100644
--- a/init.lua
+++ b/init.lua
@@ -93,19 +93,21 @@ torch.include('nnx', 'SpatialColorTransform.lua')
-- criterions:
torch.include('nnx', 'SuperCriterion.lua')
torch.include('nnx', 'SparseCriterion.lua')
+torch.include('nnx', 'DistNLLCriterion.lua')
torch.include('nnx', 'SpatialMSECriterion.lua')
torch.include('nnx', 'SpatialClassNLLCriterion.lua')
torch.include('nnx', 'SpatialSparseCriterion.lua')
-- optimizations:
torch.include('nnx', 'Optimization.lua')
+torch.include('nnx', 'BatchOptimization.lua')
torch.include('nnx', 'SGDOptimization.lua')
torch.include('nnx', 'LBFGSOptimization.lua')
-- trainers:
torch.include('nnx', 'Trainer.lua')
torch.include('nnx', 'OnlineTrainer.lua')
-torch.include('nnx', 'StochasticTrainer.lua')
+torch.include('nnx', 'BatchTrainer.lua')
-- datasets:
torch.include('nnx', 'DataSet.lua')
@@ -185,3 +187,29 @@ function nnx.getGradParameters(...)
-- return all parameters found
return holder
end
+
+function nnx.flattenParameters(parameters)
+ -- compute offsets of each parameter
+ local offsets = {}
+ local dimensions = {}
+ local elements = {}
+ local nParameters = 0
+ for _,param in ipairs(parameters) do
+ table.insert(offsets, nParameters+1)
+ table.insert(dimensions, param:size())
+ table.insert(elements, param:nElement())
+ nParameters = nParameters + param:nElement()
+ end
+ -- create flat vector
+ local flatParameters = torch.Tensor(nParameters)
+ local storage = flatParameters:storage()
+ -- reallocate all parameters in flat vector
+ for i = 1,#parameters do
+ local data = parameters[i]:clone()
+ parameters[i]:set(storage, offsets[i], elements[i]):resize(dimensions[i]):copy(data)
+ end
+ -- cleanup
+ collectgarbage()
+ -- return new flat vector that contains all discrete parameters
+ return flatParameters
+end
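
The key property of `nnx.flattenParameters()` above is that it does not copy parameters out: each weight/bias tensor is re-set as a view into one shared flat storage, so writes through the flat vector are seen by the module. A minimal sketch (not part of the patch):

``` lua
-- sketch only: the flat vector and the module share the same storage
local net  = nn.Sequential():add(nn.Linear(3, 2))
local flat = nnx.flattenParameters(nnx.getParameters(net))

flat:fill(0)                    -- zeroes the Linear layer's weight and bias
print(net:get(1).weight:sum())  -- prints 0
```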
diff --git a/lbfgs.c b/lbfgs.c
index 93680bd..851a8dd 100644
--- a/lbfgs.c
+++ b/lbfgs.c
@@ -81,6 +81,12 @@
#define max2(a, b) ((a) >= (b) ? (a) : (b))
#define max3(a, b, c) max2(max2((a), (b)), (c));
+// extra globals
+static int nEvaluation = 0;
+static int maxEval = 0; // maximum number of function evaluations
+static int nIteration = 0;
+static int verbose = 0;
+
struct tag_callback_data {
int n;
void *instance;
@@ -415,7 +421,8 @@ int lbfgs(
fx += xnorm * param.orthantwise_c;
owlqn_pseudo_gradient(
pg, x, g, n,
- param.orthantwise_c, param.orthantwise_start, param.orthantwise_end
+ param.orthantwise_c,
+ param.orthantwise_start, param.orthantwise_end
);
}
@@ -468,7 +475,8 @@ int lbfgs(
ls = linesearch(n, x, &fx, g, d, &step, xp, pg, w, &cd, &param);
owlqn_pseudo_gradient(
pg, x, g, n,
- param.orthantwise_c, param.orthantwise_start, param.orthantwise_end
+ param.orthantwise_c,
+ param.orthantwise_start, param.orthantwise_end
);
}
if (ls < 0) {
@@ -476,6 +484,9 @@ int lbfgs(
veccpy(x, xp, n);
veccpy(g, gp, n);
ret = ls;
+ if (verbose > 1){
+ printf("Stopping b/c ls (%d) < 0\n", ls);
+ }
goto lbfgs_exit;
}
@@ -490,10 +501,20 @@ int lbfgs(
/* Report the progress. */
if (cd.proc_progress) {
if ((ret = cd.proc_progress(cd.instance, x, g, fx, xnorm, gnorm, step, cd.n, k, ls))) {
- goto lbfgs_exit;
+ if (verbose > 1){
+ printf("Stopping b/c cd.proc_progress (%d)\n", ret);
+ }
+ goto lbfgs_exit;
}
}
+ /* Count number of function evaluations */
+ if ((maxEval != 0)&&(nEvaluation > maxEval)) {
+ if (verbose > 1){
+ printf("Stopping b/c exceeded max number of function evaluations\n");
+ }
+ goto lbfgs_exit;
+ }
/*
Convergence test.
The criterion is given by the following formula:
@@ -501,6 +522,10 @@ int lbfgs(
*/
if (xnorm < 1.0) xnorm = 1.0;
if (gnorm / xnorm <= param.epsilon) {
+ if (verbose > 1){
+ printf("Stopping b/c gnorm(%f)/xnorm(%f) <= param.epsilon (%f)\n",
+ gnorm, xnorm, param.epsilon);
+ }
/* Convergence. */
ret = LBFGS_SUCCESS;
break;
@@ -519,6 +544,10 @@ int lbfgs(
/* The stopping criterion. */
if (rate < param.delta) {
+ if (verbose > 1){
+ printf("Stopping b/c rate (%f) < param.delta (%f)\n",
+ rate, param.delta);
+ }
ret = LBFGS_STOP;
break;
}
@@ -529,6 +558,10 @@ int lbfgs(
}
if (param.max_iterations != 0 && param.max_iterations < k+1) {
+ if (verbose > 1){
+ printf("Stopping b/c param.max_iterations (%d) < k+1 (%d)\n",
+ param.max_iterations, k+1);
+ }
/* Maximum number of iterations. */
ret = LBFGSERR_MAXIMUMITERATION;
break;
@@ -1375,9 +1408,6 @@ static THDoubleTensor *gradParameters = NULL;
static int nParameter = 0;
static lua_State *GL = NULL;
static lbfgs_parameter_t lbfgs_param;
-static int nEvaluation = 0;
-static int nIteration = 0;
-static int verbose = 0;
static lbfgsfloatval_t evaluate(void *instance,
const lbfgsfloatval_t *x,
@@ -1417,12 +1447,11 @@ static int progress(void *instance,
int ls)
{
nIteration = k;
- if (verbose == 2) {
- printf("\n<LBFGSOptimization> iteration %d:\n", nIteration);
- printf(" + fx = %f\n", fx);
+ if (verbose > 1) {
+ printf("<LBFGSOptimization> iteration %d:\n", nIteration);
+ printf(" + f(X) = %f\n", fx);
printf(" + xnorm = %f, gnorm = %f, step = %f\n", xnorm, gnorm, step);
printf(" + nb evaluations = %d\n", nEvaluation);
- printf("\n");
}
return 0;
}
@@ -1433,27 +1462,22 @@ int lbfgs_run(lua_State *L) {
parameters = luaT_checkudata(L, 1, torch_DoubleTensor_id);
gradParameters = luaT_checkudata(L, 2, torch_DoubleTensor_id);
nParameter = THDoubleTensor_nElement(parameters);
-
// parameters for algorithm
nEvaluation = 0;
lbfgsfloatval_t fx;
lbfgsfloatval_t *x = lbfgs_malloc(nParameter);
- // get verbose level
- lua_getfield(GL, LUA_GLOBALSINDEX, "lbfgs"); // push lbfgs on top of stack
- lua_getfield(GL, -1, "verbose"); // push lbfgs.verbose on top of stack
- verbose = lua_tonumber(GL, -1); // verbose = lbfgs.verbose
- lua_pop(GL, 2); // pop last two entries
-
- // initialize vector x <- parameters
memcpy(x, THDoubleTensor_data(parameters), sizeof(double)*nParameter);
// initialize the parameters for the L-BFGS optimization
lbfgs_parameter_init(&lbfgs_param);
- lbfgs_param.max_iterations = lua_tonumber(L, 3);
- lbfgs_param.max_linesearch = lua_tonumber(L, 4);
+ maxEval = lua_tonumber(L,3);
+ lbfgs_param.max_iterations = lua_tonumber(L, 4);
+ lbfgs_param.max_linesearch = lua_tonumber(L, 5);
lbfgs_param.linesearch = LBFGS_LINESEARCH_BACKTRACKING;
- lbfgs_param.orthantwise_c = lua_tonumber(L, 5);
+ lbfgs_param.orthantwise_c = lua_tonumber(L, 6);
+ // get verbose level
+ verbose = lua_tonumber(L,7);
// Start the L-BFGS optimization; this will invoke the callback functions
// evaluate() and progress() when necessary.
@@ -1461,10 +1485,10 @@ int lbfgs_run(lua_State *L) {
// verbose
if (verbose) {
- printf("\n<LBFGSOptimization> batch optimized after %d iterations\n", nIteration);
- printf(" + fx = %f\n", fx);
+ printf("<LBFGSOptimization> batch optimized after %d iterations\n", nIteration);
+ printf(" + f(X) = %f\n", fx);
+ printf(" + X = [%f , ... %f]\n",x[0],x[nParameter-1]);
printf(" + nb evaluations = %d\n", nEvaluation);
- printf("\n");
}
// cleanup
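
The Lua-facing change in lbfgs_run() above is the argument order: the maximum number of function evaluations is now read at stack position 3 and the verbose level at position 7, instead of via the old `lbfgs.verbose` global. For reference, this is how it is wired up from LBFGSOptimization:optimize() earlier in this patch:

``` lua
-- as called from LBFGSOptimization:optimize() above
lbfgs.evaluate = self.evaluate
self.output = lbfgs.run(self.parameters, self.gradParameters,
                        self.maxEvaluation, self.maxIterations,
                        self.maxLineSearch, self.sparsity, self.verbose)
```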
diff --git a/nnx-1.0-1.rockspec b/nnx-1.0-1.rockspec
index 4529d24..dcb8d1b 100644
--- a/nnx-1.0-1.rockspec
+++ b/nnx-1.0-1.rockspec
@@ -62,6 +62,7 @@ build = {
install_files(/lua/nnx init.lua)
install_files(/lua/nnx Abs.lua)
install_files(/lua/nnx ConfusionMatrix.lua)
+ install_files(/lua/nnx DistNLLCriterion.lua)
install_files(/lua/nnx Logger.lua)
install_files(/lua/nnx Probe.lua)
install_files(/lua/nnx HardShrink.lua)
@@ -83,7 +84,6 @@ build = {
install_files(/lua/nnx SpatialCriterion.lua)
install_files(/lua/nnx Trainer.lua)
install_files(/lua/nnx OnlineTrainer.lua)
- install_files(/lua/nnx StochasticTrainer.lua)
install_files(/lua/nnx DataSet.lua)
install_files(/lua/nnx DataList.lua)
install_files(/lua/nnx DataSetLabelMe.lua)
@@ -103,6 +103,8 @@ build = {
install_files(/lua/nnx Optimization.lua)
install_files(/lua/nnx LBFGSOptimization.lua)
install_files(/lua/nnx SGDOptimization.lua)
+ install_files(/lua/nnx BatchOptimization.lua)
+ install_files(/lua/nnx BatchTrainer.lua)
add_subdirectory (test)
install_targets(/lib nnx)
]],
diff --git a/test/test-all.lua b/test/test-all.lua
index 148e860..f7e591a 100644
--- a/test/test-all.lua
+++ b/test/test-all.lua
@@ -301,93 +301,6 @@ function nnxtest.SpatialConvolution()
mytester:asserteq(berr, 0, torch.typename(module) .. ' - i/o backward err ')
end
-function nnxtest.SpatialConvolutionSparse_1()
- local from = math.random(1,10)
- local to = math.random(1,10)
- local ini = math.random(10,20)
- local inj = math.random(10,20)
- local ki = math.random(1,10)
- local kj = math.random(1,10)
- local si = math.random(1,1)
- local sj = math.random(1,1)
-
- local ct = nn.tables.full(from,to)
- local module = nn.SpatialConvolutionSparse(ct, ki, kj, si, sj)
- local input = torch.Tensor(from, inj, ini):zero()
- module:reset()
-
- local err = nn.Jacobian.testJacobian(module, input)
- mytester:assertlt(err, precision, 'error on state ')
-
- local err = nn.Jacobian.testJacobianParameters(module, input, module.weight, module.gradWeight)
- mytester:assertlt(err, precision, 'error on weight ')
-
- local err = nn.Jacobian.testJacobianParameters(module, input, module.bias, module.gradBias)
- mytester:assertlt(err, precision, 'error on bias ')
-
- local ferr, berr = nn.Jacobian.testIO(module, input)
- mytester:asserteq(ferr, 0, torch.typename(module) .. ' - i/o forward err ')
- mytester:asserteq(berr, 0, torch.typename(module) .. ' - i/o backward err ')
-end
-
-function nnxtest.SpatialConvolutionSparse_2()
- local from = math.random(1,10)
- local to = math.random(1,10)
- local ini = math.random(10,20)
- local inj = math.random(10,20)
- local ki = math.random(1,10)
- local kj = math.random(1,10)
- local si = math.random(1,1)
- local sj = math.random(1,1)
-
- local ct = nn.tables.oneToOne(from)
- local module = nn.SpatialConvolutionSparse(ct, ki, kj, si, sj)
- local input = torch.Tensor(from, inj, ini):zero()
- module:reset()
-
- local err = nn.Jacobian.testJacobian(module, input)
- mytester:assertlt(err, precision, 'error on state ')
-
- local err = nn.Jacobian.testJacobianParameters(module, input, module.weight, module.gradWeight)
- mytester:assertlt(err, precision, 'error on weight ')
-
- local err = nn.Jacobian.testJacobianParameters(module, input, module.bias, module.gradBias)
- mytester:assertlt(err, precision, 'error on bias ')
-
- local ferr, berr = nn.Jacobian.testIO(module, input)
- mytester:asserteq(ferr, 0, torch.typename(module) .. ' - i/o forward err ')
- mytester:asserteq(berr, 0, torch.typename(module) .. ' - i/o backward err ')
-end
-
-function nnxtest.SpatialConvolutionSparse_3()
- local from = math.random(2,6)
- local to = math.random(4,8)
- local ini = math.random(10,20)
- local inj = math.random(10,20)
- local ki = math.random(1,10)
- local kj = math.random(1,10)
- local si = math.random(1,1)
- local sj = math.random(1,1)
-
- local ct = nn.tables.random(from,to,from-1)
- local module = nn.SpatialConvolutionSparse(ct, ki, kj, si, sj)
- local input = torch.Tensor(from, inj, ini):zero()
- module:reset()
-
- local err = nn.Jacobian.testJacobian(module, input)
- mytester:assertlt(err, precision, 'error on state ')
-
- local err = nn.Jacobian.testJacobianParameters(module, input, module.weight, module.gradWeight)
- mytester:assertlt(err, precision, 'error on weight ')
-
- local err = nn.Jacobian.testJacobianParameters(module, input, module.bias, module.gradBias)
- mytester:assertlt(err, precision, 'error on bias ')
-
- local ferr, berr = nn.Jacobian.testIO(module, input)
- mytester:asserteq(ferr, 0, torch.typename(module) .. ' - i/o forward err ')
- mytester:asserteq(berr, 0, torch.typename(module) .. ' - i/o backward err ')
-end
-
function nnxtest.SpatialNormalization_Gaussian2D()
local inputSize = math.random(11,20)
local kersize = 9