github.com/clementfarabet/lua---nnx.git
author     nicholas-leonard <nick@nikopia.org>  2015-09-03 16:28:18 +0300
committer  nicholas-leonard <nick@nikopia.org>  2015-09-03 16:28:18 +0300
commit     191a0cbcbcbc5db28d71e8a40cdaac4b53420bbf (patch)
tree       88112f6c1aabcc4038ae1bd44db8a68923ccaf4e
parent     a9026fd1377757a148c464784e2d2dce6580c138 (diff)
Removed deprecated modules
-rw-r--r--  AbstractRecurrent.lua    281
-rw-r--r--  BatchNormalization.lua    80
-rw-r--r--  LSTM.lua                 353
-rw-r--r--  Module.lua                25
-rw-r--r--  NarrowLookupTable.lua    148
-rw-r--r--  Recurrent.lua            405
-rw-r--r--  Repeater.lua              87
-rw-r--r--  RepeaterCriterion.lua     49
-rw-r--r--  Sequencer.lua            197
-rw-r--r--  init.lua                  12
-rw-r--r--  test/test-all.lua        476
11 files changed, 0 insertions, 2113 deletions
diff --git a/AbstractRecurrent.lua b/AbstractRecurrent.lua
deleted file mode 100644
index cf40626..0000000
--- a/AbstractRecurrent.lua
+++ /dev/null
@@ -1,281 +0,0 @@
-local AbstractRecurrent, parent = torch.class('nn.AbstractRecurrent', 'nn.Container')
-
-function AbstractRecurrent:__init(rho)
- parent.__init(self)
-
- self.rho = rho --the maximum number of time steps to BPTT
-
- self.fastBackward = true
- self.copyInputs = true
-
- self.inputs = {}
- self.outputs = {}
- self.gradOutputs = {}
- self.scales = {}
-
- self.gradParametersAccumulated = false
- self.step = 1
-
- -- stores internal states of Modules at different time-steps
- self.recurrentOutputs = {}
- self.recurrentGradInputs = {}
-
- self:reset()
-end
-
-local function recursiveResizeAs(t1,t2)
- if torch.type(t2) == 'table' then
- t1 = (torch.type(t1) == 'table') and t1 or {t1}
- for key,_ in pairs(t2) do
- t1[key], t2[key] = recursiveResizeAs(t1[key], t2[key])
- end
- elseif torch.isTensor(t2) then
- t1 = torch.isTensor(t1) and t1 or t2.new()
- t1:resizeAs(t2)
- else
- error("expecting nested tensors or tables. Got "..
- torch.type(t1).." and "..torch.type(t2).." instead")
- end
- return t1, t2
-end
-AbstractRecurrent.recursiveResizeAs = recursiveResizeAs
-
-local function recursiveSet(t1,t2)
- if torch.type(t2) == 'table' then
- t1 = (torch.type(t1) == 'table') and t1 or {t1}
- for key,_ in pairs(t2) do
- t1[key], t2[key] = recursiveSet(t1[key], t2[key])
- end
- elseif torch.isTensor(t2) then
- t1 = t1 or t2.new()
- t1:set(t2)
- else
- error("expecting nested tensors or tables. Got "..
- torch.type(t1).." and "..torch.type(t2).." instead")
- end
- return t1, t2
-end
-AbstractRecurrent.recursiveSet = recursiveSet
-
-local function recursiveCopy(t1,t2)
- if torch.type(t2) == 'table' then
- t1 = (torch.type(t1) == 'table') and t1 or {t1}
- for key,_ in pairs(t2) do
- t1[key], t2[key] = recursiveCopy(t1[key], t2[key])
- end
- elseif torch.isTensor(t2) then
- t1 = torch.isTensor(t1) and t1 or t2.new()
- t1:resizeAs(t2):copy(t2)
- else
- error("expecting nested tensors or tables. Got "..
- torch.type(t1).." and "..torch.type(t2).." instead")
- end
- return t1, t2
-end
-AbstractRecurrent.recursiveCopy = recursiveCopy
-
-local function recursiveAdd(t1, t2)
- if torch.type(t2) == 'table' then
- t1 = (torch.type(t1) == 'table') and t1 or {t1}
- for key,_ in pairs(t2) do
- t1[key], t2[key] = recursiveAdd(t1[key], t2[key])
- end
- elseif torch.isTensor(t1) and torch.isTensor(t2) then
- t1:add(t2)
- else
- error("expecting nested tensors or tables. Got "..
- torch.type(t1).." and "..torch.type(t2).." instead")
- end
- return t1, t2
-end
-AbstractRecurrent.recursiveAdd = recursiveAdd
-
-local function recursiveTensorEq(t1, t2)
- if torch.type(t2) == 'table' then
- local isEqual = true
- if torch.type(t1) ~= 'table' then
- return false
- end
- for key,_ in pairs(t2) do
- isEqual = isEqual and recursiveTensorEq(t1[key], t2[key])
- end
- return isEqual
- elseif torch.isTensor(t1) and torch.isTensor(t2) then
- local diff = t1-t2
- local err = diff:abs():max()
- return err < 0.00001
- else
- error("expecting nested tensors or tables. Got "..
- torch.type(t1).." and "..torch.type(t2).." instead")
- end
-end
-AbstractRecurrent.recursiveTensorEq = recursiveTensorEq
-
-local function recursiveNormal(t2)
- if torch.type(t2) == 'table' then
- for key,_ in pairs(t2) do
- t2[key] = recursiveNormal(t2[key])
- end
- elseif torch.isTensor(t2) then
- t2:normal()
- else
- error("expecting tensor or table thereof. Got "
- ..torch.type(t2).." instead")
- end
- return t2
-end
-AbstractRecurrent.recursiveNormal = recursiveNormal
-
-function AbstractRecurrent:updateGradInput(input, gradOutput)
- -- Back-Propagate Through Time (BPTT) happens in updateParameters()
- -- for now we just keep a list of the gradOutputs
- self.gradOutputs[self.step-1] = self.recursiveCopy(self.gradOutputs[self.step-1] , gradOutput)
-end
-
-function AbstractRecurrent:accGradParameters(input, gradOutput, scale)
- -- Back-Propagate Through Time (BPTT) happens in updateParameters()
- -- for now we just keep a list of the scales
- self.scales[self.step-1] = scale
-end
-
-function AbstractRecurrent:backwardUpdateThroughTime(learningRate)
- local gradInput = self:updateGradInputThroughTime()
- self:accUpdateGradParametersThroughTime(learningRate)
- return gradInput
-end
-
-function AbstractRecurrent:updateParameters(learningRate)
- if self.gradParametersAccumulated then
- for i=1,#self.modules do
- self.modules[i]:updateParameters(learningRate)
- end
- else
- self:backwardUpdateThroughTime(learningRate)
- end
-end
-
--- goes hand in hand with the next method : forget()
-function AbstractRecurrent:recycle()
- -- +1 is to skip initialModule
- if self.step > self.rho + 1 then
- assert(self.recurrentOutputs[self.step] == nil)
- assert(self.recurrentOutputs[self.step-self.rho] ~= nil)
- self.recurrentOutputs[self.step] = self.recurrentOutputs[self.step-self.rho]
- self.recurrentGradInputs[self.step] = self.recurrentGradInputs[self.step-self.rho]
- self.recurrentOutputs[self.step-self.rho] = nil
- self.recurrentGradInputs[self.step-self.rho] = nil
- -- need to keep rho+1 of these
- self.outputs[self.step] = self.outputs[self.step-self.rho-1]
- self.outputs[self.step-self.rho-1] = nil
- end
- if self.step > self.rho then
- assert(self.inputs[self.step] == nil)
- assert(self.inputs[self.step-self.rho] ~= nil)
- self.inputs[self.step] = self.inputs[self.step-self.rho]
- self.gradOutputs[self.step] = self.gradOutputs[self.step-self.rho]
- self.inputs[self.step-self.rho] = nil
- self.gradOutputs[self.step-self.rho] = nil
- self.scales[self.step-self.rho] = nil
- end
-end
-
-function AbstractRecurrent:forget(offset)
- offset = offset or 1
- if self.train ~= false then
- -- bring all states back to the start of the sequence buffers
- local lastStep = self.step - 1
-
- if lastStep > self.rho + offset then
- local i = 1 + offset
- for step = lastStep-self.rho+offset,lastStep do
- self.recurrentOutputs[i] = self.recurrentOutputs[step]
- self.recurrentGradInputs[i] = self.recurrentGradInputs[step]
- self.recurrentOutputs[step] = nil
- self.recurrentGradInputs[step] = nil
- -- we keep rho+1 of these : outputs[k]=outputs[k+rho+1]
- self.outputs[i-1] = self.outputs[step]
- self.outputs[step] = nil
- i = i + 1
- end
-
- end
-
- if lastStep > self.rho then
- local i = 1
- for step = lastStep-self.rho+1,lastStep do
- self.inputs[i] = self.inputs[step]
- self.gradOutputs[i] = self.gradOutputs[step]
- self.inputs[step] = nil
- self.gradOutputs[step] = nil
- self.scales[step] = nil
- i = i + 1
- end
-
- end
- end
-
- -- forget the past inputs; restart from first step
- self.step = 1
-end
-
--- tests whether or not the mlp can be reused internally for recursion :
--- forward A, backward A, then forward B, backward B (where A and B each
--- have their own output/gradInput buffers), then backward A again should
--- reproduce the gradInput and gradParameters of the first backward A.
-function AbstractRecurrent.isRecursable(mlp, input)
- local output = recursiveCopy(nil, mlp:forward(input)) --forward A
- local gradOutput = recursiveNormal(recursiveCopy(nil, output))
- mlp:zeroGradParameters()
- local gradInput = recursiveCopy(nil, mlp:backward(input, gradOutput)) --backward A
- local params, gradParams = mlp:parameters()
- gradParams = recursiveCopy(nil, gradParams)
-
- -- output/gradInput are the only internal module states that we track
- local recurrentOutputs = {}
- local recurrentGradInputs = {}
-
- local modules = mlp:listModules()
-
- -- save the output/gradInput states of A
- for i,modula in ipairs(modules) do
- recurrentOutputs[i] = modula.output
- recurrentGradInputs[i] = modula.gradInput
- end
- -- set the output/gradInput states for B
- local recurrentOutputs2 = {}
- local recurrentGradInputs2 = {}
- for i,modula in ipairs(modules) do
- modula.output = recursiveResizeAs(recurrentOutputs2[i], modula.output)
- modula.gradInput = recursiveResizeAs(recurrentGradInputs2[i], modula.gradInput)
- end
-
- local input2 = recursiveNormal(recursiveCopy(nil, input))
- local gradOutput2 = recursiveNormal(recursiveCopy(nil, gradOutput))
- local output2 = mlp:forward(input2) --forward B
- mlp:zeroGradParameters()
- local gradInput2 = mlp:backward(input2, gradOutput2) --backward B
-
- -- save the output/gradInput state of B
- for i,modula in ipairs(modules) do
- recurrentOutputs2[i] = modula.output
- recurrentGradInputs2[i] = modula.gradInput
- end
-
- -- set the output/gradInput states for A
- for i,modula in ipairs(modules) do
- modula.output = recursiveResizeAs(recurrentOutputs[i], modula.output)
- modula.gradInput = recursiveResizeAs(recurrentGradInputs[i], modula.gradInput)
- end
-
- mlp:zeroGradParameters()
- local gradInput3 = mlp:backward(input, gradOutput) --backward A again
- local gradInputTest = recursiveTensorEq(gradInput, gradInput3)
- local params3, gradParams3 = mlp:parameters()
- local nEq = 0
- for i,gradParam in ipairs(gradParams) do
- nEq = nEq + (recursiveTensorEq(gradParam, gradParams3[i]) and 1 or 0)
- end
- local gradParamsTest = (nEq == #gradParams3)
- mlp:zeroGradParameters()
- return gradParamsTest and gradInputTest, gradParamsTest, gradInputTest
-end
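
For reference, a minimal sketch of how the removed static check nn.AbstractRecurrent.isRecursable was meant to be called (module and sizes below are illustrative, assuming a pre-removal nnx checkout that still provides Module:listModules):

-- check that an mlp keeps consistent gradInput/gradParameters when its
-- output/gradInput buffers are swapped, as AbstractRecurrent does internally
require 'nnx'

local mlp = nn.Sequential()
mlp:add(nn.Linear(10, 10))
mlp:add(nn.Tanh())

local ok, gradParamsOk, gradInputOk =
   nn.AbstractRecurrent.isRecursable(mlp, torch.randn(10))
print(ok, gradParamsOk, gradInputOk)
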
diff --git a/BatchNormalization.lua b/BatchNormalization.lua
deleted file mode 100644
index 90fadfa..0000000
--- a/BatchNormalization.lua
+++ /dev/null
@@ -1,80 +0,0 @@
---Based on: http://arxiv.org/pdf/1502.03167v3
---Usage example:
-------------------------------------
--- model:add(nn.BatchNormalization(3 * 32 * 32))
-------------------------------------
-
-require 'nn'
-require 'cunn'
-local BatchNormalization, parent = torch.class('nn.BatchNormalization', 'nn.Module')
-
-function BatchNormalization:__init(inputSize)
- parent.__init(self)
- self.bias = torch.Tensor(inputSize)
- self.weight = torch.Tensor(inputSize)
- self.gradBias = torch.Tensor(inputSize)
- self.gradWeight = torch.Tensor(inputSize)
-
- self:reset(stdv)
-end
-
-function BatchNormalization:reset(stdv)
- if stdv then
- stdv = stdv * math.sqrt(3)
- else
- stdv = 1./math.sqrt(self.bias:nElement())
- end
-
- self.bias:uniform(-stdv,stdv)
- self.weight:uniform(-stdv,stdv)
-end
-
-function BatchNormalization:updateOutput(input)
- self.output = self.output or input.new()
- self.output:resizeAs(input)
- self.size = input:nElement()
- self.std = torch.std(input) * torch.sqrt((self.size - 1.0) / self.size )
- self.mean = torch.mean(input)
- self.stdcube = torch.pow(self.std,3)
- self.ones = torch.Tensor(self.size):fill(1.0)-- :cuda()
- self.output:copy(input):add(-self.mean):div(self.std)
- self.buffer = self.buffer or input.new()
- self.buffer:resizeAs(self.output):copy(self.output)
- self.output:cmul(self.weight)
- self.output:add(self.bias)
-return self.output
-end
-
-function BatchNormalization:updateGradInput(input, gradOutput)
-
- self.buffer = self.buffer or gradOutput.new()
- self.buffer:resizeAs(gradOutput):copy(gradOutput)
- self.buffer:cmul(self.weight)
- self.dotprod1 = torch.dot(self.ones,self.buffer)
- local der1 = self.ones:clone()
- der1:mul(- self.dotprod1 / self.size/self.std)
- -- x_i - mu
- local der2 = input:clone()
- der2:add(-self.mean)
-
- self.dotprod2 = torch.dot(der2,self.buffer)
- der2:mul(self.dotprod2 / self.size / self.stdcube)
-
- self.gradInput = self.buffer:clone()
-
- self.gradInput:div(self.std)
-
- self.gradInput:add(der1)
- self.gradInput:add(-der2)
- return self.gradInput
-end
-
-function BatchNormalization:accGradParameters(input, gradOutput, scale)
- scale = scale or 1
-
- self.gradBias:add(scale,gradOutput)
- self.gradWeight:addcmul(scale,self.buffer,gradOutput)
-end
-
-
-
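
A minimal CPU sketch of this removed BatchNormalization variant (sizes are illustrative; the original file also pulled in cunn at load time, and note that it normalizes with a single mean/std computed over the whole input rather than per feature):

require 'nnx'

local n = 3 * 32 * 32
local bn = nn.BatchNormalization(n)
bn:zeroGradParameters()           -- gradWeight/gradBias start uninitialized

local input  = torch.randn(n)
local output = bn:forward(input)                  -- (x - mean) / std, then scale and shift
local gradIn = bn:backward(input, torch.randn(n)) -- also accumulates gradWeight/gradBias
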
diff --git a/LSTM.lua b/LSTM.lua
deleted file mode 100644
index a3541b8..0000000
--- a/LSTM.lua
+++ /dev/null
@@ -1,353 +0,0 @@
-------------------------------------------------------------------------
---[[ LSTM ]]--
--- Long Short Term Memory architecture.
--- Ref. A.: http://arxiv.org/pdf/1303.5778v1 (blueprint for this module)
--- B. http://web.eecs.utk.edu/~itamar/courses/ECE-692/Bobby_paper1.pdf
--- C. https://github.com/wojzaremba/lstm
--- Expects 1D or 2D input.
--- The first input in the sequence uses zero values for the cell and hidden states.
-------------------------------------------------------------------------
-local LSTM, parent = torch.class('nn.LSTM', 'nn.AbstractRecurrent')
-
-function LSTM:__init(inputSize, outputSize, rho)
- parent.__init(self, rho or 999999999999)
- self.inputSize = inputSize
- self.outputSize = outputSize
- -- build the model
- self.recurrentModule = self:buildModel()
- -- make it work with nn.Container
- self.modules[1] = self.recurrentModule
-
- -- for output(0), cell(0) and gradCell(T)
- self.zeroTensor = torch.Tensor()
-
- self.cells = {}
- self.gradCells = {}
-end
-
--------------------------- factory methods -----------------------------
-function LSTM:buildGate()
- -- Note : gate expects an input table : {input, output(t-1), cell(t-1)}
- local gate = nn.Sequential()
- local input2gate = nn.Linear(self.inputSize, self.outputSize)
- local output2gate = nn.Linear(self.outputSize, self.outputSize)
- local cell2gate = nn.CMul(self.outputSize) -- diagonal cell to gate weight matrix
- --output2gate:noBias() --TODO
- local para = nn.ParallelTable()
- para:add(input2gate):add(output2gate):add(cell2gate)
- gate:add(para)
- gate:add(nn.CAddTable())
- gate:add(nn.Sigmoid())
- return gate
-end
-
-function LSTM:buildInputGate()
- self.inputGate = self:buildGate()
- return self.inputGate
-end
-
-function LSTM:buildForgetGate()
- self.forgetGate = self:buildGate()
- return self.forgetGate
-end
-
-function LSTM:buildHidden()
- local hidden = nn.Sequential()
- local input2hidden = nn.Linear(self.inputSize, self.outputSize)
- local output2hidden = nn.Linear(self.outputSize, self.outputSize)
- local para = nn.ParallelTable()
- --output2hidden:noBias()
- para:add(input2hidden):add(output2hidden)
- -- input is {input, output(t-1), cell(t-1)}, but we only need {input, output(t-1)}
- local concat = nn.ConcatTable()
- concat:add(nn.SelectTable(1)):add(nn.SelectTable(2))
- hidden:add(concat)
- hidden:add(para)
- hidden:add(nn.CAddTable())
- self.hiddenLayer = hidden
- return hidden
-end
-
-function LSTM:buildCell()
- -- build
- self.inputGate = self:buildInputGate()
- self.forgetGate = self:buildForgetGate()
- self.hiddenLayer = self:buildHidden()
- -- forget = forgetGate{input, output(t-1), cell(t-1)} * cell(t-1)
- local forget = nn.Sequential()
- local concat = nn.ConcatTable()
- concat:add(self.forgetGate):add(nn.SelectTable(3))
- forget:add(concat)
- forget:add(nn.CMulTable())
- -- input = inputGate{input, output(t-1), cell(t-1)} * hiddenLayer{input, output(t-1), cell(t-1)}
- local input = nn.Sequential()
- local concat2 = nn.ConcatTable()
- concat2:add(self.inputGate):add(self.hiddenLayer)
- input:add(concat2)
- input:add(nn.CMulTable())
- -- cell(t) = forget + input
- local cell = nn.Sequential()
- local concat3 = nn.ConcatTable()
- concat3:add(forget):add(input)
- cell:add(concat3)
- cell:add(nn.CAddTable())
- self.cellLayer = cell
- return cell
-end
-
-function LSTM:buildOutputGate()
- self.outputGate = self:buildGate()
- return self.outputGate
-end
-
--- cell(t) = cellLayer{input, output(t-1), cell(t-1)}
--- output(t) = outputGate{input, output(t-1), cell(t)}*tanh(cell(t))
--- output of Model is table : {output(t), cell(t)}
-function LSTM:buildModel()
- -- build components
- self.cellLayer = self:buildCell()
- self.outputGate = self:buildOutputGate()
- -- assemble
- local concat = nn.ConcatTable()
- local concat2 = nn.ConcatTable()
- concat2:add(nn.SelectTable(1)):add(nn.SelectTable(2))
- concat:add(concat2):add(self.cellLayer)
- local model = nn.Sequential()
- model:add(concat)
- -- output of concat is {{input, output}, cell(t)},
- -- so flatten to {input, output, cell(t)}
- model:add(nn.FlattenTable())
- local cellAct = nn.Sequential()
- cellAct:add(nn.SelectTable(3))
- cellAct:add(nn.Tanh())
- local concat3 = nn.ConcatTable()
- concat3:add(self.outputGate):add(cellAct)
- local output = nn.Sequential()
- output:add(concat3)
- output:add(nn.CMulTable())
- -- we want the model to output : {output(t), cell(t)}
- local concat4 = nn.ConcatTable()
- concat4:add(output):add(nn.SelectTable(3))
- model:add(concat4)
- return model
-end
-
-------------------------- forward backward -----------------------------
-function LSTM:updateOutput(input)
- local prevOutput, prevCell
- if self.step == 1 then
- prevOutput = self.zeroTensor
- prevCell = self.zeroTensor
- if input:dim() == 2 then
- self.zeroTensor:resize(input:size(1), self.outputSize):zero()
- else
- self.zeroTensor:resize(self.outputSize):zero()
- end
- self.outputs[0] = self.zeroTensor
- self.cells[0] = self.zeroTensor
- else
- -- previous output and cell of this module
- prevOutput = self.output
- prevCell = self.cell
- end
-
- -- output(t), cell(t) = lstm{input(t), output(t-1), cell(t-1)}
- local output, cell
- if self.train ~= false then
- -- set/save the output states
- local modules = self.recurrentModule:listModules()
- self:recycle()
- local recurrentOutputs = self.recurrentOutputs[self.step]
- if not recurrentOutputs then
- recurrentOutputs = {}
- self.recurrentOutputs[self.step] = recurrentOutputs
- end
- for i,modula in ipairs(modules) do
- local output_ = self.recursiveResizeAs(recurrentOutputs[i], modula.output)
- modula.output = output_
- end
- -- the actual forward propagation
- output, cell = unpack(self.recurrentModule:updateOutput{input, prevOutput, prevCell})
-
- for i,modula in ipairs(modules) do
- recurrentOutputs[i] = modula.output
- end
- else
- output, cell = unpack(self.recurrentModule:updateOutput{input, prevOutput, prevCell})
- end
-
- if self.train ~= false then
- local input_ = self.inputs[self.step]
- self.inputs[self.step] = self.copyInputs
- and self.recursiveCopy(input_, input)
- or self.recursiveSet(input_, input)
- end
-
- self.outputs[self.step] = output
- self.cells[self.step] = cell
-
- self.output = output
- self.cell = cell
-
- self.step = self.step + 1
- self.gradParametersAccumulated = false
- -- note that we don't return the cell, just the output
- return self.output
-end
-
-function LSTM:backwardThroughTime()
- assert(self.step > 1, "expecting at least one updateOutput")
- self.gradInputs = {}
- local rho = math.min(self.rho, self.step-1)
- local stop = self.step - rho
- if self.fastBackward then
- local gradInput, gradPrevOutput, gradCell
- for step=self.step-1,math.max(stop,1),-1 do
- -- set the output/gradOutput states of current Module
- local modules = self.recurrentModule:listModules()
- local recurrentOutputs = self.recurrentOutputs[step]
- local recurrentGradInputs = self.recurrentGradInputs[step]
- if not recurrentGradInputs then
- recurrentGradInputs = {}
- self.recurrentGradInputs[step] = recurrentGradInputs
- end
-
- for i,modula in ipairs(modules) do
- local output, gradInput = modula.output, modula.gradInput
- assert(gradInput, "missing gradInput")
- local output_ = recurrentOutputs[i]
- assert(output_, "backwardThroughTime should be preceded by updateOutput")
- modula.output = output_
- modula.gradInput = self.recursiveResizeAs(recurrentGradInputs[i], gradInput) --resize, NOT copy
- end
-
- -- backward propagate through this step
- local gradOutput = self.gradOutputs[step]
- if gradPrevOutput then
- self.recursiveAdd(gradOutput, gradPrevOutput)
- end
-
- self.gradCells[step] = gradCell
- local scale = self.scales[step]/rho
-
- local inputTable = {self.inputs[step], self.outputs[step-1], self.cells[step-1]}
- local gradInputTable = self.recurrentModule:backward(inputTable, {gradOutput, gradCell}, scale)
- gradInput, gradPrevOutput, gradCell = unpack(gradInputTable)
- table.insert(self.gradInputs, 1, gradInput)
-
- for i,modula in ipairs(modules) do
- recurrentGradInputs[i] = modula.gradInput
- end
- end
- return gradInput
- else
- local gradInput = self:updateGradInputThroughTime()
- self:accGradParametersThroughTime()
- return gradInput
- end
-end
-
-function LSTM:updateGradInputThroughTime()
- assert(self.step > 1, "expecting at least one updateOutput")
- self.gradInputs = {}
- local gradInput, gradPrevOutput
- local gradCell = self.zeroTensor
- local rho = math.min(self.rho, self.step-1)
- local stop = self.step - rho
- for step=self.step-1,math.max(stop,1),-1 do
- -- set the output/gradOutput states of current Module
- local modules = self.recurrentModule:listModules()
- local recurrentOutputs = self.recurrentOutputs[step]
- local recurrentGradInputs = self.recurrentGradInputs[step]
- if not recurrentGradInputs then
- recurrentGradInputs = {}
- self.recurrentGradInputs[step] = recurrentGradInputs
- end
- for i,modula in ipairs(modules) do
- local output, gradInput = modula.output, modula.gradInput
- local output_ = recurrentOutputs[i]
- assert(output_, "updateGradInputThroughTime should be preceded by updateOutput")
- modula.output = output_
- modula.gradInput = self.recursiveResizeAs(recurrentGradInputs[i], gradInput)
- end
-
- -- backward propagate through this step
- local gradOutput = self.gradOutputs[step]
- if gradPrevOutput then
- self.recursiveAdd(gradOutput, gradPrevOutput)
- end
-
- self.gradCells[step] = gradCell
- local scale = self.scales[step]/rho
- local inputTable = {self.inputs[step], self.outputs[step-1], self.cells[step-1]}
- local gradInputTable = self.recurrentModule:updateGradInput(inputTable, {gradOutput, gradCell}, scale)
- gradInput, gradPrevOutput, gradCell = unpack(gradInputTable)
- table.insert(self.gradInputs, 1, gradInput)
-
- for i,modula in ipairs(modules) do
- recurrentGradInputs[i] = modula.gradInput
- end
- end
-
- return gradInput
-end
-
-function LSTM:accGradParametersThroughTime()
- local rho = math.min(self.rho, self.step-1)
- local stop = self.step - rho
- for step=self.step-1,math.max(stop,1),-1 do
- -- set the output/gradOutput states of current Module
- local modules = self.recurrentModule:listModules()
- local recurrentOutputs = self.recurrentOutputs[step]
- local recurrentGradInputs = self.recurrentGradInputs[step]
-
- for i,modula in ipairs(modules) do
- local output, gradInput = modula.output, modula.gradInput
- local output_ = recurrentOutputs[i]
- local gradInput_ = recurrentGradInputs[i]
- assert(output_, "accGradParametersThroughTime should be preceded by updateOutput")
- assert(gradInput_, "accGradParametersThroughTime should be preceded by updateGradInputThroughTime")
- modula.output = output_
- modula.gradInput = gradInput_
- end
-
- -- backward propagate through this step
- local scale = self.scales[step]/rho
- local inputTable = {self.inputs[step], self.outputs[step-1], self.cells[step-1]}
- local gradOutputTable = {self.gradOutputs[step], self.gradCells[step]}
- self.recurrentModule:accGradParameters(inputTable, gradOutputTable, scale)
- end
-
- self.gradParametersAccumulated = true
- return gradInput
-end
-
-function LSTM:accUpdateGradParametersThroughTime(lr)
- local rho = math.min(self.rho, self.step-1)
- local stop = self.step - rho
- for step=self.step-1,math.max(stop,1),-1 do
- -- set the output/gradOutput states of current Module
- local modules = self.recurrentModule:listModules()
- local recurrentOutputs = self.recurrentOutputs[step]
- local recurrentGradInputs = self.recurrentGradInputs[step]
-
- for i,modula in ipairs(modules) do
- local output, gradInput = modula.output, modula.gradInput
- local output_ = recurrentOutputs[i]
- local gradInput_ = recurrentGradInputs[i]
- assert(output_, "accGradParametersThroughTime should be preceded by updateOutput")
- assert(gradInput_, "accGradParametersThroughTime should be preceded by updateGradInputThroughTime")
- modula.output = output_
- modula.gradInput = gradInput_
- end
-
- -- backward propagate through this step
- local scale = self.scales[step]/rho
- local inputTable = {self.inputs[step], self.outputs[step-1], self.cells[step]}
- local gradOutputTable = {self.gradOutputs[step], self.gradCells[step]}
- self.recurrentModule:accUpdateGradParameters(inputTable, gradOutputTable, lr*scale)
- end
-
- return gradInput
-end
-
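
A minimal training-step sketch for the removed nn.LSTM, following the AbstractRecurrent convention that backward() only logs gradOutputs and that BPTT runs inside updateParameters() (sizes, data and learning rate are illustrative):

require 'nnx'

local inputSize, outputSize, rho = 10, 8, 5
local lstm = nn.LSTM(inputSize, outputSize, rho)
local criterion = nn.MSECriterion()

for t = 1, rho do
   local x, y = torch.randn(inputSize), torch.randn(outputSize)
   local output = lstm:forward(x)
   criterion:forward(output, y)
   -- backward() here only records gradOutput and scale for later BPTT
   lstm:backward(x, criterion:backward(output, y))
end

lstm:updateParameters(0.1) -- triggers back-propagation through time
lstm:forget()              -- reset the time-step state before the next sequence
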
diff --git a/Module.lua b/Module.lua
deleted file mode 100644
index 298e2ce..0000000
--- a/Module.lua
+++ /dev/null
@@ -1,25 +0,0 @@
-local Module = nn.Module
-
--- returns a list of modules
-function Module:listModules()
- local function tinsert(to, from)
- if torch.type(from) == 'table' then
- for i=1,#from do
- tinsert(to,from[i])
- end
- else
- table.insert(to,from)
- end
- end
- -- include self first
- local modules = {self}
- if self.modules then
- for i=1,#self.modules do
- local modulas = self.modules[i]:listModules()
- if modulas then
- tinsert(modules,modulas)
- end
- end
- end
- return modules
-end
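
A small sketch of the removed Module:listModules extension: it returns a flat list containing the module itself followed by every descendant reachable through the .modules table of containers (the network below is illustrative):

require 'nnx'

local mlp = nn.Sequential()
mlp:add(nn.Linear(4, 3))
mlp:add(nn.Tanh())

for i, m in ipairs(mlp:listModules()) do
   print(i, torch.type(m))
end
-- expected order: nn.Sequential, nn.Linear, nn.Tanh
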
diff --git a/NarrowLookupTable.lua b/NarrowLookupTable.lua
deleted file mode 100644
index aef4c2c..0000000
--- a/NarrowLookupTable.lua
+++ /dev/null
@@ -1,148 +0,0 @@
-------------------------------------------------------------------------
---[[ NarrowLookupTable ]]--
--- Concatenates embeddings with descending narrowed sizes
--- (ascDelta = true).
--- Useful for language models, where most recent words in context
--- are more useful in predicting next word than older ones.
--- If input is ordered furthest to nearest word, use ascDelta = false.
-------------------------------------------------------------------------
-local NarrowLookupTable, parent = torch.class('nn.NarrowLookupTable', 'nn.LookupTable')
-
-function NarrowLookupTable:__init(deltaSize, nIndex, embedSize, ascDelta)
- nn.Module.__init(self)
- self.deltaSize = deltaSize
- self.deltaSizes = torch.LongTensor()
- self.embedSize = embedSize
- self.ascDelta = (ascDelta == nil) and true or ascDelta
-
- self.weight = torch.Tensor(nIndex, embedSize)
- self.gradWeight = torch.Tensor(nIndex, embedSize):zero()
- self.inputs = {}
-
- self.accUpdate = false
- self.nIndex = 0
-
- self.nBackward = 0
- self:reset()
-end
-
--- this could be overridden in a subclass :
-function NarrowLookupTable:buildSizes(nIndex)
- if self.nIndex == nIndex then
- return
- end
- self.deltaSizes:resize(nIndex)
- local deltaSize = 0
- if self.ascDelta then
- for i=1,self.deltaSizes:size(1),1 do
- self.deltaSizes[i] = deltaSize
- deltaSize = deltaSize + self.deltaSize
- end
- else
- for i=self.deltaSizes:size(1),1,-1 do
- self.deltaSizes[i] = deltaSize
- deltaSize = deltaSize + self.deltaSize
- end
- end
- self.outputSize = nIndex*self.embedSize - self.deltaSizes:sum()
- self.nIndex = nIndex
-end
-
-function NarrowLookupTable:updateOutput(input)
- if input:dim() == 1 then
- local nIndex = input:size(1)
- self:buildSizes(nIndex)
- self.output:resize(self.outputSize)
- local embedIdx = 1
- for i=1,nIndex do
- local embedSize = self.embedSize - self.deltaSizes[i]
- local embed = self.weight[input[i]]:narrow(1, 1, embedSize)
- self.output:narrow(1, embedIdx, embedSize):copy(embed)
- embedIdx = embedIdx + embedSize
- end
- elseif input:dim() == 2 then
- local nExample = input:size(1)
- local nIndex = input:size(2)
- self:buildSizes(nIndex)
- self.output:resize(nExample, self.outputSize)
- for i=1,nExample do
- local output = self.output:select(1, i)
- local input = input:select(1, i)
- local embedIdx = 1
- for j=1,nIndex do
- local embedSize = self.embedSize - self.deltaSizes[j]
- local embed = self.weight[input[j]]:narrow(1, 1, embedSize)
- output:narrow(1, embedIdx, embedSize):copy(embed)
- embedIdx = embedIdx + embedSize
- end
- end
- end
-
- return self.output
-end
-
-function NarrowLookupTable:accGradParameters(input, gradOutput, scale)
- scale = scale or 1
- if input:dim() == 1 then
- self.nBackward = self.nBackward + 1
- local embedIdx = 1
- for i=1,input:size(1) do
- local k = input[i]
- self.inputs[k] = (self.inputs[k] or 0) + 1
- local embedSize = self.embedSize - self.deltaSizes[i]
- local gradEmbed = gradOutput:narrow(1, embedIdx, embedSize)
- self.gradWeight[input[i]]:narrow(1, 1, embedSize):add(gradEmbed)
- embedIdx = embedIdx + embedSize
- end
- elseif input:dim() == 2 then
- self.nBackward = self.nBackward + input:size(1)
- for i=1,input:size(1) do
- local input = input:select(1, i)
- local gradOutput = gradOutput:select(1, i)
- local embedIdx = 1
- for j=1,input:size(1) do
- local k = input[j]
- self.inputs[k] = (self.inputs[k] or 0) + 1
- local embedSize = self.embedSize - self.deltaSizes[j]
- local gradEmbed = gradOutput:narrow(1, embedIdx, embedSize)
- self.gradWeight[input[j]]:narrow(1, 1, embedSize):add(gradEmbed)
- embedIdx = embedIdx + embedSize
- end
- end
- end
-end
-
-function NarrowLookupTable:accUpdateGradParameters(input, gradOutput, lr)
- if input:dim() == 1 then
- local embedIdx = 1
- for i=1,input:size(1) do
- local k = input[i]
- local kscale = self:scaleUpdateByKey(k)
- local embedSize = self.embedSize - self.deltaSizes[i]
- local gradEmbed = gradOutput:narrow(1, embedIdx, embedSize)
- self.weight[input[i]]:narrow(1, 1, embedSize):add(-lr*kscale, gradEmbed)
- embedIdx = embedIdx + embedSize
- end
- elseif input:dim() == 2 then
- for i=1,input:size(1) do
- local input = input:select(1, i)
- local gradOutput = gradOutput:select(1, i)
- local embedIdx = 1
- for j=1,input:size(1) do
- local k = input[j]
- local kscale = self:scaleUpdateByKey(k)
- local embedSize = self.embedSize - self.deltaSizes[j]
- local gradEmbed = gradOutput:narrow(1, embedIdx, embedSize)
- self.weight[input[j]]:narrow(1, 1, embedSize):add(-lr*kscale, gradEmbed)
- embedIdx = embedIdx + embedSize
- end
- end
- end
-end
-
-function NarrowLookupTable:type(type)
- self.gradInput = self.gradInput:type(type)
- self.output = self.output:type(type)
- self.weight = self.weight:type(type)
- self.gradWeight = self.gradWeight:type(type)
-end
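
A minimal sketch of the removed nn.NarrowLookupTable (vocabulary size and indices are illustrative). With deltaSize = 2, embedSize = 8 and a context of 3 indices, the per-position embedding widths are 8, 6 and 4 (embedSize minus deltaSizes = {0, 2, 4} when ascDelta is true), so the concatenated output has 18 elements:

require 'nnx'

local deltaSize, nIndex, embedSize = 2, 100, 8
local lookup = nn.NarrowLookupTable(deltaSize, nIndex, embedSize)

local context = torch.LongTensor{3, 51, 99}   -- 3 word indices
local embedding = lookup:forward(context)
print(embedding:size(1))                      -- 18
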
diff --git a/Recurrent.lua b/Recurrent.lua
deleted file mode 100644
index eb5c61d..0000000
--- a/Recurrent.lua
+++ /dev/null
@@ -1,405 +0,0 @@
-------------------------------------------------------------------------
---[[ Recurrent ]]--
--- Ref. A.: http://goo.gl/vtVGkO (Mikolov et al.)
--- B. http://goo.gl/hu1Lqm
--- Processes the sequence one timestep (forward/backward) at a time.
--- A call to backward only keeps a log of the gradOutputs and scales.
--- Back-Propagation Through Time (BPTT) is done when updateParameters
--- is called. The Module keeps a list of all previous representations
--- (Module.outputs), including intermediate ones for BPTT.
--- To use this module with batches, we suggest using different
--- sequences of the same size within a batch and calling
--- updateParameters() at the end of the Sequence.
--- Note that this won't work with modules that use more than the
--- output attribute to keep track of their internal state between
--- forward and backward.
-------------------------------------------------------------------------
-local Recurrent, parent = torch.class('nn.Recurrent', 'nn.AbstractRecurrent')
-
-function Recurrent:__init(start, input, feedback, transfer, rho, merge)
- parent.__init(self, rho or 5)
-
- local ts = torch.type(start)
- if ts == 'torch.LongTensor' or ts == 'number' then
- start = nn.Add(start)
- end
-
- self.startModule = start
- self.inputModule = input
- self.feedbackModule = feedback
- self.transferModule = transfer or nn.Sigmoid()
- self.mergeModule = merge or nn.CAddTable()
-
- self.modules = {self.startModule, self.inputModule, self.feedbackModule, self.transferModule, self.mergeModule}
-
- self:buildInitialModule()
- self:buildRecurrentModule()
-
- self.initialOutputs = {}
- self.initialGradInputs = {}
-end
-
--- build module used for the first step (steps == 1)
-function Recurrent:buildInitialModule()
- self.initialModule = nn.Sequential()
- self.initialModule:add(self.inputModule)
- self.initialModule:add(self.startModule)
- self.initialModule:add(self.transferModule)
-end
-
--- build module used for the other steps (steps > 1)
-function Recurrent:buildRecurrentModule()
- local parallelModule = nn.ParallelTable()
- parallelModule:add(self.inputModule)
- parallelModule:add(self.feedbackModule)
- self.recurrentModule = nn.Sequential()
- self.recurrentModule:add(parallelModule)
- self.recurrentModule:add(self.mergeModule)
- self.recurrentModule:add(self.transferModule)
-end
-
-function Recurrent:updateOutput(input)
- -- output(t) = transfer(feedback(output_(t-1)) + input(input_(t)))
- local output
- if self.step == 1 then
- -- set/save the output states
- local modules = self.initialModule:listModules()
- for i,modula in ipairs(modules) do
- local output_ = self.recursiveResizeAs(self.initialOutputs[i], modula.output)
- modula.output = output_
- end
- output = self.initialModule:updateOutput(input)
- for i,modula in ipairs(modules) do
- self.initialOutputs[i] = modula.output
- end
- else
- if self.train ~= false then
- -- set/save the output states
- local modules = self.recurrentModule:listModules()
- self:recycle()
- local recurrentOutputs = self.recurrentOutputs[self.step]
- if not recurrentOutputs then
- recurrentOutputs = {}
- self.recurrentOutputs[self.step] = recurrentOutputs
- end
- for i,modula in ipairs(modules) do
- local output_ = self.recursiveResizeAs(recurrentOutputs[i], modula.output)
- modula.output = output_
- end
- -- self.output is the previous output of this module
- output = self.recurrentModule:updateOutput{input, self.output}
- for i,modula in ipairs(modules) do
- recurrentOutputs[i] = modula.output
- end
- else
- -- self.output is the previous output of this module
- output = self.recurrentModule:updateOutput{input, self.output}
- end
- end
-
- if self.train ~= false then
- local input_ = self.inputs[self.step]
- self.inputs[self.step] = self.copyInputs
- and self.recursiveCopy(input_, input)
- or self.recursiveSet(input_, input)
- end
-
- self.outputs[self.step] = output
- self.output = output
- self.step = self.step + 1
- self.gradParametersAccumulated = false
- return self.output
-end
-
--- not to be confused with the hit movie Back to the Future
-function Recurrent:backwardThroughTime()
- assert(self.step > 1, "expecting at least one updateOutput")
- local rho = math.min(self.rho, self.step-1)
- local stop = self.step - rho
- if self.fastBackward then
- self.gradInputs = {}
- local gradInput, gradPrevOutput
- for step=self.step-1,math.max(stop, 2),-1 do
- -- set the output/gradOutput states of current Module
- local modules = self.recurrentModule:listModules()
- local recurrentOutputs = self.recurrentOutputs[step]
- local recurrentGradInputs = self.recurrentGradInputs[step]
- if not recurrentGradInputs then
- recurrentGradInputs = {}
- self.recurrentGradInputs[step] = recurrentGradInputs
- end
- for i,modula in ipairs(modules) do
- local output, gradInput = modula.output, modula.gradInput
- assert(gradInput, "missing gradInput")
- local output_ = recurrentOutputs[i]
- assert(output_, "backwardThroughTime should be preceded by updateOutput")
- modula.output = output_
- modula.gradInput = self.recursiveResizeAs(recurrentGradInputs[i], gradInput)
- end
-
- -- backward propagate through this step
- local input = self.inputs[step]
- local output = self.outputs[step-1]
- local gradOutput = self.gradOutputs[step]
- if gradPrevOutput then
- self.recursiveAdd(gradOutput, gradPrevOutput)
- end
- local scale = self.scales[step]
-
- gradInput, gradPrevOutput = unpack(self.recurrentModule:backward({input, output}, gradOutput, scale/rho))
- table.insert(self.gradInputs, 1, gradInput)
-
- for i,modula in ipairs(modules) do
- recurrentGradInputs[i] = modula.gradInput
- end
- end
-
- if stop <= 1 then
- -- set the output/gradOutput states of initialModule
- local modules = self.initialModule:listModules()
- for i,modula in ipairs(modules) do
- modula.output = self.initialOutputs[i]
- modula.gradInput = self.recursiveResizeAs(self.initialGradInputs[i], modula.gradInput)
- end
-
- -- backward propagate through first step
- local input = self.inputs[1]
- local gradOutput = self.gradOutputs[1]
- if gradPrevOutput then
- self.recursiveAdd(gradOutput, gradPrevOutput)
- end
- local scale = self.scales[1]
- gradInput = self.initialModule:backward(input, gradOutput, scale/rho)
- table.insert(self.gradInputs, 1, gradInput)
-
- for i,modula in ipairs(modules) do
- self.initialGradInputs[i] = modula.gradInput
- end
-
- -- startModule's gradParams shouldn't be step-averaged
- -- as it is used only once. So un-step-average it
- local params, gradParams = self.startModule:parameters()
- if gradParams then
- for i,gradParam in ipairs(gradParams) do
- gradParam:mul(rho)
- end
- end
-
- self.gradParametersAccumulated = true
- return gradInput
- end
- else
- local gradInput = self:updateGradInputThroughTime()
- self:accGradParametersThroughTime()
- return gradInput
- end
-end
-
-function Recurrent:updateGradInputThroughTime()
- assert(self.step > 1, "expecting at least one updateOutput")
- self.gradInputs = {}
- local gradInput, gradPrevOutput
- local rho = math.min(self.rho, self.step-1)
- local stop = self.step - rho
- for step=self.step-1,math.max(stop,2),-1 do
- -- set the output/gradOutput states of current Module
- local modules = self.recurrentModule:listModules()
- local recurrentOutputs = self.recurrentOutputs[step]
- local recurrentGradInputs = self.recurrentGradInputs[step]
- if not recurrentGradInputs then
- recurrentGradInputs = {}
- self.recurrentGradInputs[step] = recurrentGradInputs
- end
- for i,modula in ipairs(modules) do
- local output, gradInput = modula.output, modula.gradInput
- local output_ = recurrentOutputs[i]
- assert(output_, "updateGradInputThroughTime should be preceded by updateOutput")
- modula.output = output_
- modula.gradInput = self.recursiveResizeAs(recurrentGradInputs[i], gradInput)
- end
-
- -- backward propagate through this step
- local input = self.inputs[step]
- local output = self.outputs[step-1]
- local gradOutput = self.gradOutputs[step]
- if gradPrevOutput then
- self.recursiveAdd(gradOutput, gradPrevOutput)
- end
-
- gradInput, gradPrevOutput = unpack(self.recurrentModule:updateGradInput({input, output}, gradOutput))
- table.insert(self.gradInputs, 1, gradInput)
-
- for i,modula in ipairs(modules) do
- recurrentGradInputs[i] = modula.gradInput
- end
- end
-
- if stop <= 1 then
- -- set the output/gradOutput states of initialModule
- local modules = self.initialModule:listModules()
- for i,modula in ipairs(modules) do
- modula.output = self.initialOutputs[i]
- modula.gradInput = self.recursiveResizeAs(self.initialGradInputs[i], modula.gradInput)
- end
-
- -- backward propagate through first step
- local input = self.inputs[1]
- local gradOutput = self.gradOutputs[1]
- if gradPrevOutput then
- self.recursiveAdd(gradOutput, gradPrevOutput)
- end
- gradInput = self.initialModule:updateGradInput(input, gradOutput)
- table.insert(self.gradInputs, 1, gradInput)
-
- for i,modula in ipairs(modules) do
- self.initialGradInputs[i] = modula.gradInput
- end
- end
-
- return gradInput
-end
-
-function Recurrent:accGradParametersThroughTime()
- local rho = math.min(self.rho, self.step-1)
- local stop = self.step - rho
- for step=self.step-1,math.max(stop,2),-1 do
- -- set the output/gradOutput states of current Module
- local modules = self.recurrentModule:listModules()
- local recurrentOutputs = self.recurrentOutputs[step]
- local recurrentGradInputs = self.recurrentGradInputs[step]
-
- for i,modula in ipairs(modules) do
- local output, gradInput = modula.output, modula.gradInput
- local output_ = recurrentOutputs[i]
- local gradInput_ = recurrentGradInputs[i]
- assert(output_, "accGradParametersThroughTime should be preceded by updateOutput")
- assert(gradInput_, "accGradParametersThroughTime should be preceded by updateGradInputThroughTime")
- modula.output = output_
- modula.gradInput = gradInput_
- end
-
- -- backward propagate through this step
- local input = self.inputs[step]
- local output = self.outputs[step-1]
- local gradOutput = self.gradOutputs[step]
-
- local scale = self.scales[step]
- self.recurrentModule:accGradParameters({input, output}, gradOutput, scale/rho)
- end
-
- if stop <= 1 then
- -- set the output/gradOutput states of initialModule
- local modules = self.initialModule:listModules()
- for i,modula in ipairs(modules) do
- local output, gradInput = modula.output, modula.gradInput
- local output_ = self.initialOutputs[i]
- local gradInput_ = self.initialGradInputs[i]
- modula.output = output_
- modula.gradInput = gradInput_
- end
-
- -- backward propagate through first step
- local input = self.inputs[1]
- local gradOutput = self.gradOutputs[1]
- local scale = self.scales[1]
- self.initialModule:accGradParameters(input, gradOutput, scale/rho)
-
- -- startModule's gradParams shouldn't be step-averaged
- -- as it is used only once. So un-step-average it
- local params, gradParams = self.startModule:parameters()
- if gradParams then
- for i,gradParam in ipairs(gradParams) do
- gradParam:mul(rho)
- end
- end
- end
-
- self.gradParametersAccumulated = true
- return gradInput
-end
-
-function Recurrent:accUpdateGradParametersThroughTime(lr)
- local rho = math.min(self.rho, self.step-1)
- local stop = self.step - rho
- for step=self.step-1,math.max(stop,2),-1 do
- -- set the output/gradOutput states of current Module
- local modules = self.recurrentModule:listModules()
- local recurrentOutputs = self.recurrentOutputs[step]
- local recurrentGradInputs = self.recurrentGradInputs[step]
-
- for i,modula in ipairs(modules) do
- local output, gradInput = modula.output, modula.gradInput
- local output_ = recurrentOutputs[i]
- local gradInput_ = recurrentGradInputs[i]
- assert(output_, "accGradParametersThroughTime should be preceded by updateOutput")
- assert(gradInput_, "accGradParametersThroughTime should be preceded by updateGradInputThroughTime")
- modula.output = output_
- modula.gradInput = gradInput_
- end
-
- -- backward propagate through this step
- local input = self.inputs[step]
- local output = self.outputs[step-1]
- local gradOutput = self.gradOutputs[step]
-
- local scale = self.scales[step]
- self.recurrentModule:accUpdateGradParameters({input, output}, gradOutput, lr*scale/rho)
- end
-
- if stop <= 1 then
- -- set the output/gradOutput states of initialModule
- local modules = self.initialModule:listModules()
- for i,modula in ipairs(modules) do
- local output, gradInput = modula.output, modula.gradInput
- local output_ = self.initialOutputs[i]
- local gradInput_ = self.initialGradInputs[i]
- modula.output = output_
- modula.gradInput = gradInput_
- end
-
- -- backward propagate through first step
- local input = self.inputs[1]
- local gradOutput = self.gradOutputs[1]
- local scale = self.scales[1]
- self.inputModule:accUpdateGradParameters(input, self.startModule.gradInput, lr*scale/rho)
- -- startModule's gradParams shouldn't be step-averaged as it is used only once.
- self.startModule:accUpdateGradParameters(self.inputModule.output, self.transferModule.gradInput, lr*scale)
- end
-
- return gradInput
-end
-
-function Recurrent:forget()
- parent.forget(self, 1)
-end
-
-function Recurrent:__tostring__()
- local tab = ' '
- local line = '\n'
- local next = ' -> '
- local str = torch.type(self)
- str = str .. ' {' .. line .. tab .. '[{input(t), output(t-1)}'
- for i=1,3 do
- str = str .. next .. '(' .. i .. ')'
- end
- str = str .. next .. 'output(t)]'
-
- local tab = ' '
- local line = '\n '
- local next = ' |`-> '
- local ext = ' | '
- local last = ' ... -> '
- str = str .. line .. '(1): ' .. ' {' .. line .. tab .. 'input(t)'
- str = str .. line .. tab .. next .. '(t==0): ' .. tostring(self.startModule):gsub('\n', '\n' .. tab .. ext)
- str = str .. line .. tab .. next .. '(t~=0): ' .. tostring(self.inputModule):gsub('\n', '\n' .. tab .. ext)
- str = str .. line .. tab .. 'output(t-1)'
- str = str .. line .. tab .. next .. tostring(self.feedbackModule):gsub('\n', line .. tab .. ext)
- str = str .. line .. "}"
- local tab = ' '
- local line = '\n'
- local next = ' -> '
- str = str .. line .. tab .. '(' .. 2 .. '): ' .. tostring(self.mergeModule):gsub(line, line .. tab)
- str = str .. line .. tab .. '(' .. 3 .. '): ' .. tostring(self.transferModule):gsub(line, line .. tab)
- str = str .. line .. '}'
- return str
-end
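
A minimal training-loop sketch for the removed nn.Recurrent, matching the header comment above: backward() only logs gradOutputs and scales, and BPTT happens when updateParameters() is called (sizes, data and learning rate are illustrative):

require 'nnx'

local inputSize, hiddenSize, rho = 10, 7, 5
local r = nn.Recurrent(
   hiddenSize,                         -- start : becomes nn.Add(hiddenSize)
   nn.Linear(inputSize, hiddenSize),   -- input module
   nn.Linear(hiddenSize, hiddenSize),  -- feedback module
   nn.Sigmoid(),                       -- transfer module
   rho                                 -- BPTT horizon
)
local criterion = nn.MSECriterion()

for t = 1, rho do
   local x, y = torch.randn(inputSize), torch.randn(hiddenSize)
   local output = r:forward(x)
   criterion:forward(output, y)
   r:backward(x, criterion:backward(output, y))
end

r:updateParameters(0.1) -- performs BPTT over the logged steps
r:forget()              -- start a new sequence
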
diff --git a/Repeater.lua b/Repeater.lua
deleted file mode 100644
index 68ea41b..0000000
--- a/Repeater.lua
+++ /dev/null
@@ -1,87 +0,0 @@
-------------------------------------------------------------------------
---[[ Repeater ]]--
--- Encapsulates an AbstractRecurrent instance (rnn) which is repeatedly
--- presented with the same input for nStep time steps.
--- The output is a table of nStep outputs of the rnn.
-------------------------------------------------------------------------
-local Repeater, parent = torch.class("nn.Repeater", "nn.Container")
-
-function Repeater:__init(rnn, nStep)
- parent.__init(self)
- assert(torch.type(nStep) == 'number', "expecting number value for arg 2")
- self.nStep = nStep
- self.rnn = rnn
- assert(rnn.backwardThroughTime, "expecting AbstractRecurrent instance for arg 1")
- self.modules[1] = rnn
- self.output = {}
-end
-
-function Repeater:updateOutput(input)
- self.rnn:forget()
- for step=1,self.nStep do
- self.output[step] = self.rnn:updateOutput(input)
- end
- return self.output
-end
-
-local recursiveAdd = nn.AbstractRecurrent.recursiveAdd
-local recursiveCopy = nn.AbstractRecurrent.recursiveCopy
-
-function Repeater:updateGradInput(input, gradOutput)
- assert(self.rnn.step - 1 == self.nStep, "inconsistent rnn steps")
- assert(torch.type(gradOutput) == 'table', "expecting gradOutput table")
- assert(#gradOutput == self.nStep, "gradOutput should have nStep elements")
- for step=1,self.nStep do
- self.rnn.step = step + 1
- self.rnn:updateGradInput(input, gradOutput[step])
- end
- -- back-propagate through time (BPTT)
- self.rnn:updateGradInputThroughTime()
-
- for i,currentGradInput in ipairs(self.rnn.gradInputs) do
- if i == 1 then
- self.gradInput = recursiveCopy(self.gradInput, currentGradInput)
- else
- recursiveAdd(self.gradInput, currentGradInput)
- end
- end
-
- return self.gradInput
-end
-
-function Repeater:accGradParameters(input, gradOutput, scale)
- assert(self.rnn.step - 1 == self.nStep, "inconsistent rnn steps")
- assert(torch.type(gradOutput) == 'table', "expecting gradOutput table")
- assert(#gradOutput == self.nStep, "gradOutput should have nStep elements")
- for step=1,self.nStep do
- self.rnn.step = step + 1
- self.rnn:accGradParameters(input, gradOutput[step], scale)
- end
- -- back-propagate through time (BPTT)
- self.rnn:accGradParametersThroughTime()
-end
-
-function Repeater:accUpdateGradParameters(input, gradOutput, lr)
- assert(self.rnn.step - 1 == self.nStep, "inconsistent rnn steps")
- assert(torch.type(gradOutput) == 'table', "expecting gradOutput table")
- assert(#gradOutput == self.nStep, "gradOutput should have nStep elements")
- for step=1,self.nStep do
- self.rnn.step = step + 1
- self.rnn:accGradParameters(input, gradOutput[step], 1)
- end
- -- back-propagate through time (BPTT)
- self.rnn:accUpdateGradParametersThroughTime(lr)
-end
-
-function Repeater:__tostring__()
- local tab = ' '
- local line = '\n'
- local str = torch.type(self) .. ' {' .. line
- str = str .. tab .. '[ input, input, ..., input ]'.. line
- str = str .. tab .. ' V V V '.. line
- str = str .. tab .. tostring(self.modules[1]):gsub(line, line .. tab) .. line
- str = str .. tab .. ' V V V '.. line
- str = str .. tab .. '[output(1),output(2),...,output('..self.nStep..')]' .. line
- str = str .. '}'
- return str
-end
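
A minimal sketch of the removed nn.Repeater: the same input is presented to a recurrent module for nStep time steps, producing a table of nStep outputs (sizes and data are illustrative):

require 'nnx'

local inputSize, outputSize, nStep = 10, 7, 3
local rnn = nn.Recurrent(outputSize, nn.Linear(inputSize, outputSize),
                         nn.Linear(outputSize, outputSize), nn.Sigmoid(), nStep)
local repeater = nn.Repeater(rnn, nStep)

local input = torch.randn(inputSize)
local outputs = repeater:forward(input)   -- table of nStep output tensors

-- gradOutput must be a table with one entry per step
local gradOutputs = {}
for i = 1, nStep do gradOutputs[i] = torch.randn(outputSize) end
local gradInput = repeater:backward(input, gradOutputs)
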
diff --git a/RepeaterCriterion.lua b/RepeaterCriterion.lua
deleted file mode 100644
index a6ad078..0000000
--- a/RepeaterCriterion.lua
+++ /dev/null
@@ -1,49 +0,0 @@
-------------------------------------------------------------------------
---[[ RepeaterCriterion ]]--
--- Applies a criterion to each of the inputs in a Table using the
--- same target (the target is repeated).
--- Useful for nn.Repeater and nn.Sequencer.
-------------------------------------------------------------------------
-local RepeaterCriterion, parent = torch.class("nn.RepeaterCriterion", "nn.Criterion")
-
-function RepeaterCriterion:__init(criterion)
- parent.__init(self)
- self.criterion = criterion
- self.gradInput = {}
-end
-
-function RepeaterCriterion:forward(inputTable, target)
- self.output = 0
- for i,input in ipairs(inputTable) do
- self.output = self.output + self.criterion:forward(input, target)
- end
- return self.output
-end
-
-function RepeaterCriterion:backward(inputTable, target)
- for i,input in ipairs(inputTable) do
- local gradInput = self.criterion:backward(input, target)
- self.gradInput[i] = self.gradInput[i] or gradInput.new()
- self.gradInput[i]:resizeAs(gradInput):copy(gradInput)
- end
- return self.gradInput
-end
-
-local function recursiveType(param, type_str)
- if torch.type(param) == 'table' then
- for i = 1, #param do
- param[i] = recursiveType(param[i], type_str)
- end
- else
- if torch.typename(param) and
- torch.typename(param):find('torch%..+Tensor') then
- param = param:type(type_str)
- end
- end
- return param
-end
-
-function RepeaterCriterion:type(type)
- self.gradInput = recursiveType(self.gradInput, type)
- return self.criterion:type(type)
-end
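
A minimal sketch of the removed nn.RepeaterCriterion: the same target is applied to each entry of a table of predictions (such as the output of nn.Repeater or nn.Sequencer) and the per-step losses are summed (sizes and data are illustrative):

require 'nnx'

local crit = nn.RepeaterCriterion(nn.MSECriterion())

local target = torch.randn(7)
local predictions = {torch.randn(7), torch.randn(7), torch.randn(7)}

local loss = crit:forward(predictions, target)        -- sum of 3 MSE terms
local gradInputs = crit:backward(predictions, target) -- table of 3 gradients
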
diff --git a/Sequencer.lua b/Sequencer.lua
deleted file mode 100644
index 97f9d6c..0000000
--- a/Sequencer.lua
+++ /dev/null
@@ -1,197 +0,0 @@
-------------------------------------------------------------------------
---[[ Sequencer ]]--
--- Encapsulates a Module.
--- Input is a sequence (a table) of tensors.
--- Output is a sequence (a table) of tensors of the same length.
--- Applies the module to each element in the sequence.
--- Handles both recurrent modules and non-recurrent modules.
--- The sequences in a batch must have the same size.
--- But the sequence length of each batch can vary.
-------------------------------------------------------------------------
-local Sequencer, parent = torch.class("nn.Sequencer", "nn.Container")
-
-function Sequencer:__init(module)
- parent.__init(self)
- self.module = module
- self.isRecurrent = module.backwardThroughTime ~= nil
- self.modules[1] = module
- self.sequenceOutputs = {}
- self.sequenceGradInputs = {}
- self.output = {}
- self.step = 1
-end
-
-local function recursiveResizeAs(t1,t2)
- if torch.type(t2) == 'table' then
- t1 = (torch.type(t1) == 'table') and t1 or {t1}
- for key,_ in pairs(t2) do
- t1[key], t2[key] = recursiveResizeAs(t1[key], t2[key])
- end
- elseif torch.isTensor(t2) then
- t1 = t1 or t2.new()
- t1:resizeAs(t2)
- else
- error("expecting nested tensors or tables. Got "..
- torch.type(t1).." and "..torch.type(t2).." instead")
- end
- return t1, t2
-end
-
-
-function Sequencer:updateOutput(inputTable)
- assert(torch.type(inputTable) == 'table', "expecting input table")
- self.output = {}
- if self.isRecurrent then
- self.module:forget()
- for step, input in ipairs(inputTable) do
- self.output[step] = self.module:updateOutput(input)
- end
- else
- for step, input in ipairs(inputTable) do
- -- set output states for this step
- local modules = self.module:listModules()
- local sequenceOutputs = self.sequenceOutputs[step]
- if not sequenceOutputs then
- sequenceOutputs = {}
- self.sequenceOutputs[step] = sequenceOutputs
- end
- for i,modula in ipairs(modules) do
- local output_ = recursiveResizeAs(sequenceOutputs[i], modula.output)
- modula.output = output_
- end
-
- -- forward propagate this step
- self.output[step] = self.module:updateOutput(input)
-
- -- save output state of this step
- for i,modula in ipairs(modules) do
- sequenceOutputs[i] = modula.output
- end
- end
- end
- return self.output
-end
-
-function Sequencer:updateGradInput(inputTable, gradOutputTable)
- self.gradInput = {}
- if self.isRecurrent then
- assert(torch.type(gradOutputTable) == 'table', "expecting gradOutput table")
- assert(#gradOutputTable == #inputTable, "gradOutput should have as many elements as input")
- for step, input in ipairs(inputTable) do
- self.module.step = step + 1
- self.module:updateGradInput(input, gradOutputTable[step])
- end
- -- back-propagate through time (BPTT)
- self.module:updateGradInputThroughTime()
- assert(self.module.gradInputs, "recurrent module did not fill gradInputs")
- for step=1,#inputTable do
- self.gradInput[step] = self.module.gradInputs[step]
- end
- assert(#self.gradInput == #inputTable, "missing gradInputs")
- else
- for step, input in ipairs(inputTable) do
- -- set the output/gradOutput states for this step
- local modules = self.module:listModules()
- local sequenceOutputs = self.sequenceOutputs[step]
- local sequenceGradInputs = self.sequenceGradInputs[step]
- if not sequenceGradInputs then
- sequenceGradInputs = {}
- self.sequenceGradInputs[step] = sequenceGradInputs
- end
- for i,modula in ipairs(modules) do
- local output, gradInput = modula.output, modula.gradInput
- local output_ = sequenceOutputs[i]
- assert(output_, "updateGradInputThroughTime should be preceded by updateOutput")
- modula.output = output_
- modula.gradInput = recursiveResizeAs(sequenceGradInputs[i], gradInput)
- end
-
- -- backward propagate this step
- self.gradInput[step] = self.module:updateGradInput(input, gradOutputTable[step])
-
- -- save the output/gradOutput states of this step
- for i,modula in ipairs(modules) do
- sequenceGradInputs[i] = modula.gradInput
- end
- end
- end
- return self.gradInput
-end
-
-function Sequencer:accGradParameters(inputTable, gradOutputTable, scale)
- if self.isRecurrent then
- assert(torch.type(gradOutputTable) == 'table', "expecting gradOutput table")
- assert(#gradOutputTable == #inputTable, "gradOutput should have as many elements as input")
- for step, input in ipairs(inputTable) do
- self.module.step = step + 1
- self.module:accGradParameters(input, gradOutputTable[step], scale)
- end
- -- back-propagate through time (BPTT)
- self.module:accGradParametersThroughTime()
- else
- for step, input in ipairs(inputTable) do
- -- set the output/gradOutput states for this step
- local modules = self.module:listModules()
- local sequenceOutputs = self.sequenceOutputs[step]
- local sequenceGradInputs = self.sequenceGradInputs[step]
- if not sequenceGradInputs then
- sequenceGradInputs = {}
- self.sequenceGradInputs[step] = sequenceGradInputs
- end
- for i,modula in ipairs(modules) do
- local output, gradInput = modula.output, modula.gradInput
- local output_ = sequenceOutputs[i]
- modula.output = output_
- modula.gradInput = recursiveResizeAs(sequenceGradInputs[i], gradInput)
- end
-
- -- accumulate parameters for this step
- self.module:accGradParameters(input, gradOutputTable[step], scale)
- end
- end
-end
-
-function Sequencer:accUpdateGradParameters(inputTable, gradOutputTable, lr)
- if self.isRecurrent then
- assert(torch.type(gradOutputTable) == 'table', "expecting gradOutput table")
- assert(#gradOutputTable == #inputTable, "gradOutput should have as many elements as input")
- for step, input in ipairs(inputTable) do
- self.module.step = step + 1
- self.module:accGradParameters(input, gradOutputTable[step], 1)
- end
- -- back-propagate through time (BPTT)
- self.module:accUpdateGradParametersThroughTime(lr)
- else
- for step, input in ipairs(inputTable) do
- -- set the output/gradOutput states for this step
- local modules = self.module:listModules()
- local sequenceOutputs = self.sequenceOutputs[step]
- local sequenceGradInputs = self.sequenceGradInputs[step]
- if not sequenceGradInputs then
- sequenceGradInputs = {}
- self.sequenceGradInputs[step] = sequenceGradInputs
- end
- for i,modula in ipairs(modules) do
- local output, gradInput = modula.output, modula.gradInput
- local output_ = sequenceOutputs[i]
- modula.output = output_
- modula.gradInput = recursiveResizeAs(sequenceGradInputs[i], gradInput)
- end
-
- -- accumulate parameters for this step
- self.module:accUpdateGradParameters(input, gradOutputTable[step], lr)
- end
- end
-end
-
-function Sequencer:__tostring__()
- local tab = ' '
- local line = '\n'
- local str = torch.type(self) .. ' {' .. line
- str = str .. tab .. '[input(1), input(2), ..., input(T)]'.. line
- str = str .. tab .. ' V V V '.. line
- str = str .. tab .. tostring(self.modules[1]):gsub(line, line .. tab) .. line
- str = str .. tab .. ' V V V '.. line
- str = str .. tab .. '[output(1),output(2),...,output(T)]' .. line
- str = str .. '}'
- return str
-end
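
For context, the Sequencer deleted above decorates a module so that it maps a table of time-step inputs to a table of outputs; the non-recurrent branches of updateGradInput and accGradParameters seen here snapshot every sub-module's output and gradInput once per step. A minimal usage sketch of that decoration, against the pre-removal nnx API (the wrapped module and tensor sizes are illustrative, not taken from the diff):

require 'nnx'   -- pre-removal nnx, which still ships nn.Sequencer

local seq = nn.Sequencer(nn.Linear(10, 7))   -- decorate a non-recurrent module
local inputs, gradOutputs = {}, {}
for t = 1, 5 do
   inputs[t] = torch.randn(4, 10)            -- batch of 4 at each time-step
   gradOutputs[t] = torch.randn(4, 7)
end
local outputs = seq:forward(inputs)          -- table of 5 output tensors
local gradInputs = seq:backward(inputs, gradOutputs)
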
diff --git a/init.lua b/init.lua
index a566457..5c8d1b9 100644
--- a/init.lua
+++ b/init.lua
@@ -39,9 +39,6 @@ require 'libnnx'
torch.include('nnx', 'test-all.lua')
torch.include('nnx', 'test-omp.lua')
--- extensions of nn modules
-torch.include('nnx', 'Module.lua')
-
-- tools:
torch.include('nnx', 'Probe.lua')
torch.include('nnx', 'Tic.lua')
@@ -74,24 +71,15 @@ torch.include('nnx', 'Minus.lua')
torch.include('nnx', 'SoftMaxTree.lua')
torch.include('nnx', 'MultiSoftMax.lua')
torch.include('nnx', 'Balance.lua')
-torch.include('nnx', 'NarrowLookupTable.lua')
torch.include('nnx', 'PushTable.lua')
torch.include('nnx', 'PullTable.lua')
torch.include('nnx', 'ZeroGrad.lua')
--- recurrent
-torch.include('nnx', 'AbstractRecurrent.lua')
-torch.include('nnx', 'Recurrent.lua')
-torch.include('nnx', 'LSTM.lua')
-torch.include('nnx', 'Repeater.lua')
-torch.include('nnx', 'Sequencer.lua')
-
-- criterions:
torch.include('nnx', 'SuperCriterion.lua')
torch.include('nnx', 'DistNLLCriterion.lua')
torch.include('nnx', 'DistMarginCriterion.lua')
torch.include('nnx', 'TreeNLLCriterion.lua')
-torch.include('nnx', 'RepeaterCriterion.lua')
-- datasets:
torch.include('nnx', 'DataSet.lua')
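
With the torch.include lines removed above, require 'nnx' no longer registers the Module.lua extensions, nn.NarrowLookupTable, the recurrent classes (nn.AbstractRecurrent, nn.Recurrent, nn.LSTM, nn.Repeater, nn.Sequencer) or nn.RepeaterCriterion. A hedged migration sketch, assuming the standalone rnn package provides the recurrent modules under the same names:

require 'nn'
require 'rnn'   -- assumed external package providing nn.Recurrent, nn.LSTM,
                -- nn.Sequencer, nn.Repeater and nn.RepeaterCriterion
local model = nn.Sequencer(nn.LSTM(10, 7))
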
diff --git a/test/test-all.lua b/test/test-all.lua
index 0b27f2a..2ef1977 100644
--- a/test/test-all.lua
+++ b/test/test-all.lua
@@ -286,482 +286,6 @@ function nnxtest.SpatialConvolution()
mytester:asserteq(berr, 0, torch.typename(module) .. ' - i/o backward err ')
end
-function nnxtest.Module_listModules()
- local batchSize = 4
- local inputSize, outputSize = 7, 6
- local linear = nn.Linear(inputSize, outputSize)
- local tanh = nn.Tanh()
- local reshape = nn.Reshape(outputSize/2, 2)
- local mlp3 = nn.Sequential()
- mlp3:add(linear)
- mlp3:add(tanh)
- mlp3:add(reshape)
-
- local mlp2 = nn.Sequential()
- local view = nn.View(outputSize)
- local linear2 = nn.Linear(outputSize, inputSize)
- local tanh2 = nn.Tanh()
- mlp2:add(mlp3)
- mlp2:add(view)
- mlp2:add(linear2)
- mlp2:add(tanh2)
-
- local concat = nn.ConcatTable()
- local id = nn.Identity()
- concat:add(mlp2)
- concat:add(id)
- local mlp = nn.Sequential()
- local add = nn.CAddTable()
- mlp:add(concat)
- mlp:add(add)
-
- local modules2 = {mlp, concat, mlp2, mlp3, linear, tanh, reshape, view, linear2, tanh2, id, add}
- local modules = mlp:listModules()
-
- mytester:assert(#modules2 == #modules, 'missing modules error')
-
- for i,module in ipairs(modules) do
- mytester:assert(torch.type(module) == torch.type(modules2[i]), 'module error')
- end
-
-end
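-- The deleted test above exercised Module:listModules() (defined in the
-- Module.lua extension that this commit also removes): it flattens a container
-- into a depth-first list headed by the container itself. A minimal sketch,
-- assuming a build that still provides listModules():
require 'nn'
local mlp = nn.Sequential():add(nn.Linear(7, 6)):add(nn.Tanh())
for i, m in ipairs(mlp:listModules()) do
   print(i, torch.type(m))   -- 1 nn.Sequential, 2 nn.Linear, 3 nn.Tanh
end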
-
-function nnxtest.Recurrent()
- local batchSize = 4
- local inputSize = 10
- local hiddenSize = 12
- local outputSize = 7
- local nSteps = 5
- local inputModule = nn.Linear(inputSize, outputSize)
- local transferModule = nn.Sigmoid()
- -- test MLP feedback Module (because of Module:representations())
- local feedbackModule = nn.Sequential()
- feedbackModule:add(nn.Linear(outputSize, hiddenSize))
- feedbackModule:add(nn.Sigmoid())
- feedbackModule:add(nn.Linear(hiddenSize, outputSize))
- -- rho = nSteps
- local mlp = nn.Recurrent(outputSize, inputModule, feedbackModule, transferModule:clone(), nSteps)
-
- -- test that the internal mlps are recursable :
- local isRecursable = nn.AbstractRecurrent.isRecursable
- mytester:assert(isRecursable(mlp.initialModule, torch.randn(inputSize)), "Recurrent isRecursable() initial error")
- mytester:assert(isRecursable(mlp.recurrentModule, {torch.randn(inputSize), torch.randn(outputSize)}), "Recurrent isRecursable() recurrent error")
-
- -- test that the above test actually works
- local euclidean = nn.Euclidean(inputSize, outputSize)
- mytester:assert(not isRecursable(euclidean, torch.randn(batchSize, inputSize)), "AbstractRecurrent.isRecursable error")
-
- local gradOutputs, outputs = {}, {}
- -- inputs = {inputN, {inputN-1, {inputN-2, ...}}}}}
- local inputs
- local startModule = mlp.startModule:clone()
- inputModule = mlp.inputModule:clone()
- feedbackModule = mlp.feedbackModule:clone()
-
- local mlp6 = mlp:clone()
- mlp6:evaluate()
-
- mlp:zeroGradParameters()
- local mlp7 = mlp:clone()
- mlp7.rho = nSteps - 1
- local inputSequence = {}
- for step=1,nSteps do
- local input = torch.randn(batchSize, inputSize)
- inputSequence[step] = input
- local gradOutput
- if step ~= nSteps then
- -- for the sake of keeping this unit test simple,
- gradOutput = torch.zeros(batchSize, outputSize)
- else
- -- only the last step will get a gradient from the output
- gradOutput = torch.randn(batchSize, outputSize)
- end
-
- local output = mlp:forward(input)
- mlp:backward(input, gradOutput)
-
- local output6 = mlp6:forward(input)
- mytester:assertTensorEq(output, output6, 0.000001, "evaluation error "..step)
-
- local output7 = mlp7:forward(input)
- mlp7:backward(input, gradOutput)
- mytester:assertTensorEq(output, output7, 0.000001, "rho = nSteps-1 forward error "..step)
-
- table.insert(gradOutputs, gradOutput)
- table.insert(outputs, output:clone())
-
- if inputs then
- inputs = {input, inputs}
- else
- inputs = input
- end
- end
- local mlp4 = mlp:clone()
- local mlp5 = mlp:clone()
-
- -- backward propagate through time (BPTT)
- local gradInput = mlp:backwardThroughTime():clone()
- mlp:forget() -- test ability to forget
- mlp:zeroGradParameters()
- local foutputs = {}
- for step=1,nSteps do
- foutputs[step] = mlp:forward(inputSequence[step])
- mytester:assertTensorEq(foutputs[step], outputs[step], 0.00001, "Recurrent forget output error "..step)
- mlp:backward(input, gradOutputs[step])
- end
- local fgradInput = mlp:backwardThroughTime():clone()
- mytester:assertTensorEq(gradInput, fgradInput, 0.00001, "Recurrent forget gradInput error")
-
- mlp4.fastBackward = false
- local gradInput4 = mlp4:backwardThroughTime()
- mytester:assertTensorEq(gradInput, gradInput4, 0.000001, 'error slow vs fast backwardThroughTime')
- local mlp10 = mlp7:clone()
- mytester:assert(mlp10.inputs[1] == nil, 'recycle inputs error')
- mlp10:forget()
- mytester:assert(#mlp10.inputs == 4, 'forget inputs error')
- mytester:assert(#mlp10.outputs == 5, 'forget outputs error')
- local i = 0
- for k,v in pairs(mlp10.recurrentOutputs) do
- i = i + 1
- end
- mytester:assert(i == 4, 'forget recurrentOutputs error')
-
- -- rho = nSteps - 1 : shouldn't update startModule
- mlp7:backwardThroughTime()
-
- local mlp2 -- this one will simulate rho = nSteps
- local outputModules = {}
- for step=1,nSteps do
- local inputModule_ = inputModule:clone()
- local outputModule = transferModule:clone()
- table.insert(outputModules, outputModule)
- inputModule_:share(inputModule, 'weight', 'gradWeight', 'bias', 'gradBias')
- if step == 1 then
- local initialModule = nn.Sequential()
- initialModule:add(inputModule_)
- initialModule:add(startModule)
- initialModule:add(outputModule)
- mlp2 = initialModule
- else
- local parallelModule = nn.ParallelTable()
- parallelModule:add(inputModule_)
- local pastModule = nn.Sequential()
- pastModule:add(mlp2)
- local feedbackModule_ = feedbackModule:clone()
- feedbackModule_:share(feedbackModule, 'weight', 'gradWeight', 'bias', 'gradBias')
- pastModule:add(feedbackModule_)
- parallelModule:add(pastModule)
- local recurrentModule = nn.Sequential()
- recurrentModule:add(parallelModule)
- recurrentModule:add(nn.CAddTable())
- recurrentModule:add(outputModule)
- mlp2 = recurrentModule
- end
- end
-
-
- local output2 = mlp2:forward(inputs)
- mlp2:zeroGradParameters()
-
- -- unlike mlp2, mlp8 will simulate rho = nSteps -1
- local mlp8 = mlp2:clone()
- local inputModule8 = mlp8.modules[1].modules[1]
- local m = mlp8.modules[1].modules[2].modules[1].modules[1].modules[2]
- m = m.modules[1].modules[1].modules[2].modules[1].modules[1].modules[2]
- local feedbackModule8 = m.modules[2]
- local startModule8 = m.modules[1].modules[2] -- before clone
- -- unshare the initialModule:
- m.modules[1] = m.modules[1]:clone()
- m.modules[2] = m.modules[2]:clone()
- mlp8:backward(inputs, gradOutputs[#gradOutputs])
-
- local gradInput2 = mlp2:backward(inputs, gradOutputs[#gradOutputs])
- for step=1,nSteps-1 do
- gradInput2 = gradInput2[2]
- end
-
- mytester:assertTensorEq(gradInput, gradInput2, 0.000001, "recurrent gradInput")
- mytester:assertTensorEq(outputs[#outputs], output2, 0.000001, "recurrent output")
- for step=1,nSteps do
- local output, outputModule = outputs[step], outputModules[step]
- mytester:assertTensorEq(output, outputModule.output, 0.000001, "recurrent output step="..step)
- end
-
- local mlp3 = nn.Sequential()
- -- contains params and grads of mlp2 (the MLP version of the Recurrent)
- mlp3:add(startModule):add(inputModule):add(feedbackModule)
- local params2, gradParams2 = mlp3:parameters()
- local params, gradParams = mlp:parameters()
- mytester:assert(#params2 == #params, 'missing parameters')
- mytester:assert(#gradParams == #params, 'missing gradParameters')
- for i=1,#params do
- if i > 1 then
- gradParams2[i]:div(nSteps)
- end
- mytester:assertTensorEq(gradParams[i], gradParams2[i], 0.000001, 'gradParameter error ' .. i)
- end
-
- local mlp9 = nn.Sequential()
- -- contains params and grads of mlp8
- mlp9:add(startModule8):add(inputModule8):add(feedbackModule8)
- local params9, gradParams9 = mlp9:parameters()
- local params7, gradParams7 = mlp7:parameters()
- mytester:assert(#params9 == #params7, 'missing parameters')
- mytester:assert(#gradParams7 == #params7, 'missing gradParameters')
- for i=1,#params do
- if i > 1 then
- gradParams9[i]:div(nSteps-1)
- end
- mytester:assertTensorEq(gradParams7[i], gradParams9[i], 0.00001, 'gradParameter error ' .. i)
- end
-
- -- already called backwardThroughTime()
- mlp:updateParameters(0.1)
- mlp4:updateParameters(0.1)
-
- local params4 = mlp4:parameters()
- local params5 = mlp5:parameters()
- local params = mlp:parameters()
- mytester:assert(#params4 == #params, 'missing parameters')
- mytester:assert(#params5 == #params, 'missing parameters')
- for i=1,#params do
- mytester:assertTensorEq(params[i], params4[i], 0.000001, 'backwardThroughTime error ' .. i)
- mytester:assertTensorNe(params[i], params5[i], 0.0000000001, 'backwardThroughTime error ' .. i)
- end
-
- -- should call backwardUpdateThroughTime()
- mlp5:updateParameters(0.1)
-
- local params5 = mlp5:parameters()
- local params = mlp:parameters()
- mytester:assert(#params5 == #params, 'missing parameters')
- for i=1,#params do
- mytester:assertTensorEq(params[i], params5[i], 0.000001, 'backwardUpdateThroughTime error ' .. i)
- end
-end
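-- A minimal sketch of the nn.Recurrent training loop that the removed test
-- verified against a hand-unrolled MLP (pre-removal nnx API, sizes illustrative):
require 'nnx'
local rho = 5
local r = nn.Recurrent(
   7,                     -- start: hidden/output size (or a start module)
   nn.Linear(10, 7),      -- input layer applied to x[t]
   nn.Linear(7, 7),       -- feedback layer applied to the previous output
   nn.Sigmoid(),          -- transfer function
   rho)                   -- maximum number of time-steps for BPTT
for t = 1, rho do
   local x = torch.randn(4, 10)
   r:forward(x)
   r:backward(x, torch.randn(4, 7))
end
r:backwardThroughTime()   -- BPTT accumulates gradParameters over the stored steps
r:updateParameters(0.1)
r:forget()                -- clear the stored time-steps before the next sequence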
-
-function nnxtest.Recurrent_TestTable()
- -- Set up RNN where internal state is a table.
- -- Trivial example is same RNN from nnxtest.Recurrent test
- -- but all layers are duplicated
- local batchSize = 4
- local inputSize = 10
- local hiddenSize = 12
- local outputSize = 7
- local nSteps = 5
- local inputModule = nn.Linear(inputSize, outputSize)
- local transferModule = nn.Sigmoid()
- local learningRate = 0.1
- -- test MLP feedback Module
- local feedbackModule = nn.Sequential()
- feedbackModule:add(nn.Linear(outputSize, hiddenSize))
- feedbackModule:add(nn.Sigmoid())
- feedbackModule:add(nn.Linear(hiddenSize, outputSize))
- -- rho = nSteps
- local mlp = nn.Recurrent(
- nn.ParallelTable()
- :add(nn.Add(outputSize))
- :add(nn.Add(outputSize)),
- nn.ParallelTable()
- :add(inputModule:clone())
- :add(inputModule:clone()),
- nn.ParallelTable()
- :add(feedbackModule:clone())
- :add(feedbackModule:clone()),
- nn.ParallelTable()
- :add(transferModule:clone())
- :add(transferModule:clone()),
- nSteps,
- nn.ParallelTable()
- :add(nn.CAddTable())
- :add(nn.CAddTable())
- )
-
- local input = torch.randn(batchSize, inputSize)
- local err = torch.randn(batchSize, outputSize)
- for i=1,10 do
- mlp:forward{input, input:clone()}
- mlp:backward({input, input:clone()}, {err, err:clone()})
- end
- mlp:backwardThroughTime(learningRate)
-end
-
-function nnxtest.LSTM()
- local batchSize = math.random(1,2)
- local inputSize = math.random(3,4)
- local outputSize = math.random(5,6)
- local nStep = 3
- local input = {}
- local gradOutput = {}
- for step=1,nStep do
- input[step] = torch.randn(batchSize, inputSize)
- if step == nStep then
- -- for the sake of keeping this unit test simple,
- gradOutput[step] = torch.randn(batchSize, outputSize)
- else
- -- only the last step will get a gradient from the output
- gradOutput[step] = torch.zeros(batchSize, outputSize)
- end
- end
- local lstm = nn.LSTM(inputSize, outputSize)
-
- local isRecursable = nn.AbstractRecurrent.isRecursable
- local inputTable = {torch.randn(batchSize, inputSize), torch.randn(batchSize, outputSize), torch.randn(batchSize, outputSize)}
- mytester:assert(isRecursable(lstm.recurrentModule, inputTable), "LSTM isRecursable() error")
-
- -- we will use this to build an LSTM step by step (with shared params)
- local lstmStep = lstm.recurrentModule:clone()
-
- -- forward/backward through LSTM
- local output = {}
- lstm:zeroGradParameters()
- for step=1,nStep do
- output[step] = lstm:forward(input[step])
- assert(torch.isTensor(input[step]))
- lstm:backward(input[step], gradOutput[step], 1)
- end
- local gradInput = lstm:backwardThroughTime()
-
- local mlp2 -- this one will simulate rho = nSteps
- local inputs
- for step=1,nStep do
- -- iteratively build an LSTM out of non-recurrent components
- local lstm = lstmStep:clone()
- lstm:share(lstmStep, 'weight', 'gradWeight', 'bias', 'gradBias')
- if step == 1 then
- mlp2 = lstm
- else
- local rnn = nn.Sequential()
- local para = nn.ParallelTable()
- para:add(nn.Identity()):add(mlp2)
- rnn:add(para)
- rnn:add(nn.FlattenTable())
- rnn:add(lstm)
- mlp2 = rnn
- end
-
- -- prepare inputs for mlp2
- if inputs then
- inputs = {input[step], inputs}
- else
- inputs = {input[step], torch.zeros(batchSize, outputSize), torch.zeros(batchSize, outputSize)}
- end
- end
- mlp2:add(nn.SelectTable(1)) --just output the output (not cell)
- local output2 = mlp2:forward(inputs)
-
- mlp2:zeroGradParameters()
- local gradInput2 = mlp2:backward(inputs, gradOutput[nStep], 1/nStep)
- mytester:assertTensorEq(gradInput2[2][2][1], gradInput, 0.00001, "LSTM gradInput error")
- mytester:assertTensorEq(output[nStep], output2, 0.00001, "LSTM output error")
-
- local params, gradParams = lstm:parameters()
- local params2, gradParams2 = lstmStep:parameters()
- mytester:assert(#params == #params2, "LSTM parameters error "..#params.." ~= "..#params2)
- for i, gradParam in ipairs(gradParams) do
- local gradParam2 = gradParams2[i]
- mytester:assertTensorEq(gradParam, gradParam2, 0.000001,
- "LSTM gradParam "..i.." error "..tostring(gradParam).." "..tostring(gradParam2))
- end
-
- gradParams = lstm.recursiveCopy(nil, gradParams)
- gradInput = gradInput:clone()
- mytester:assert(lstm.zeroTensor:sum() == 0, "zeroTensor error")
- lstm:forget()
- output = lstm.recursiveCopy(nil, output)
- local output3 = {}
- lstm:zeroGradParameters()
- for step=1,nStep do
- output3[step] = lstm:forward(input[step])
- lstm:backward(input[step], gradOutput[step], 1)
- end
- local gradInput3 = lstm:updateGradInputThroughTime()
- lstm:accGradParametersThroughTime()
-
- mytester:assert(#output == #output3, "LSTM output size error")
- for i,output in ipairs(output) do
- mytester:assertTensorEq(output, output3[i], 0.00001, "LSTM forget (updateOutput) error "..i)
- end
-
- mytester:assertTensorEq(gradInput, gradInput3, 0.00001, "LSTM updateGradInputThroughTime error")
- --if true then return end
- local params3, gradParams3 = lstm:parameters()
- mytester:assert(#params == #params3, "LSTM parameters error "..#params.." ~= "..#params3)
- for i, gradParam in ipairs(gradParams) do
- local gradParam3 = gradParams3[i]
- mytester:assertTensorEq(gradParam, gradParam3, 0.000001,
- "LSTM gradParam "..i.." error "..tostring(gradParam).." "..tostring(gradParam3))
- end
-end
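-- A minimal sketch of the per-step nn.LSTM interface that the removed test
-- compared against a hand-built, weight-shared clone (pre-removal nnx API,
-- sizes illustrative):
require 'nnx'
local lstm = nn.LSTM(3, 5)          -- inputSize, outputSize
lstm:zeroGradParameters()
for t = 1, 3 do
   local x = torch.randn(2, 3)      -- batchSize x inputSize
   lstm:forward(x)
   lstm:backward(x, torch.randn(2, 5))
end
lstm:backwardThroughTime()          -- BPTT over the stored steps
lstm:forget()                       -- reset the output/cell history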
-
-function nnxtest.Sequencer()
- local batchSize = 4
- local inputSize = 10
- local outputSize = 7
- local nSteps = 5
- local inputModule = nn.Linear(inputSize, outputSize)
- local transferModule = nn.Sigmoid()
- -- test MLP feedback Module (because of Module:representations())
- local feedbackModule = nn.Linear(outputSize, outputSize)
- -- rho = nSteps
- local rnn = nn.Recurrent(outputSize, inputModule, feedbackModule, transferModule, nSteps)
- local rnn2 = rnn:clone()
-
- local inputs, outputs, gradOutputs = {}, {}, {}
- for step=1,nSteps do
- inputs[step] = torch.randn(batchSize, inputSize)
- outputs[step] = rnn:forward(inputs[step])
- gradOutputs[step] = torch.randn(batchSize, outputSize)
- rnn:backward(inputs[step], gradOutputs[step])
- end
- rnn:backwardThroughTime()
-
- local rnn3 = nn.Sequencer(rnn2)
- local outputs3 = rnn3:forward(inputs)
- local gradInputs3 = rnn3:backward(inputs, gradOutputs)
- mytester:assert(#outputs3 == #outputs, "Sequencer output size err")
- mytester:assert(#gradInputs3 == #rnn.gradInputs, "Sequencer gradInputs size err")
- for step,output in ipairs(outputs) do
- mytester:assertTensorEq(outputs3[step], output, 0.00001, "Sequencer output "..step)
- mytester:assertTensorEq(gradInputs3[step], rnn.gradInputs[step], 0.00001, "Sequencer gradInputs "..step)
- end
-end
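-- Wrapping the same kind of rnn in nn.Sequencer, as the removed test does,
-- replaces the manual per-step forward/backward/backwardThroughTime loop with
-- single table-to-table calls (pre-removal nnx API, sizes illustrative):
require 'nnx'
local rnn = nn.Recurrent(7, nn.Linear(10, 7), nn.Linear(7, 7), nn.Sigmoid(), 5)
local seq = nn.Sequencer(rnn)
local inputs, gradOutputs = {}, {}
for t = 1, 5 do
   inputs[t] = torch.randn(4, 10)
   gradOutputs[t] = torch.randn(4, 7)
end
local outputs = seq:forward(inputs)                   -- one output per time-step
local gradInputs = seq:backward(inputs, gradOutputs)  -- drives BPTT internally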
-
-function nnxtest.Repeater()
- local batchSize = 4
- local inputSize = 10
- local outputSize = 7
- local nSteps = 5
- local inputModule = nn.Linear(inputSize, outputSize)
- local transferModule = nn.Sigmoid()
- -- test MLP feedback Module (because of Module:representations())
- local feedbackModule = nn.Linear(outputSize, outputSize)
- -- rho = nSteps
- local rnn = nn.Recurrent(outputSize, inputModule, feedbackModule, transferModule, nSteps)
- local rnn2 = rnn:clone()
-
- local inputs, outputs, gradOutputs = {}, {}, {}
- local input = torch.randn(batchSize, inputSize)
- for step=1,nSteps do
- outputs[step] = rnn:forward(input)
- gradOutputs[step] = torch.randn(batchSize, outputSize)
- rnn:backward(input, gradOutputs[step])
- end
- rnn:backwardThroughTime()
-
- local rnn3 = nn.Repeater(rnn2, nSteps)
- local outputs3 = rnn3:forward(input)
- local gradInput3 = rnn3:backward(input, gradOutputs)
- mytester:assert(#outputs3 == #outputs, "Repeater output size err")
- mytester:assert(#outputs3 == #rnn.gradInputs, "Repeater gradInputs size err")
- local gradInput = rnn.gradInputs[1]:clone():zero()
- for step,output in ipairs(outputs) do
- mytester:assertTensorEq(outputs3[step], output, 0.00001, "Repeater output "..step)
- gradInput:add(rnn.gradInputs[step])
- end
- mytester:assertTensorEq(gradInput3, gradInput, 0.00001, "Repeater gradInput err")
-end
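-- nn.Repeater, covered by the test above, feeds the same input to the wrapped
-- rnn at every time-step, and its gradInput is the sum of the per-step
-- gradInputs (pre-removal nnx API, sizes illustrative):
require 'nnx'
local rnn = nn.Recurrent(7, nn.Linear(10, 7), nn.Linear(7, 7), nn.Sigmoid(), 5)
local rep = nn.Repeater(rnn, 5)             -- wrapped rnn, number of repetitions
local input = torch.randn(4, 10)
local gradOutputs = {}
for t = 1, 5 do gradOutputs[t] = torch.randn(4, 7) end
local outputs = rep:forward(input)          -- table of 5 outputs
local gradInput = rep:backward(input, gradOutputs)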
-
function nnxtest.SpatialNormalization_Gaussian2D()
local inputSize = math.random(11,20)
local kersize = 9