author     nicholas-leonard <nick@nikopia.org>    2015-09-03 16:28:18 +0300
committer  nicholas-leonard <nick@nikopia.org>    2015-09-03 16:28:18 +0300
commit     191a0cbcbcbc5db28d71e8a40cdaac4b53420bbf (patch)
tree       88112f6c1aabcc4038ae1bd44db8a68923ccaf4e
parent     a9026fd1377757a148c464784e2d2dce6580c138 (diff)
Removed deprecated modules
-rw-r--r--  AbstractRecurrent.lua   | 281
-rw-r--r--  BatchNormalization.lua  |  80
-rw-r--r--  LSTM.lua                | 353
-rw-r--r--  Module.lua              |  25
-rw-r--r--  NarrowLookupTable.lua   | 148
-rw-r--r--  Recurrent.lua           | 405
-rw-r--r--  Repeater.lua            |  87
-rw-r--r--  RepeaterCriterion.lua   |  49
-rw-r--r--  Sequencer.lua           | 197
-rw-r--r--  init.lua                |  12
-rw-r--r--  test/test-all.lua       | 476

11 files changed, 0 insertions(+), 2113 deletions(-)
diff --git a/AbstractRecurrent.lua b/AbstractRecurrent.lua
deleted file mode 100644
index cf40626..0000000
--- a/AbstractRecurrent.lua
+++ /dev/null
@@ -1,281 +0,0 @@
-local AbstractRecurrent, parent = torch.class('nn.AbstractRecurrent', 'nn.Container')
-
-function AbstractRecurrent:__init(rho)
-   parent.__init(self)
-
-   self.rho = rho --the maximum number of time steps to BPTT
-
-   self.fastBackward = true
-   self.copyInputs = true
-
-   self.inputs = {}
-   self.outputs = {}
-   self.gradOutputs = {}
-   self.scales = {}
-
-   self.gradParametersAccumulated = false
-   self.step = 1
-
-   -- stores internal states of Modules at different time-steps
-   self.recurrentOutputs = {}
-   self.recurrentGradInputs = {}
-
-   self:reset()
-end
-
-local function recursiveResizeAs(t1,t2)
-   if torch.type(t2) == 'table' then
-      t1 = (torch.type(t1) == 'table') and t1 or {t1}
-      for key,_ in pairs(t2) do
-         t1[key], t2[key] = recursiveResizeAs(t1[key], t2[key])
-      end
-   elseif torch.isTensor(t2) then
-      t1 = torch.isTensor(t1) and t1 or t2.new()
-      t1:resizeAs(t2)
-   else
-      error("expecting nested tensors or tables. Got "..
-            torch.type(t1).." and "..torch.type(t2).." instead")
-   end
-   return t1, t2
-end
-AbstractRecurrent.recursiveResizeAs = recursiveResizeAs
-
-local function recursiveSet(t1,t2)
-   if torch.type(t2) == 'table' then
-      t1 = (torch.type(t1) == 'table') and t1 or {t1}
-      for key,_ in pairs(t2) do
-         t1[key], t2[key] = recursiveSet(t1[key], t2[key])
-      end
-   elseif torch.isTensor(t2) then
-      t1 = t1 or t2.new()
-      t1:set(t2)
-   else
-      error("expecting nested tensors or tables. Got "..
-            torch.type(t1).." and "..torch.type(t2).." instead")
-   end
-   return t1, t2
-end
-AbstractRecurrent.recursiveSet = recursiveSet
-
-local function recursiveCopy(t1,t2)
-   if torch.type(t2) == 'table' then
-      t1 = (torch.type(t1) == 'table') and t1 or {t1}
-      for key,_ in pairs(t2) do
-         t1[key], t2[key] = recursiveCopy(t1[key], t2[key])
-      end
-   elseif torch.isTensor(t2) then
-      t1 = torch.isTensor(t1) and t1 or t2.new()
-      t1:resizeAs(t2):copy(t2)
-   else
-      error("expecting nested tensors or tables. Got "..
-            torch.type(t1).." and "..torch.type(t2).." instead")
-   end
-   return t1, t2
-end
-AbstractRecurrent.recursiveCopy = recursiveCopy
-
-local function recursiveAdd(t1, t2)
-   if torch.type(t2) == 'table' then
-      t1 = (torch.type(t1) == 'table') and t1 or {t1}
-      for key,_ in pairs(t2) do
-         t1[key], t2[key] = recursiveAdd(t1[key], t2[key])
-      end
-   elseif torch.isTensor(t2) and torch.isTensor(t2) then
-      t1:add(t2)
-   else
-      error("expecting nested tensors or tables. Got "..
-            torch.type(t1).." and "..torch.type(t2).." instead")
-   end
-   return t1, t2
-end
-AbstractRecurrent.recursiveAdd = recursiveAdd
-
-local function recursiveTensorEq(t1, t2)
-   if torch.type(t2) == 'table' then
-      local isEqual = true
-      if torch.type(t1) ~= 'table' then
-         return false
-      end
-      for key,_ in pairs(t2) do
-         isEqual = isEqual and recursiveTensorEq(t1[key], t2[key])
-      end
-      return isEqual
-   elseif torch.isTensor(t2) and torch.isTensor(t2) then
-      local diff = t1-t2
-      local err = diff:abs():max()
-      return err < 0.00001
-   else
-      error("expecting nested tensors or tables. Got "..
-            torch.type(t1).." and "..torch.type(t2).." instead")
-   end
-end
-AbstractRecurrent.recursiveTensorEq = recursiveTensorEq
-
-local function recursiveNormal(t2)
-   if torch.type(t2) == 'table' then
-      for key,_ in pairs(t2) do
-         t2[key] = recursiveNormal(t2[key])
-      end
-   elseif torch.isTensor(t2) then
-      t2:normal()
-   else
-      error("expecting tensor or table thereof. Got "
-            ..torch.type(t2).." instead")
-   end
-   return t2
-end
-AbstractRecurrent.recursiveNormal = recursiveNormal
-
-function AbstractRecurrent:updateGradInput(input, gradOutput)
-   -- Back-Propagate Through Time (BPTT) happens in updateParameters()
-   -- for now we just keep a list of the gradOutputs
-   self.gradOutputs[self.step-1] = self.recursiveCopy(self.gradOutputs[self.step-1], gradOutput)
-end
-
-function AbstractRecurrent:accGradParameters(input, gradOutput, scale)
-   -- Back-Propagate Through Time (BPTT) happens in updateParameters()
-   -- for now we just keep a list of the scales
-   self.scales[self.step-1] = scale
-end
-
-function AbstractRecurrent:backwardUpdateThroughTime(learningRate)
-   local gradInput = self:updateGradInputThroughTime()
-   self:accUpdateGradParametersThroughTime(learningRate)
-   return gradInput
-end
-
-function AbstractRecurrent:updateParameters(learningRate)
-   if self.gradParametersAccumulated then
-      for i=1,#self.modules do
-         self.modules[i]:updateParameters(learningRate)
-      end
-   else
-      self:backwardUpdateThroughTime(learningRate)
-   end
-end
-
--- goes hand in hand with the next method : forget()
-function AbstractRecurrent:recycle()
-   -- +1 is to skip initialModule
-   if self.step > self.rho + 1 then
-      assert(self.recurrentOutputs[self.step] == nil)
-      assert(self.recurrentOutputs[self.step-self.rho] ~= nil)
-      self.recurrentOutputs[self.step] = self.recurrentOutputs[self.step-self.rho]
-      self.recurrentGradInputs[self.step] = self.recurrentGradInputs[self.step-self.rho]
-      self.recurrentOutputs[self.step-self.rho] = nil
-      self.recurrentGradInputs[self.step-self.rho] = nil
-      -- need to keep rho+1 of these
-      self.outputs[self.step] = self.outputs[self.step-self.rho-1]
-      self.outputs[self.step-self.rho-1] = nil
-   end
-   if self.step > self.rho then
-      assert(self.inputs[self.step] == nil)
-      assert(self.inputs[self.step-self.rho] ~= nil)
-      self.inputs[self.step] = self.inputs[self.step-self.rho]
-      self.gradOutputs[self.step] = self.gradOutputs[self.step-self.rho]
-      self.inputs[self.step-self.rho] = nil
-      self.gradOutputs[self.step-self.rho] = nil
-      self.scales[self.step-self.rho] = nil
-   end
-end
-
-function AbstractRecurrent:forget(offset)
-   offset = offset or 1
-   if self.train ~= false then
-      -- bring all states back to the start of the sequence buffers
-      local lastStep = self.step - 1
-
-      if lastStep > self.rho + offset then
-         local i = 1 + offset
-         for step = lastStep-self.rho+offset,lastStep do
-            self.recurrentOutputs[i] = self.recurrentOutputs[step]
-            self.recurrentGradInputs[i] = self.recurrentGradInputs[step]
-            self.recurrentOutputs[step] = nil
-            self.recurrentGradInputs[step] = nil
-            -- we keep rho+1 of these : outputs[k]=outputs[k+rho+1]
-            self.outputs[i-1] = self.outputs[step]
-            self.outputs[step] = nil
-            i = i + 1
-         end
-      end
-
-      if lastStep > self.rho then
-         local i = 1
-         for step = lastStep-self.rho+1,lastStep do
-            self.inputs[i] = self.inputs[step]
-            self.gradOutputs[i] = self.gradOutputs[step]
-            self.inputs[step] = nil
-            self.gradOutputs[step] = nil
-            self.scales[step] = nil
-            i = i + 1
-         end
-      end
-   end
-
-   -- forget the past inputs; restart from first step
-   self.step = 1
-end
-
--- tests whether or not the mlp can be used internally for recursion.
--- forward A, backward A, forward B, forward A should be consistent with
--- forward B, backward B, backward A where A and B each
--- have their own gradInputs/outputs.
-function AbstractRecurrent.isRecursable(mlp, input)
-   local output = recursiveCopy(nil, mlp:forward(input)) --forward A
-   local gradOutput = recursiveNormal(recursiveCopy(nil, output))
-   mlp:zeroGradParameters()
-   local gradInput = recursiveCopy(nil, mlp:backward(input, gradOutput)) --backward A
-   local params, gradParams = mlp:parameters()
-   gradParams = recursiveCopy(nil, gradParams)
-
-   -- output/gradInput are the only internal module states that we track
-   local recurrentOutputs = {}
-   local recurrentGradInputs = {}
-
-   local modules = mlp:listModules()
-
-   -- save the output/gradInput states of A
-   for i,modula in ipairs(modules) do
-      recurrentOutputs[i] = modula.output
-      recurrentGradInputs[i] = modula.gradInput
-   end
-   -- set the output/gradInput states for B
-   local recurrentOutputs2 = {}
-   local recurrentGradInputs2 = {}
-   for i,modula in ipairs(modules) do
-      modula.output = recursiveResizeAs(recurrentOutputs2[i], modula.output)
-      modula.gradInput = recursiveResizeAs(recurrentGradInputs2[i], modula.gradInput)
-   end
-
-   local input2 = recursiveNormal(recursiveCopy(nil, input))
-   local gradOutput2 = recursiveNormal(recursiveCopy(nil, gradOutput))
-   local output2 = mlp:forward(input2) --forward B
-   mlp:zeroGradParameters()
-   local gradInput2 = mlp:backward(input2, gradOutput2) --backward B
-
-   -- save the output/gradInput state of B
-   for i,modula in ipairs(modules) do
-      recurrentOutputs2[i] = modula.output
-      recurrentGradInputs2[i] = modula.gradInput
-   end
-
-   -- set the output/gradInput states for A
-   for i,modula in ipairs(modules) do
-      modula.output = recursiveResizeAs(recurrentOutputs[i], modula.output)
-      modula.gradInput = recursiveResizeAs(recurrentGradInputs[i], modula.gradInput)
-   end
-
-   mlp:zeroGradParameters()
-   local gradInput3 = mlp:backward(input, gradOutput) --forward A
-   local gradInputTest = recursiveTensorEq(gradInput, gradInput3)
-   local params3, gradParams3 = mlp:parameters()
-   local nEq = 0
-   for i,gradParam in ipairs(gradParams) do
-      nEq = nEq + (recursiveTensorEq(gradParam, gradParams3[i]) and 1 or 0)
-   end
-   local gradParamsTest = (nEq == #gradParams3)
-   mlp:zeroGradParameters()
-   return gradParamsTest and gradInputTest, gradParamsTest, gradInputTest
-end
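For reference, a minimal sketch of how the isRecursable check above was meant to be called (editor's illustration, not part of the commit; it assumes the pre-removal nnx API and the Module:listModules() extension removed further below):

   require 'nnx'

   -- a feed-forward module whose output/gradInput state should survive
   -- being saved and restored across time-steps
   local mlp = nn.Sequential()
   mlp:add(nn.Linear(10, 10))
   mlp:add(nn.Tanh())

   -- true when the module can safely be reused internally for recursion
   print(nn.AbstractRecurrent.isRecursable(mlp, torch.randn(10)))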
diff --git a/BatchNormalization.lua b/BatchNormalization.lua
deleted file mode 100644
index 90fadfa..0000000
--- a/BatchNormalization.lua
+++ /dev/null
@@ -1,80 +0,0 @@
---Based on: http://arxiv.org/pdf/1502.03167v3
---Usage example:
-------------------------------------
--- model:add(nn.BatchNormalization(3 * 32 * 32))
-------------------------------------
-
-require 'nn'
-require 'cunn'
-local BatchNormalization, parent = torch.class('nn.BatchNormalization', 'nn.Module')
-
-function BatchNormalization:__init(inputSize)
- parent.__init(self)
- self.bias = torch.Tensor(inputSize)
- self.weight = torch.Tensor(inputSize)
- self.gradBias = torch.Tensor(inputSize)
- self.gradWeight = torch.Tensor(inputSize)
-
- self:reset(stdv)
-end
-
-function BatchNormalization:reset(stdv)
- if stdv then
- stdv = stdv * math.sqrt(3)
- else
- stdv = 1./math.sqrt(self.bias:nElement())
- end
-
- self.bias:uniform(-stdv,stdv)
- self.weight:uniform(-stdv,stdv)
-end
-
-function BatchNormalization:updateOutput(input)
- self.output = self.output or input.new()
- self.output:resizeAs(input)
- self.size = input:nElement()
- self.std = torch.std(input) * torch.sqrt((self.size - 1.0) / self.size )
- self.mean = torch.mean(input)
- self.stdcube = torch.pow(self.std,3)
- self.ones = torch.Tensor(self.size):fill(1.0)-- :cuda()
- self.output:copy(input):add(-self.mean):div(self.std)
- self.buffer = self.buffer or input.new()
- self.buffer:resizeAs(self.output):copy(self.output)
- self.output:cmul(self.weight)
- self.output:add(self.bias)
-return self.output
-end
-
-function BatchNormalization:updateGradInput(input, gradOutput)
-
- self.buffer = self.buffer or gradOutput.new()
- self.buffer:resizeAs(gradOutput):copy(gradOutput)
- self.buffer:cmul(self.weight)
- self.dotprod1 = torch.dot(self.ones,self.buffer)
- local der1 = self.ones:clone()
- der1:mul(- self.dotprod1 / self.size/self.std)
- -- x_i - mu
- local der2 = input:clone()
- der2:add(-self.mean)
-
- self.dotprod2 = torch.dot(der2,self.buffer)
- der2:mul(self.dotprod2 / self.size / self.stdcube)
-
- self.gradInput = self.buffer:clone()
-
- self.gradInput:div(self.std)
-
- self.gradInput:add(der1)
- self.gradInput:add(-der2)
- return self.gradInput
-end
-
-function BatchNormalization:accGradParameters(input, gradOutput, scale)
- scale = scale or 1
-
- self.gradBias:add(scale,gradOutput)
- self.gradWeight:addcmul(scale,self.buffer,gradOutput)
-end
-
-
-
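For context, a minimal forward/backward sketch for the module above (editor's illustration, not part of the commit; it assumes the pre-removal nnx API and 1D input, since this module normalizes over the whole tensor):

   require 'nnx'

   local bn = nn.BatchNormalization(3 * 32 * 32) -- as in the usage comment above
   local input = torch.randn(3 * 32 * 32)

   local output = bn:forward(input)  -- whiten, then scale by weight and shift by bias
   bn:zeroGradParameters()
   local gradInput = bn:backward(input, torch.randn(3 * 32 * 32))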
diff --git a/LSTM.lua b/LSTM.lua
deleted file mode 100644
index a3541b8..0000000
--- a/LSTM.lua
+++ /dev/null
@@ -1,353 +0,0 @@
-------------------------------------------------------------------------
---[[ LSTM ]]--
--- Long Short Term Memory architecture.
--- Ref. A.: http://arxiv.org/pdf/1303.5778v1 (blueprint for this module)
--- B. http://web.eecs.utk.edu/~itamar/courses/ECE-692/Bobby_paper1.pdf
--- C. https://github.com/wojzaremba/lstm
--- Expects 1D or 2D input.
--- The first input in sequence uses zero value for cell and hidden state
-------------------------------------------------------------------------
-local LSTM, parent = torch.class('nn.LSTM', 'nn.AbstractRecurrent')
-
-function LSTM:__init(inputSize, outputSize, rho)
-   parent.__init(self, rho or 999999999999)
-   self.inputSize = inputSize
-   self.outputSize = outputSize
-   -- build the model
-   self.recurrentModule = self:buildModel()
-   -- make it work with nn.Container
-   self.modules[1] = self.recurrentModule
-
-   -- for output(0), cell(0) and gradCell(T)
-   self.zeroTensor = torch.Tensor()
-
-   self.cells = {}
-   self.gradCells = {}
-end
-
--------------------------- factory methods -----------------------------
-function LSTM:buildGate()
-   -- Note : gate expects an input table : {input, output(t-1), cell(t-1)}
-   local gate = nn.Sequential()
-   local input2gate = nn.Linear(self.inputSize, self.outputSize)
-   local output2gate = nn.Linear(self.outputSize, self.outputSize)
-   local cell2gate = nn.CMul(self.outputSize) -- diagonal cell to gate weight matrix
-   --output2gate:noBias() --TODO
-   local para = nn.ParallelTable()
-   para:add(input2gate):add(output2gate):add(cell2gate)
-   gate:add(para)
-   gate:add(nn.CAddTable())
-   gate:add(nn.Sigmoid())
-   return gate
-end
-
-function LSTM:buildInputGate()
-   self.inputGate = self:buildGate()
-   return self.inputGate
-end
-
-function LSTM:buildForgetGate()
-   self.forgetGate = self:buildGate()
-   return self.forgetGate
-end
-
-function LSTM:buildHidden()
-   local hidden = nn.Sequential()
-   local input2hidden = nn.Linear(self.inputSize, self.outputSize)
-   local output2hidden = nn.Linear(self.outputSize, self.outputSize)
-   local para = nn.ParallelTable()
-   --output2hidden:noBias()
-   para:add(input2hidden):add(output2hidden)
-   -- input is {input, output(t-1), cell(t-1)}, but we only need {input, output(t-1)}
-   local concat = nn.ConcatTable()
-   concat:add(nn.SelectTable(1)):add(nn.SelectTable(2))
-   hidden:add(concat)
-   hidden:add(para)
-   hidden:add(nn.CAddTable())
-   self.hiddenLayer = hidden
-   return hidden
-end
-
-function LSTM:buildCell()
-   -- build
-   self.inputGate = self:buildInputGate()
-   self.forgetGate = self:buildForgetGate()
-   self.hiddenLayer = self:buildHidden()
-   -- forget = forgetGate{input, output(t-1), cell(t-1)} * cell(t-1)
-   local forget = nn.Sequential()
-   local concat = nn.ConcatTable()
-   concat:add(self.forgetGate):add(nn.SelectTable(3))
-   forget:add(concat)
-   forget:add(nn.CMulTable())
-   -- input = inputGate{input, output(t-1), cell(t-1)} * hiddenLayer{input, output(t-1), cell(t-1)}
-   local input = nn.Sequential()
-   local concat2 = nn.ConcatTable()
-   concat2:add(self.inputGate):add(self.hiddenLayer)
-   input:add(concat2)
-   input:add(nn.CMulTable())
-   -- cell(t) = forget + input
-   local cell = nn.Sequential()
-   local concat3 = nn.ConcatTable()
-   concat3:add(forget):add(input)
-   cell:add(concat3)
-   cell:add(nn.CAddTable())
-   self.cellLayer = cell
-   return cell
-end
-
-function LSTM:buildOutputGate()
-   self.outputGate = self:buildGate()
-   return self.outputGate
-end
-
--- cell(t) = cellLayer{input, output(t-1), cell(t-1)}
--- output(t) = outputGate{input, output(t-1), cell(t)}*tanh(cell(t))
--- output of Model is table : {output(t), cell(t)}
-function LSTM:buildModel()
-   -- build components
-   self.cellLayer = self:buildCell()
-   self.outputGate = self:buildOutputGate()
-   -- assemble
-   local concat = nn.ConcatTable()
-   local concat2 = nn.ConcatTable()
-   concat2:add(nn.SelectTable(1)):add(nn.SelectTable(2))
-   concat:add(concat2):add(self.cellLayer)
-   local model = nn.Sequential()
-   model:add(concat)
-   -- output of concat is {{input, output}, cell(t)},
-   -- so flatten to {input, output, cell(t)}
-   model:add(nn.FlattenTable())
-   local cellAct = nn.Sequential()
-   cellAct:add(nn.SelectTable(3))
-   cellAct:add(nn.Tanh())
-   local concat3 = nn.ConcatTable()
-   concat3:add(self.outputGate):add(cellAct)
-   local output = nn.Sequential()
-   output:add(concat3)
-   output:add(nn.CMulTable())
-   -- we want the model to output : {output(t), cell(t)}
-   local concat4 = nn.ConcatTable()
-   concat4:add(output):add(nn.SelectTable(3))
-   model:add(concat4)
-   return model
-end
-
-------------------------- forward backward -----------------------------
-function LSTM:updateOutput(input)
-   local prevOutput, prevCell
-   if self.step == 1 then
-      prevOutput = self.zeroTensor
-      prevCell = self.zeroTensor
-      if input:dim() == 2 then
-         self.zeroTensor:resize(input:size(1), self.outputSize):zero()
-      else
-         self.zeroTensor:resize(self.outputSize):zero()
-      end
-      self.outputs[0] = self.zeroTensor
-      self.cells[0] = self.zeroTensor
-   else
-      -- previous output and cell of this module
-      prevOutput = self.output
-      prevCell = self.cell
-   end
-
-   -- output(t), cell(t) = lstm{input(t), output(t-1), cell(t-1)}
-   local output, cell
-   if self.train ~= false then
-      -- set/save the output states
-      local modules = self.recurrentModule:listModules()
-      self:recycle()
-      local recurrentOutputs = self.recurrentOutputs[self.step]
-      if not recurrentOutputs then
-         recurrentOutputs = {}
-         self.recurrentOutputs[self.step] = recurrentOutputs
-      end
-      for i,modula in ipairs(modules) do
-         local output_ = self.recursiveResizeAs(recurrentOutputs[i], modula.output)
-         modula.output = output_
-      end
-      -- the actual forward propagation
-      output, cell = unpack(self.recurrentModule:updateOutput{input, prevOutput, prevCell})
-
-      for i,modula in ipairs(modules) do
-         recurrentOutputs[i] = modula.output
-      end
-   else
-      output, cell = unpack(self.recurrentModule:updateOutput{input, prevOutput, prevCell})
-   end
-
-   if self.train ~= false then
-      local input_ = self.inputs[self.step]
-      self.inputs[self.step] = self.copyInputs
-         and self.recursiveCopy(input_, input)
-         or self.recursiveSet(input_, input)
-   end
-
-   self.outputs[self.step] = output
-   self.cells[self.step] = cell
-
-   self.output = output
-   self.cell = cell
-
-   self.step = self.step + 1
-   self.gradParametersAccumulated = false
-   -- note that we don't return the cell, just the output
-   return self.output
-end
-
-function LSTM:backwardThroughTime()
-   assert(self.step > 1, "expecting at least one updateOutput")
-   self.gradInputs = {}
-   local rho = math.min(self.rho, self.step-1)
-   local stop = self.step - rho
-   if self.fastBackward then
-      local gradInput, gradPrevOutput, gradCell
-      for step=self.step-1,math.max(stop,1),-1 do
-         -- set the output/gradOutput states of current Module
-         local modules = self.recurrentModule:listModules()
-         local recurrentOutputs = self.recurrentOutputs[step]
-         local recurrentGradInputs = self.recurrentGradInputs[step]
-         if not recurrentGradInputs then
-            recurrentGradInputs = {}
-            self.recurrentGradInputs[step] = recurrentGradInputs
-         end
-
-         for i,modula in ipairs(modules) do
-            local output, gradInput = modula.output, modula.gradInput
-            assert(gradInput, "missing gradInput")
-            local output_ = recurrentOutputs[i]
-            assert(output_, "backwardThroughTime should be preceded by updateOutput")
-            modula.output = output_
-            modula.gradInput = self.recursiveResizeAs(recurrentGradInputs[i], gradInput) --resize, NOT copy
-         end
-
-         -- backward propagate through this step
-         local gradOutput = self.gradOutputs[step]
-         if gradPrevOutput then
-            self.recursiveAdd(gradOutput, gradPrevOutput)
-         end
-
-         self.gradCells[step] = gradCell
-         local scale = self.scales[step]/rho
-
-         local inputTable = {self.inputs[step], self.outputs[step-1], self.cells[step-1]}
-         local gradInputTable = self.recurrentModule:backward(inputTable, {gradOutput, gradCell}, scale)
-         gradInput, gradPrevOutput, gradCell = unpack(gradInputTable)
-         table.insert(self.gradInputs, 1, gradInput)
-
-         for i,modula in ipairs(modules) do
-            recurrentGradInputs[i] = modula.gradInput
-         end
-      end
-      return gradInput
-   else
-      local gradInput = self:updateGradInputThroughTime()
-      self:accGradParametersThroughTime()
-      return gradInput
-   end
-end
-
-function LSTM:updateGradInputThroughTime()
-   assert(self.step > 1, "expecting at least one updateOutput")
-   self.gradInputs = {}
-   local gradInput, gradPrevOutput
-   local gradCell = self.zeroTensor
-   local rho = math.min(self.rho, self.step-1)
-   local stop = self.step - rho
-   for step=self.step-1,math.max(stop,1),-1 do
-      -- set the output/gradOutput states of current Module
-      local modules = self.recurrentModule:listModules()
-      local recurrentOutputs = self.recurrentOutputs[step]
-      local recurrentGradInputs = self.recurrentGradInputs[step]
-      if not recurrentGradInputs then
-         recurrentGradInputs = {}
-         self.recurrentGradInputs[step] = recurrentGradInputs
-      end
-      for i,modula in ipairs(modules) do
-         local output, gradInput = modula.output, modula.gradInput
-         local output_ = recurrentOutputs[i]
-         assert(output_, "updateGradInputThroughTime should be preceded by updateOutput")
-         modula.output = output_
-         modula.gradInput = self.recursiveResizeAs(recurrentGradInputs[i], gradInput)
-      end
-
-      -- backward propagate through this step
-      local gradOutput = self.gradOutputs[step]
-      if gradPrevOutput then
-         self.recursiveAdd(gradOutput, gradPrevOutput)
-      end
-
-      self.gradCells[step] = gradCell
-      local scale = self.scales[step]/rho
-      local inputTable = {self.inputs[step], self.outputs[step-1], self.cells[step-1]}
-      local gradInputTable = self.recurrentModule:updateGradInput(inputTable, {gradOutput, gradCell}, scale)
-      gradInput, gradPrevOutput, gradCell = unpack(gradInputTable)
-      table.insert(self.gradInputs, 1, gradInput)
-
-      for i,modula in ipairs(modules) do
-         recurrentGradInputs[i] = modula.gradInput
-      end
-   end
-
-   return gradInput
-end
-
-function LSTM:accGradParametersThroughTime()
-   local rho = math.min(self.rho, self.step-1)
-   local stop = self.step - rho
-   for step=self.step-1,math.max(stop,1),-1 do
-      -- set the output/gradOutput states of current Module
-      local modules = self.recurrentModule:listModules()
-      local recurrentOutputs = self.recurrentOutputs[step]
-      local recurrentGradInputs = self.recurrentGradInputs[step]
-
-      for i,modula in ipairs(modules) do
-         local output, gradInput = modula.output, modula.gradInput
-         local output_ = recurrentOutputs[i]
-         local gradInput_ = recurrentGradInputs[i]
-         assert(output_, "accGradParametersThroughTime should be preceded by updateOutput")
-         assert(gradInput_, "accGradParametersThroughTime should be preceded by updateGradInputThroughTime")
-         modula.output = output_
-         modula.gradInput = gradInput_
-      end
-
-      -- backward propagate through this step
-      local scale = self.scales[step]/rho
-      local inputTable = {self.inputs[step], self.outputs[step-1], self.cells[step-1]}
-      local gradOutputTable = {self.gradOutputs[step], self.gradCells[step]}
-      self.recurrentModule:accGradParameters(inputTable, gradOutputTable, scale)
-   end
-
-   self.gradParametersAccumulated = true
-   return gradInput
-end
-
-function LSTM:accUpdateGradParametersThroughTime(lr)
-   local rho = math.min(self.rho, self.step-1)
-   local stop = self.step - rho
-   for step=self.step-1,math.max(stop,1),-1 do
-      -- set the output/gradOutput states of current Module
-      local modules = self.recurrentModule:listModules()
-      local recurrentOutputs = self.recurrentOutputs[step]
-      local recurrentGradInputs = self.recurrentGradInputs[step]
-
-      for i,modula in ipairs(modules) do
-         local output, gradInput = modula.output, modula.gradInput
-         local output_ = recurrentOutputs[i]
-         local gradInput_ = recurrentGradInputs[i]
-         assert(output_, "accGradParametersThroughTime should be preceded by updateOutput")
-         assert(gradInput_, "accGradParametersThroughTime should be preceded by updateGradInputThroughTime")
-         modula.output = output_
-         modula.gradInput = gradInput_
-      end
-
-      -- backward propagate through this step
-      local scale = self.scales[step]/rho
-      local inputTable = {self.inputs[step], self.outputs[step-1], self.cells[step]}
-      local gradOutputTable = {self.gradOutputs[step], self.gradCells[step]}
-      self.recurrentModule:accUpdateGradParameters(inputTable, gradOutputTable, lr*scale)
-   end
-
-   return gradInput
-end
diff --git a/Module.lua b/Module.lua
deleted file mode 100644
index 298e2ce..0000000
--- a/Module.lua
+++ /dev/null
@@ -1,25 +0,0 @@
-local Module = nn.Module
-
--- returns a list of modules
-function Module:listModules()
-   local function tinsert(to, from)
-      if torch.type(from) == 'table' then
-         for i=1,#from do
-            tinsert(to,from[i])
-         end
-      else
-         table.insert(to,from)
-      end
-   end
-   -- include self first
-   local modules = {self}
-   if self.modules then
-      for i=1,#self.modules do
-         local modulas = self.modules[i]:listModules()
-         if modulas then
-            tinsert(modules,modulas)
-         end
-      end
-   end
-   return modules
-end
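A minimal sketch of driving the LSTM above one time-step at a time (editor's illustration, not part of the commit; it assumes the pre-removal nnx API):

   require 'nnx'

   local lstm = nn.LSTM(10, 20) -- inputSize, outputSize; rho defaults to a huge value
   for step = 1, 5 do
      local input = torch.randn(10)
      lstm:forward(input)                   -- output(t) from {x(t), h(t-1), c(t-1)}
      lstm:backward(input, torch.randn(20)) -- only logs gradOutputs and scales
   end
   lstm:updateParameters(0.1)               -- BPTT actually happens here
   lstm:forget()                            -- reset to step 1 for the next sequence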
diff --git a/NarrowLookupTable.lua b/NarrowLookupTable.lua
deleted file mode 100644
index aef4c2c..0000000
--- a/NarrowLookupTable.lua
+++ /dev/null
@@ -1,148 +0,0 @@
-------------------------------------------------------------------------
---[[ NarrowLookupTable ]]--
--- Concatenates embeddings with descending narrowed sizes
--- (ascDelta = true).
--- Useful for language models, where most recent words in context
--- are more useful in predicting next word than older ones.
--- If input is ordered furthest to nearest word, use ascDelta = false.
-------------------------------------------------------------------------
-local NarrowLookupTable, parent = torch.class('nn.NarrowLookupTable', 'nn.LookupTable')
-
-function NarrowLookupTable:__init(deltaSize, nIndex, embedSize, ascDelta)
-   nn.Module.__init(self)
-   self.deltaSize = deltaSize
-   self.deltaSizes = torch.LongTensor()
-   self.embedSize = embedSize
-   self.ascDelta = (ascDelta == nil) and true or ascDelta
-
-   self.weight = torch.Tensor(nIndex, embedSize)
-   self.gradWeight = torch.Tensor(nIndex, embedSize):zero()
-   self.inputs = {}
-
-   self.accUpdate = false
-   self.nIndex = 0
-
-   self.nBackward = 0
-   self:reset()
-end
-
--- this could be overrided in a subclass :
-function NarrowLookupTable:buildSizes(nIndex)
-   if self.nIndex == nIndex then
-      return
-   end
-   self.deltaSizes:resize(nIndex)
-   local deltaSize = 0
-   if self.ascDelta then
-      for i=1,self.deltaSizes:size(1),1 do
-         self.deltaSizes[i] = deltaSize
-         deltaSize = deltaSize + self.deltaSize
-      end
-   else
-      for i=self.deltaSizes:size(1),1,-1 do
-         self.deltaSizes[i] = deltaSize
-         deltaSize = deltaSize + self.deltaSize
-      end
-   end
-   self.outputSize = nIndex*self.embedSize - self.deltaSizes:sum()
-   self.nIndex = nIndex
-end
-
-function NarrowLookupTable:updateOutput(input)
-   if input:dim() == 1 then
-      local nIndex = input:size(1)
-      self:buildSizes(nIndex)
-      self.output:resize(self.outputSize)
-      local embedIdx = 1
-      for i=1,nIndex do
-         local embedSize = self.embedSize - self.deltaSizes[i]
-         local embed = self.weight[input[i]]:narrow(1, 1, embedSize)
-         self.output:narrow(1, embedIdx, embedSize):copy(embed)
-         embedIdx = embedIdx + embedSize
-      end
-   elseif input:dim() == 2 then
-      local nExample = input:size(1)
-      local nIndex = input:size(2)
-      self:buildSizes(nIndex)
-      self.output:resize(nExample, self.outputSize)
-      for i=1,nExample do
-         local output = self.output:select(1, i)
-         local input = input:select(1, i)
-         local embedIdx = 1
-         for j=1,nIndex do
-            local embedSize = self.embedSize - self.deltaSizes[j]
-            local embed = self.weight[input[j]]:narrow(1, 1, embedSize)
-            output:narrow(1, embedIdx, embedSize):copy(embed)
-            embedIdx = embedIdx + embedSize
-         end
-      end
-   end
-
-   return self.output
-end
-
-function NarrowLookupTable:accGradParameters(input, gradOutput, scale)
-   scale = scale or 1
-   if input:dim() == 1 then
-      self.nBackward = self.nBackward + 1
-      local embedIdx = 1
-      for i=1,input:size(1) do
-         local k = input[i]
-         self.inputs[k] = (self.inputs[k] or 0) + 1
-         local embedSize = self.embedSize - self.deltaSizes[i]
-         local gradEmbed = gradOutput:narrow(1, embedIdx, embedSize)
-         self.gradWeight[input[i]]:narrow(1, 1, embedSize):add(gradEmbed)
-         embedIdx = embedIdx + embedSize
-      end
-   elseif input:dim() == 2 then
-      self.nBackward = self.nBackward + input:size(1)
-      for i=1,input:size(1) do
-         local input = input:select(1, i)
-         local gradOutput = gradOutput:select(1, i)
-         local embedIdx = 1
-         for j=1,input:size(1) do
-            local k = input[j]
-            self.inputs[k] = (self.inputs[k] or 0) + 1
-            local embedSize = self.embedSize - self.deltaSizes[j]
-            local gradEmbed = gradOutput:narrow(1, embedIdx, embedSize)
-            self.gradWeight[input[j]]:narrow(1, 1, embedSize):add(gradEmbed)
-            embedIdx = embedIdx + embedSize
-         end
-      end
-   end
-end
-
-function NarrowLookupTable:accUpdateGradParameters(input, gradOutput, lr)
-   if input:dim() == 1 then
-      local embedIdx = 1
-      for i=1,input:size(1) do
-         local k = input[j]
-         local kscale = self:scaleUpdateByKey(k)
-         local embedSize = self.embedSize - self.deltaSizes[i]
-         local gradEmbed = gradOutput:narrow(1, embedIdx, embedSize)
-         self.weight[input[i]]:narrow(1, 1, embedSize):add(-lr*kscale, gradEmbed)
-         embedIdx = embedIdx + embedSize
-      end
-   elseif input:dim() == 2 then
-      for i=1,input:size(1) do
-         local input = input:select(1, i)
-         local gradOutput = gradOutput:select(1, i)
-         local embedIdx = 1
-         for j=1,input:size(1) do
-            local k = input[j]
-            local kscale = self:scaleUpdateByKey(k)
-            local embedSize = self.embedSize - self.deltaSizes[j]
-            local gradEmbed = gradOutput:narrow(1, embedIdx, embedSize)
-            self.weight[input[j]]:narrow(1, 1, embedSize):add(-lr*kscale, gradEmbed)
-            embedIdx = embedIdx + embedSize
-         end
-      end
-   end
-end
-
-function NarrowLookupTable:type(type)
-   self.gradInput = self.gradInput:type(type)
-   self.output = self.output:type(type)
-   self.weight = self.weight:type(type)
-   self.gradWeight = self.gradWeight:type(type)
-end
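A small sketch of the narrowing behaviour described in the header above (editor's illustration, not part of the commit; it assumes the pre-removal nnx API):

   require 'nnx'

   -- vocabulary of 100 words, 8-dim embeddings, each later context word
   -- narrowed by deltaSize = 2 (ascDelta defaults to true)
   local lookup = nn.NarrowLookupTable(2, 100, 8)

   local input = torch.LongTensor{3, 41, 7} -- a context of 3 word indices
   local output = lookup:forward(input)
   print(output:size(1)) -- 18 : concatenation of embeddings of size 8, 6 and 4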
diff --git a/Recurrent.lua b/Recurrent.lua
deleted file mode 100644
index eb5c61d..0000000
--- a/Recurrent.lua
+++ /dev/null
@@ -1,405 +0,0 @@
-------------------------------------------------------------------------
---[[ Recurrent ]]--
--- Ref. A.: http://goo.gl/vtVGkO (Mikolov et al.)
--- B. http://goo.gl/hu1Lqm
--- Processes the sequence one timestep (forward/backward) at a time.
--- A call to backward only keeps a log of the gradOutputs and scales.
--- Back-Propagation Through Time (BPTT) is done when updateParameters
--- is called. The Module keeps a list of all previous representations
--- (Module.outputs), including intermediate ones for BPTT.
--- To use this module with batches, we suggest using different
--- sequences of the same size within a batch and calling
--- updateParameters() at the end of the Sequence.
--- Note that this won't work with modules that use more than the
--- output attribute to keep track of their internal state between
--- forward and backward.
-------------------------------------------------------------------------
-local Recurrent, parent = torch.class('nn.Recurrent', 'nn.AbstractRecurrent')
-
-function Recurrent:__init(start, input, feedback, transfer, rho, merge)
-   parent.__init(self, rho or 5)
-
-   local ts = torch.type(start)
-   if ts == 'torch.LongTensor' or ts == 'number' then
-      start = nn.Add(start)
-   end
-
-   self.startModule = start
-   self.inputModule = input
-   self.feedbackModule = feedback
-   self.transferModule = transfer or nn.Sigmoid()
-   self.mergeModule = merge or nn.CAddTable()
-
-   self.modules = {self.startModule, self.inputModule, self.feedbackModule, self.transferModule, self.mergeModule}
-
-   self:buildInitialModule()
-   self:buildRecurrentModule()
-
-   self.initialOutputs = {}
-   self.initialGradInputs = {}
-end
-
--- build module used for the first step (steps == 1)
-function Recurrent:buildInitialModule()
-   self.initialModule = nn.Sequential()
-   self.initialModule:add(self.inputModule)
-   self.initialModule:add(self.startModule)
-   self.initialModule:add(self.transferModule)
-end
-
--- build module used for the other steps (steps > 1)
-function Recurrent:buildRecurrentModule()
-   local parallelModule = nn.ParallelTable()
-   parallelModule:add(self.inputModule)
-   parallelModule:add(self.feedbackModule)
-   self.recurrentModule = nn.Sequential()
-   self.recurrentModule:add(parallelModule)
-   self.recurrentModule:add(self.mergeModule)
-   self.recurrentModule:add(self.transferModule)
-end
-
-function Recurrent:updateOutput(input)
-   -- output(t) = transfer(feedback(output_(t-1)) + input(input_(t)))
-   local output
-   if self.step == 1 then
-      -- set/save the output states
-      local modules = self.initialModule:listModules()
-      for i,modula in ipairs(modules) do
-         local output_ = self.recursiveResizeAs(self.initialOutputs[i], modula.output)
-         modula.output = output_
-      end
-      output = self.initialModule:updateOutput(input)
-      for i,modula in ipairs(modules) do
-         self.initialOutputs[i] = modula.output
-      end
-   else
-      if self.train ~= false then
-         -- set/save the output states
-         local modules = self.recurrentModule:listModules()
-         self:recycle()
-         local recurrentOutputs = self.recurrentOutputs[self.step]
-         if not recurrentOutputs then
-            recurrentOutputs = {}
-            self.recurrentOutputs[self.step] = recurrentOutputs
-         end
-         for i,modula in ipairs(modules) do
-            local output_ = self.recursiveResizeAs(recurrentOutputs[i], modula.output)
-            modula.output = output_
-         end
-         -- self.output is the previous output of this module
-         output = self.recurrentModule:updateOutput{input, self.output}
-         for i,modula in ipairs(modules) do
-            recurrentOutputs[i] = modula.output
-         end
-      else
-         -- self.output is the previous output of this module
-         output = self.recurrentModule:updateOutput{input, self.output}
-      end
-   end
-
-   if self.train ~= false then
-      local input_ = self.inputs[self.step]
-      self.inputs[self.step] = self.copyInputs
-         and self.recursiveCopy(input_, input)
-         or self.recursiveSet(input_, input)
-   end
-
-   self.outputs[self.step] = output
-   self.output = output
-   self.step = self.step + 1
-   self.gradParametersAccumulated = false
-   return self.output
-end
-
--- not to be confused with the hit movie Back to the Future
-function Recurrent:backwardThroughTime()
-   assert(self.step > 1, "expecting at least one updateOutput")
-   local rho = math.min(self.rho, self.step-1)
-   local stop = self.step - rho
-   if self.fastBackward then
-      self.gradInputs = {}
-      local gradInput, gradPrevOutput
-      for step=self.step-1,math.max(stop, 2),-1 do
-         -- set the output/gradOutput states of current Module
-         local modules = self.recurrentModule:listModules()
-         local recurrentOutputs = self.recurrentOutputs[step]
-         local recurrentGradInputs = self.recurrentGradInputs[step]
-         if not recurrentGradInputs then
-            recurrentGradInputs = {}
-            self.recurrentGradInputs[step] = recurrentGradInputs
-         end
-         for i,modula in ipairs(modules) do
-            local output, gradInput = modula.output, modula.gradInput
-            assert(gradInput, "missing gradInput")
-            local output_ = recurrentOutputs[i]
-            assert(output_, "backwardThroughTime should be preceded by updateOutput")
-            modula.output = output_
-            modula.gradInput = self.recursiveResizeAs(recurrentGradInputs[i], gradInput)
-         end
-
-         -- backward propagate through this step
-         local input = self.inputs[step]
-         local output = self.outputs[step-1]
-         local gradOutput = self.gradOutputs[step]
-         if gradPrevOutput then
-            self.recursiveAdd(gradOutput, gradPrevOutput)
-         end
-         local scale = self.scales[step]
-
-         gradInput, gradPrevOutput = unpack(self.recurrentModule:backward({input, output}, gradOutput, scale/rho))
-         table.insert(self.gradInputs, 1, gradInput)
-
-         for i,modula in ipairs(modules) do
-            recurrentGradInputs[i] = modula.gradInput
-         end
-      end
-
-      if stop <= 1 then
-         -- set the output/gradOutput states of initialModule
-         local modules = self.initialModule:listModules()
-         for i,modula in ipairs(modules) do
-            modula.output = self.initialOutputs[i]
-            modula.gradInput = self.recursiveResizeAs(self.initialGradInputs[i], modula.gradInput)
-         end
-
-         -- backward propagate through first step
-         local input = self.inputs[1]
-         local gradOutput = self.gradOutputs[1]
-         if gradPrevOutput then
-            self.recursiveAdd(gradOutput, gradPrevOutput)
-         end
-         local scale = self.scales[1]
-         gradInput = self.initialModule:backward(input, gradOutput, scale/rho)
-         table.insert(self.gradInputs, 1, gradInput)
-
-         for i,modula in ipairs(modules) do
-            self.initialGradInputs[i] = modula.gradInput
-         end
-
-         -- startModule's gradParams shouldn't be step-averaged
-         -- as it is used only once. So un-step-average it
-         local params, gradParams = self.startModule:parameters()
-         if gradParams then
-            for i,gradParam in ipairs(gradParams) do
-               gradParam:mul(rho)
-            end
-         end
-
-         self.gradParametersAccumulated = true
-         return gradInput
-      end
-   else
-      local gradInput = self:updateGradInputThroughTime()
-      self:accGradParametersThroughTime()
-      return gradInput
-   end
-end
-
-function Recurrent:updateGradInputThroughTime()
-   assert(self.step > 1, "expecting at least one updateOutput")
-   self.gradInputs = {}
-   local gradInput, gradPrevOutput
-   local rho = math.min(self.rho, self.step-1)
-   local stop = self.step - rho
-   for step=self.step-1,math.max(stop,2),-1 do
-      -- set the output/gradOutput states of current Module
-      local modules = self.recurrentModule:listModules()
-      local recurrentOutputs = self.recurrentOutputs[step]
-      local recurrentGradInputs = self.recurrentGradInputs[step]
-      if not recurrentGradInputs then
-         recurrentGradInputs = {}
-         self.recurrentGradInputs[step] = recurrentGradInputs
-      end
-      for i,modula in ipairs(modules) do
-         local output, gradInput = modula.output, modula.gradInput
-         local output_ = recurrentOutputs[i]
-         assert(output_, "updateGradInputThroughTime should be preceded by updateOutput")
-         modula.output = output_
-         modula.gradInput = self.recursiveResizeAs(recurrentGradInputs[i], gradInput)
-      end
-
-      -- backward propagate through this step
-      local input = self.inputs[step]
-      local output = self.outputs[step-1]
-      local gradOutput = self.gradOutputs[step]
-      if gradPrevOutput then
-         self.recursiveAdd(gradOutput, gradPrevOutput)
-      end
-
-      gradInput, gradPrevOutput = unpack(self.recurrentModule:updateGradInput({input, output}, gradOutput))
-      table.insert(self.gradInputs, 1, gradInput)
-
-      for i,modula in ipairs(modules) do
-         recurrentGradInputs[i] = modula.gradInput
-      end
-   end
-
-   if stop <= 1 then
-      -- set the output/gradOutput states of initialModule
-      local modules = self.initialModule:listModules()
-      for i,modula in ipairs(modules) do
-         modula.output = self.initialOutputs[i]
-         modula.gradInput = self.recursiveResizeAs(self.initialGradInputs[i], modula.gradInput)
-      end
-
-      -- backward propagate through first step
-      local input = self.inputs[1]
-      local gradOutput = self.gradOutputs[1]
-      if gradPrevOutput then
-         self.recursiveAdd(gradOutput, gradPrevOutput)
-      end
-      gradInput = self.initialModule:updateGradInput(input, gradOutput)
-      table.insert(self.gradInputs, 1, gradInput)
-
-      for i,modula in ipairs(modules) do
-         self.initialGradInputs[i] = modula.gradInput
-      end
-   end
-
-   return gradInput
-end
-
-function Recurrent:accGradParametersThroughTime()
-   local rho = math.min(self.rho, self.step-1)
-   local stop = self.step - rho
-   for step=self.step-1,math.max(stop,2),-1 do
-      -- set the output/gradOutput states of current Module
-      local modules = self.recurrentModule:listModules()
-      local recurrentOutputs = self.recurrentOutputs[step]
-      local recurrentGradInputs = self.recurrentGradInputs[step]
-
-      for i,modula in ipairs(modules) do
-         local output, gradInput = modula.output, modula.gradInput
-         local output_ = recurrentOutputs[i]
-         local gradInput_ = recurrentGradInputs[i]
-         assert(output_, "accGradParametersThroughTime should be preceded by updateOutput")
-         assert(gradInput_, "accGradParametersThroughTime should be preceded by updateGradInputThroughTime")
-         modula.output = output_
-         modula.gradInput = gradInput_
-      end
-
-      -- backward propagate through this step
-      local input = self.inputs[step]
-      local output = self.outputs[step-1]
-      local gradOutput = self.gradOutputs[step]
-
-      local scale = self.scales[step]
-      self.recurrentModule:accGradParameters({input, output}, gradOutput, scale/rho)
-   end
-
-   if stop <= 1 then
-      -- set the output/gradOutput states of initialModule
-      local modules = self.initialModule:listModules()
-      for i,modula in ipairs(modules) do
-         local output, gradInput = modula.output, modula.gradInput
-         local output_ = self.initialOutputs[i]
-         local gradInput_ = self.initialGradInputs[i]
-         modula.output = output_
-         modula.gradInput = gradInput_
-      end
-
-      -- backward propagate through first step
-      local input = self.inputs[1]
-      local gradOutput = self.gradOutputs[1]
-      local scale = self.scales[1]
-      self.initialModule:accGradParameters(input, gradOutput, scale/rho)
-
-      -- startModule's gradParams shouldn't be step-averaged
-      -- as it is used only once. So un-step-average it
-      local params, gradParams = self.startModule:parameters()
-      if gradParams then
-         for i,gradParam in ipairs(gradParams) do
-            gradParam:mul(rho)
-         end
-      end
-   end
-
-   self.gradParametersAccumulated = true
-   return gradInput
-end
-
-function Recurrent:accUpdateGradParametersThroughTime(lr)
-   local rho = math.min(self.rho, self.step-1)
-   local stop = self.step - rho
-   for step=self.step-1,math.max(stop,2),-1 do
-      -- set the output/gradOutput states of current Module
-      local modules = self.recurrentModule:listModules()
-      local recurrentOutputs = self.recurrentOutputs[step]
-      local recurrentGradInputs = self.recurrentGradInputs[step]
-
-      for i,modula in ipairs(modules) do
-         local output, gradInput = modula.output, modula.gradInput
-         local output_ = recurrentOutputs[i]
-         local gradInput_ = recurrentGradInputs[i]
-         assert(output_, "accGradParametersThroughTime should be preceded by updateOutput")
-         assert(gradInput_, "accGradParametersThroughTime should be preceded by updateGradInputThroughTime")
-         modula.output = output_
-         modula.gradInput = gradInput_
-      end
-
-      -- backward propagate through this step
-      local input = self.inputs[step]
-      local output = self.outputs[step-1]
-      local gradOutput = self.gradOutputs[step]
-
-      local scale = self.scales[step]
-      self.recurrentModule:accUpdateGradParameters({input, output}, gradOutput, lr*scale/rho)
-   end
-
-   if stop <= 1 then
-      -- set the output/gradOutput states of initialModule
-      local modules = self.initialModule:listModules()
-      for i,modula in ipairs(modules) do
-         local output, gradInput = modula.output, modula.gradInput
-         local output_ = self.initialOutputs[i]
-         local gradInput_ = self.initialGradInputs[i]
-         modula.output = output_
-         modula.gradInput = gradInput_
-      end
-
-      -- backward propagate through first step
-      local input = self.inputs[1]
-      local gradOutput = self.gradOutputs[1]
-      local scale = self.scales[1]
-      self.inputModule:accUpdateGradParameters(input, self.startModule.gradInput, lr*scale/rho)
-      -- startModule's gradParams shouldn't be step-averaged as it is used only once.
-      self.startModule:accUpdateGradParameters(self.inputModule.output, self.transferModule.gradInput, lr*scale)
-   end
-
-   return gradInput
-end
-
-function Recurrent:forget()
-   parent.forget(self, 1)
-end
-
-function Recurrent:__tostring__()
-   local tab = '  '
-   local line = '\n'
-   local next = ' -> '
-   local str = torch.type(self)
-   str = str .. ' {' .. line .. tab .. '[{input(t), output(t-1)}'
-   for i=1,3 do
-      str = str .. next .. '(' .. i .. ')'
-   end
-   str = str .. next .. 'output(t)]'
-
-   local tab = '  '
-   local line = '\n  '
-   local next = '  |`-> '
-   local ext = '  |    '
-   local last = '   ... -> '
-   str = str .. line .. '(1): ' .. ' {' .. line .. tab .. 'input(t)'
-   str = str .. line .. tab .. next .. '(t==0): ' .. tostring(self.startModule):gsub('\n', '\n' .. tab .. ext)
-   str = str .. line .. tab .. next .. '(t~=0): ' .. tostring(self.inputModule):gsub('\n', '\n' .. tab .. ext)
-   str = str .. line .. tab .. 'output(t-1)'
-   str = str .. line .. tab .. next .. tostring(self.feedbackModule):gsub('\n', line .. tab .. ext)
-   str = str .. line .. "}"
-   local tab = '  '
-   local line = '\n'
-   local next = ' -> '
-   str = str .. line .. tab .. '(' .. 2 .. '): ' .. tostring(self.mergeModule):gsub(line, line .. tab)
-   str = str .. line .. tab .. '(' .. 3 .. '): ' .. tostring(self.transferModule):gsub(line, line .. tab)
-   str = str .. line .. '}'
-   return str
-end
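A minimal training-loop sketch for the module above, following its header comment (editor's illustration, not part of the commit; it assumes the pre-removal nnx API):

   require 'nnx'

   -- start = 20 builds an nn.Add(20) start module; rho = 5 steps of BPTT
   local rnn = nn.Recurrent(20, nn.Linear(10, 20), nn.Linear(20, 20), nn.Sigmoid(), 5)

   for step = 1, 5 do
      local input = torch.randn(10)
      rnn:forward(input)
      rnn:backward(input, torch.randn(20)) -- only logs gradOutputs and scales
   end
   rnn:updateParameters(0.1) -- Back-Propagation Through Time happens here
   rnn:forget()              -- forget the sequence before starting a new one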
diff --git a/Repeater.lua b/Repeater.lua
deleted file mode 100644
index 68ea41b..0000000
--- a/Repeater.lua
+++ /dev/null
@@ -1,87 +0,0 @@
-------------------------------------------------------------------------
---[[ Repeater ]]--
--- Encapsulates an AbstractRecurrent instance (rnn) which is repeatedly
--- presented with the same input for nStep time steps.
--- The output is a table of nStep outputs of the rnn.
-------------------------------------------------------------------------
-local Repeater, parent = torch.class("nn.Repeater", "nn.Container")
-
-function Repeater:__init(rnn, nStep)
-   parent.__init(self)
-   assert(torch.type(nStep) == 'number', "expecting number value for arg 2")
-   self.nStep = nStep
-   self.rnn = rnn
-   assert(rnn.backwardThroughTime, "expecting AbstractRecurrent instance for arg 1")
-   self.modules[1] = rnn
-   self.output = {}
-end
-
-function Repeater:updateOutput(input)
-   self.rnn:forget()
-   for step=1,self.nStep do
-      self.output[step] = self.rnn:updateOutput(input)
-   end
-   return self.output
-end
-
-local recursiveAdd = nn.AbstractRecurrent.recursiveAdd
-local recursiveCopy = nn.AbstractRecurrent.recursiveCopy
-
-function Repeater:updateGradInput(input, gradOutput)
-   assert(self.rnn.step - 1 == self.nStep, "inconsistent rnn steps")
-   assert(torch.type(gradOutput) == 'table', "expecting gradOutput table")
-   assert(#gradOutput == self.nStep, "gradOutput should have nStep elements")
-   for step=1,self.nStep do
-      self.rnn.step = step + 1
-      self.rnn:updateGradInput(input, gradOutput[step])
-   end
-   -- back-propagate through time (BPTT)
-   self.rnn:updateGradInputThroughTime()
-
-   for i,currentGradInput in ipairs(self.rnn.gradInputs) do
-      if i == 1 then
-         self.gradInput = recursiveCopy(self.gradInput, currentGradInput)
-      else
-         recursiveAdd(self.gradInput, currentGradInput)
-      end
-   end
-
-   return self.gradInput
-end
-
-function Repeater:accGradParameters(input, gradOutput, scale)
-   assert(self.rnn.step - 1 == self.nStep, "inconsistent rnn steps")
-   assert(torch.type(gradOutput) == 'table', "expecting gradOutput table")
-   assert(#gradOutput == self.nStep, "gradOutput should have nStep elements")
-   for step=1,self.nStep do
-      self.rnn.step = step + 1
-      self.rnn:accGradParameters(input, gradOutput[step], scale)
-   end
-   -- back-propagate through time (BPTT)
-   self.rnn:accGradParametersThroughTime()
-end
-
-function Repeater:accUpdateGradParameters(input, gradOutput, lr)
-   assert(self.rnn.step - 1 == self.nStep, "inconsistent rnn steps")
-   assert(torch.type(gradOutput) == 'table', "expecting gradOutput table")
-   assert(#gradOutput == self.nStep, "gradOutput should have nStep elements")
-   for step=1,self.nStep do
-      self.rnn.step = step + 1
-      self.rnn:accGradParameters(input, gradOutput[step], 1)
-   end
-   -- back-propagate through time (BPTT)
-   self.rnn:accUpdateGradParametersThroughTime(lr)
-end
-
-function Repeater:__tostring__()
-   local tab = '  '
-   local line = '\n'
-   local str = torch.type(self) .. ' {' .. line
-   str = str .. tab .. '[  input,    input,  ...,  input  ]'.. line
-   str = str .. tab .. '     V        V             V     '.. line
-   str = str .. tab .. tostring(self.modules[1]):gsub(line, line .. tab) .. line
-   str = str .. tab .. '     V        V             V     '.. line
-   str = str .. tab .. '[output(1),output(2),...,output('..self.nStep..')]' .. line
-   str = str .. '}'
-   return str
-end
diff --git a/RepeaterCriterion.lua b/RepeaterCriterion.lua
deleted file mode 100644
index a6ad078..0000000
--- a/RepeaterCriterion.lua
+++ /dev/null
@@ -1,49 +0,0 @@
-------------------------------------------------------------------------
---[[ RepeaterCriterion ]]--
--- Applies a criterion to each of the inputs in a Table using the
--- same target (the target is repeated).
--- Useful for nn.Repeater and nn.Sequencer.
-------------------------------------------------------------------------
-local RepeaterCriterion, parent = torch.class("nn.RepeaterCriterion", "nn.Criterion")
-
-function RepeaterCriterion:__init(criterion)
-   parent.__init(self)
-   self.criterion = criterion
-   self.gradInput = {}
-end
-
-function RepeaterCriterion:forward(inputTable, target)
-   self.output = 0
-   for i,input in ipairs(inputTable) do
-      self.output = self.output + self.criterion:forward(input, target)
-   end
-   return self.output
-end
-
-function RepeaterCriterion:backward(inputTable, target)
-   for i,input in ipairs(inputTable) do
-      local gradInput = self.criterion:backward(input, target)
-      self.gradInput[i] = self.gradInput[i] or gradInput.new()
-      self.gradInput[i]:resizeAs(gradInput):copy(gradInput)
-   end
-   return self.gradInput
-end
-
-local function recursiveType(param, type_str)
-   if torch.type(param) == 'table' then
-      for i = 1, #param do
-         param[i] = recursiveType(param[i], type_str)
-      end
-   else
-      if torch.typename(param) and
-         torch.typename(param):find('torch%..+Tensor') then
-         param = param:type(type_str)
-      end
-   end
-   return param
-end
-
-function RepeaterCriterion:type(type)
-   self.gradInput = recursiveType(self.gradInput)
-   return self.criterion:type(type)
-end
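A sketch combining the two modules above, where one input is repeated for nStep time-steps and a single target is applied at every step (editor's illustration, not part of the commit; it assumes the pre-removal nnx API):

   require 'nnx'

   local rnn = nn.Recurrent(10, nn.Linear(5, 10), nn.Linear(10, 10), nn.Sigmoid(), 3)
   local repeater = nn.Repeater(rnn, 3)
   local criterion = nn.RepeaterCriterion(nn.MSECriterion())

   local input, target = torch.randn(5), torch.randn(10)
   local outputs = repeater:forward(input)        -- table of 3 outputs
   local err = criterion:forward(outputs, target)
   local gradOutputs = criterion:backward(outputs, target)
   repeater:backward(input, gradOutputs)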
diff --git a/Sequencer.lua b/Sequencer.lua
deleted file mode 100644
index 97f9d6c..0000000
--- a/Sequencer.lua
+++ /dev/null
@@ -1,197 +0,0 @@
-------------------------------------------------------------------------
---[[ Sequencer ]]--
--- Encapsulates a Module.
--- Input is a sequence (a table) of tensors.
--- Output is a sequence (a table) of tensors of the same length.
--- Applies the module to each element in the sequence.
--- Handles both recurrent modules and non-recurrent modules.
--- The sequences in a batch must have the same size.
--- But the sequence length of each batch can vary.
-------------------------------------------------------------------------
-local Sequencer, parent = torch.class("nn.Sequencer", "nn.Container")
-
-function Sequencer:__init(module)
-   parent.__init(self)
-   self.module = module
-   self.isRecurrent = module.backwardThroughTime ~= nil
-   self.modules[1] = module
-   self.sequenceOutputs = {}
-   self.output = {}
-   self.step = 1
-end
-
-local function recursiveResizeAs(t1,t2)
-   if torch.type(t2) == 'table' then
-      t1 = (torch.type(t1) == 'table') and t1 or {t1}
-      for key,_ in pairs(t2) do
-         t1[key], t2[key] = recursiveResizeAs(t1[key], t2[key])
-      end
-   elseif torch.isTensor(t2) then
-      t1 = t1 or t2.new()
-      t1:resizeAs(t2)
-   else
-      error("expecting nested tensors or tables. Got "..
-            torch.type(t1).." and "..torch.type(t2).." instead")
-   end
-   return t1, t2
-end
-
-
-function Sequencer:updateOutput(inputTable)
-   assert(torch.type(inputTable) == 'table', "expecting input table")
-   self.output = {}
-   if self.isRecurrent then
-      self.module:forget()
-      for step, input in ipairs(inputTable) do
-         self.output[step] = self.module:updateOutput(input)
-      end
-   else
-      for step, input in ipairs(inputTable) do
-         -- set output states for this step
-         local modules = self.module:listModules()
-         local sequenceOutputs = self.sequenceOutputs[step]
-         if not sequenceOutputs then
-            sequenceOutputs = {}
-            self.sequenceOutputs[step] = sequenceOutputs
-         end
-         for i,modula in ipairs(modules) do
-            local output_ = recursiveResizeAs(sequenceOutputs[i], modula.output)
-            modula.output = output_
-         end
-
-         -- forward propagate this step
-         self.output[step] = self.module:updateOutput(input)
-
-         -- save output state of this step
-         for i,modula in ipairs(modules) do
-            sequenceOutputs[i] = modula.output
-         end
-      end
-   end
-   return self.output
-end
-
-function Sequencer:updateGradInput(inputTable, gradOutputTable)
-   self.gradInput = {}
-   if self.isRecurrent then
-      assert(torch.type(gradOutputTable) == 'table', "expecting gradOutput table")
-      assert(#gradOutputTable == #inputTable, "gradOutput should have as many elements as input")
-      for step, input in ipairs(inputTable) do
-         self.module.step = step + 1
-         self.module:updateGradInput(input, gradOutputTable[step])
-      end
-      -- back-propagate through time (BPTT)
-      self.module:updateGradInputThroughTime()
-      assert(self.module.gradInputs, "recurrent module did not fill gradInputs")
-      for step=1,#inputTable do
-         self.gradInput[step] = self.module.gradInputs[step]
-      end
-      assert(#self.gradInput == #inputTable, "missing gradInputs")
-   else
-      for step, input in ipairs(inputTable) do
-         -- set the output/gradOutput states for this step
-         local modules = self.module:listModules()
-         local sequenceOutputs = self.sequenceOutputs[step]
-         local sequenceGradInputs = self.sequenceGradInputs[step]
-         if not sequenceGradInputs then
-            sequenceGradInputs = {}
-            self.sequenceGradInputs[step] = sequenceGradInputs
-         end
-         for i,modula in ipairs(modules) do
-            local output, gradInput = modula.output, modula.gradInput
-            local output_ = sequenceOutputs[i]
-            assert(output_, "updateGradInputThroughTime should be preceded by updateOutput")
-            modula.output = output_
-            modula.gradInput = recursiveResizeAs(sequenceGradInputs[i], gradInput)
-         end
-
-         -- backward propagate this step
-         self.gradInput[step] = self.module:updateGradInput(input, gradOutputTable[step])
-
-         -- save the output/gradOutput states of this step
-         for i,modula in ipairs(modules) do
-            sequenceGradInputs[i] = modula.gradInput
-         end
-      end
-   end
-   return self.gradInput
-end
-
-function Sequencer:accGradParameters(inputTable, gradOutputTable, scale)
-   if self.isRecurrent then
-      assert(torch.type(gradOutputTable) == 'table', "expecting gradOutput table")
-      assert(#gradOutputTable == #inputTable, "gradOutput should have as many elements as input")
-      for step, input in ipairs(inputTable) do
-         self.module.step = step + 1
-         self.module:accGradParameters(input, gradOutputTable[step], scale)
-      end
-      -- back-propagate through time (BPTT)
-      self.module:accGradParametersThroughTime()
-   else
-      for step, input in ipairs(inputTable) do
-         -- set the output/gradOutput states for this step
-         local modules = self.module:listModules()
-         local sequenceOutputs = self.sequenceOutputs[step]
-         local sequenceGradInputs = self.sequenceGradInputs[step]
-         if not sequenceGradInputs then
-            sequenceGradInputs = {}
-            self.sequenceGradInputs[step] = sequenceGradInputs
-         end
-         for i,modula in ipairs(modules) do
-            local output, gradInput = modula.output, modula.gradInput
-            local output_ = sequenceOutputs[i]
-            modula.output = output_
-            modula.gradInput = recursiveResizeAs(sequenceGradInputs[i], gradInput)
-         end
-
-         -- accumulate parameters for this step
-         self.module:accGradParameters(input, gradOutputTable[step], scale)
-      end
-   end
-end
-
-function Sequencer:accUpdateGradParameters(input, gradOutput, lr)
-   if self.isRecurrent then
-      assert(torch.type(gradOutputTable) == 'table', "expecting gradOutput table")
-      assert(#gradOutputTable == #inputTable, "gradOutput should have as many elements as input")
-      for step, input in ipairs(inputTable) do
-         self.module.step = step + 1
-         self.module:accGradParameters(input, gradOutputTable[step], 1)
-      end
-      -- back-propagate through time (BPTT)
-      self.module:accUpdateGradParametersThroughTime(lr)
-   else
-      for step, input in ipairs(inputTable) do
-         -- set the output/gradOutput states for this step
-         local modules = self.module:listModules()
-         local sequenceOutputs = self.sequenceOutputs[step]
-         local sequenceGradInputs = self.sequenceGradInputs[step]
-         if not sequenceGradInputs then
-            sequenceGradInputs = {}
-            self.sequenceGradInputs[step] = sequenceGradInputs
-         end
-         for i,modula in ipairs(modules) do
-            local output, gradInput = modula.output, modula.gradInput
-            local output_ = sequenceOutputs[i]
-            modula.output = output_
-            modula.gradInput = recursiveResizeAs(sequenceGradInputs[i], gradInput)
-         end
-
-         -- accumulate parameters for this step
-         self.module:accUpdateGradParameters(input, gradOutputTable[step], lr)
-      end
-   end
-end
-
-function Sequencer:__tostring__()
-   local tab = '  '
-   local line = '\n'
-   local str = torch.type(self) .. ' {' .. line
-   str = str .. tab .. '[input(1), input(2), ..., input(T)]'.. line
-   str = str .. tab .. '     V         V             V     '.. line
-   str = str .. tab .. tostring(self.modules[1]):gsub(line, line .. tab) .. line
-   str = str .. tab .. '     V         V             V     '.. line
-   str = str .. tab .. '[output(1),output(2),...,output(T)]' .. line
-   str = str .. '}'
-   return str
-end
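A sketch of the per-element application described in the header above, using a recurrent module so that BPTT is handled internally (editor's illustration, not part of the commit; it assumes the pre-removal nnx API):

   require 'nnx'

   local rnn = nn.Recurrent(8, nn.Linear(4, 8), nn.Linear(8, 8), nn.Sigmoid(), 3)
   local seq = nn.Sequencer(rnn)

   local inputs = {torch.randn(4), torch.randn(4), torch.randn(4)}
   local outputs = seq:forward(inputs) -- forgets, then one forward per element
   local gradOutputs = {torch.randn(8), torch.randn(8), torch.randn(8)}
   local gradInputs = seq:backward(inputs, gradOutputs)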
diff --git a/init.lua b/init.lua
--- a/init.lua
+++ b/init.lua
@@ -39,9 +39,6 @@ require 'libnnx'
 torch.include('nnx', 'test-all.lua')
 torch.include('nnx', 'test-omp.lua')
 
--- extensions of nn modules
-torch.include('nnx', 'Module.lua')
-
 -- tools:
 torch.include('nnx', 'Probe.lua')
 torch.include('nnx', 'Tic.lua')
@@ -74,24 +71,15 @@ torch.include('nnx', 'Minus.lua')
 torch.include('nnx', 'SoftMaxTree.lua')
 torch.include('nnx', 'MultiSoftMax.lua')
 torch.include('nnx', 'Balance.lua')
-torch.include('nnx', 'NarrowLookupTable.lua')
 torch.include('nnx', 'PushTable.lua')
 torch.include('nnx', 'PullTable.lua')
 torch.include('nnx', 'ZeroGrad.lua')
 
--- recurrent
-torch.include('nnx', 'AbstractRecurrent.lua')
-torch.include('nnx', 'Recurrent.lua')
-torch.include('nnx', 'LSTM.lua')
-torch.include('nnx', 'Repeater.lua')
-torch.include('nnx', 'Sequencer.lua')
-
 -- criterions:
 torch.include('nnx', 'SuperCriterion.lua')
 torch.include('nnx', 'DistNLLCriterion.lua')
 torch.include('nnx', 'DistMarginCriterion.lua')
 torch.include('nnx', 'TreeNLLCriterion.lua')
-torch.include('nnx', 'RepeaterCriterion.lua')
 
 -- datasets:
 torch.include('nnx', 'DataSet.lua')
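The net effect of the init.lua hunks above: after this commit, require 'nnx' no longer registers the removed classes. A quick hypothetical check:

   require 'nnx'
   print(nn.NarrowLookupTable)   -- nil after this commit
   print(nn.LSTM)                -- nil
   print(nn.Sequencer)           -- nil
   print(nn.RepeaterCriterion)   -- nil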
diff --git a/test/test-all.lua b/test/test-all.lua
index 0b27f2a..2ef1977 100644
--- a/test/test-all.lua
+++ b/test/test-all.lua
@@ -286,482 +286,6 @@ function nnxtest.SpatialConvolution()
    mytester:asserteq(berr, 0, torch.typename(module) .. ' - i/o backward err ')
 end
 
-function nnxtest.Module_listModules()
-   local batchSize = 4
-   local inputSize, outputSize = 7, 6
-   local linear = nn.Linear(inputSize, outputSize)
-   local tanh = nn.Tanh()
-   local reshape = nn.Reshape(outputSize/2, 2)
-   local mlp3 = nn.Sequential()
-   mlp3:add(linear)
-   mlp3:add(tanh)
-   mlp3:add(reshape)
-
-   local mlp2 = nn.Sequential()
-   local view = nn.View(outputSize)
-   local linear2 = nn.Linear(outputSize, inputSize)
-   local tanh2 = nn.Tanh()
-   mlp2:add(mlp3)
-   mlp2:add(view)
-   mlp2:add(linear2)
-   mlp2:add(tanh2)
-
-   local concat = nn.ConcatTable()
-   local id = nn.Identity()
-   concat:add(mlp2)
-   concat:add(id)
-   local mlp = nn.Sequential()
-   local add = nn.CAddTable()
-   mlp:add(concat)
-   mlp:add(add)
-
-   local modules2 = {mlp, concat, mlp2, mlp3, linear, tanh, reshape, view, linear2, tanh2, id, add}
-   local modules = mlp:listModules()
-
-   mytester:assert(#modules2 == #modules, 'missing modules error')
-
-   for i,module in ipairs(modules) do
-      mytester:assert(torch.type(module) == torch.type(modules2[i]), 'module error')
-   end
-end
-
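As the expected table in the deleted test documents, Module:listModules (removed along with Module.lua) returned the container itself followed by its descendants in depth-first order. A smaller sketch of that ordering, assuming the pre-removal extension is loaded:

   local mlp3 = nn.Sequential()
   local linear = nn.Linear(7, 6)
   local tanh = nn.Tanh()
   mlp3:add(linear)
   mlp3:add(tanh)
   -- the container comes first, then its children, depth-first:
   -- mlp3:listModules() --> { mlp3, linear, tanh }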
-function nnxtest.Recurrent()
-   local batchSize = 4
-   local inputSize = 10
-   local hiddenSize = 12
-   local outputSize = 7
-   local nSteps = 5
-   local inputModule = nn.Linear(inputSize, outputSize)
-   local transferModule = nn.Sigmoid()
-   -- test MLP feedback Module (because of Module:representations())
-   local feedbackModule = nn.Sequential()
-   feedbackModule:add(nn.Linear(outputSize, hiddenSize))
-   feedbackModule:add(nn.Sigmoid())
-   feedbackModule:add(nn.Linear(hiddenSize, outputSize))
-   -- rho = nSteps
-   local mlp = nn.Recurrent(outputSize, inputModule, feedbackModule, transferModule:clone(), nSteps)
-
-   -- test that the internal mlps are recursable:
-   local isRecursable = nn.AbstractRecurrent.isRecursable
-   mytester:assert(isRecursable(mlp.initialModule, torch.randn(inputSize)), "Recurrent isRecursable() initial error")
-   mytester:assert(isRecursable(mlp.recurrentModule, {torch.randn(inputSize), torch.randn(outputSize)}), "Recurrent isRecursable() recurrent error")
-
-   -- test that the above test actually works
-   local euclidean = nn.Euclidean(inputSize, outputSize)
-   mytester:assert(not isRecursable(euclidean, torch.randn(batchSize, inputSize)), "AbstractRecurrent.isRecursable error")
-
-   local gradOutputs, outputs = {}, {}
-   -- inputs = {inputN, {inputN-1, {inputN-2, ...}}}
-   local inputs
-   local startModule = mlp.startModule:clone()
-   inputModule = mlp.inputModule:clone()
-   feedbackModule = mlp.feedbackModule:clone()
-
-   local mlp6 = mlp:clone()
-   mlp6:evaluate()
-
-   mlp:zeroGradParameters()
-   local mlp7 = mlp:clone()
-   mlp7.rho = nSteps - 1
-   local inputSequence = {}
-   for step=1,nSteps do
-      local input = torch.randn(batchSize, inputSize)
-      inputSequence[step] = input
-      local gradOutput
-      if step ~= nSteps then
-         -- for the sake of keeping this unit test simple,
-         gradOutput = torch.zeros(batchSize, outputSize)
-      else
-         -- only the last step will get a gradient from the output
-         gradOutput = torch.randn(batchSize, outputSize)
-      end
-
-      local output = mlp:forward(input)
-      mlp:backward(input, gradOutput)
-
-      local output6 = mlp6:forward(input)
-      mytester:assertTensorEq(output, output6, 0.000001, "evaluation error "..step)
-
-      local output7 = mlp7:forward(input)
-      mlp7:backward(input, gradOutput)
-      mytester:assertTensorEq(output, output7, 0.000001, "rho = nSteps-1 forward error "..step)
-
-      table.insert(gradOutputs, gradOutput)
-      table.insert(outputs, output:clone())
-
-      if inputs then
-         inputs = {input, inputs}
-      else
-         inputs = input
-      end
-   end
-   local mlp4 = mlp:clone()
-   local mlp5 = mlp:clone()
-
-   -- backward propagate through time (BPTT)
-   local gradInput = mlp:backwardThroughTime():clone()
-   mlp:forget() -- test ability to forget
-   mlp:zeroGradParameters()
-   local foutputs = {}
-   for step=1,nSteps do
-      foutputs[step] = mlp:forward(inputSequence[step])
-      mytester:assertTensorEq(foutputs[step], outputs[step], 0.00001, "Recurrent forget output error "..step)
-      mlp:backward(inputSequence[step], gradOutputs[step])
-   end
-   local fgradInput = mlp:backwardThroughTime():clone()
-   mytester:assertTensorEq(gradInput, fgradInput, 0.00001, "Recurrent forget gradInput error")
-
-   mlp4.fastBackward = false
-   local gradInput4 = mlp4:backwardThroughTime()
-   mytester:assertTensorEq(gradInput, gradInput4, 0.000001, 'error slow vs fast backwardThroughTime')
-   local mlp10 = mlp7:clone()
-   mytester:assert(mlp10.inputs[1] == nil, 'recycle inputs error')
-   mlp10:forget()
-   mytester:assert(#mlp10.inputs == 4, 'forget inputs error')
-   mytester:assert(#mlp10.outputs == 5, 'forget outputs error')
-   local i = 0
-   for k,v in pairs(mlp10.recurrentOutputs) do
-      i = i + 1
-   end
-   mytester:assert(i == 4, 'forget recurrentOutputs error')
-
-   -- rho = nSteps - 1 : shouldn't update startModule
-   mlp7:backwardThroughTime()
-
-   local mlp2 -- this one will simulate rho = nSteps
-   local outputModules = {}
-   for step=1,nSteps do
-      local inputModule_ = inputModule:clone()
-      local outputModule = transferModule:clone()
-      table.insert(outputModules, outputModule)
-      inputModule_:share(inputModule, 'weight', 'gradWeight', 'bias', 'gradBias')
-      if step == 1 then
-         local initialModule = nn.Sequential()
-         initialModule:add(inputModule_)
-         initialModule:add(startModule)
-         initialModule:add(outputModule)
-         mlp2 = initialModule
-      else
-         local parallelModule = nn.ParallelTable()
-         parallelModule:add(inputModule_)
-         local pastModule = nn.Sequential()
-         pastModule:add(mlp2)
-         local feedbackModule_ = feedbackModule:clone()
-         feedbackModule_:share(feedbackModule, 'weight', 'gradWeight', 'bias', 'gradBias')
-         pastModule:add(feedbackModule_)
-         parallelModule:add(pastModule)
-         local recurrentModule = nn.Sequential()
-         recurrentModule:add(parallelModule)
-         recurrentModule:add(nn.CAddTable())
-         recurrentModule:add(outputModule)
-         mlp2 = recurrentModule
-      end
-   end
-
-
-   local output2 = mlp2:forward(inputs)
-   mlp2:zeroGradParameters()
-
-   -- unlike mlp2, mlp8 will simulate rho = nSteps -1
-   local mlp8 = mlp2:clone()
-   local inputModule8 = mlp8.modules[1].modules[1]
-   local m = mlp8.modules[1].modules[2].modules[1].modules[1].modules[2]
-   m = m.modules[1].modules[1].modules[2].modules[1].modules[1].modules[2]
-   local feedbackModule8 = m.modules[2]
-   local startModule8 = m.modules[1].modules[2] -- before clone
-   -- unshare the initialModule:
-   m.modules[1] = m.modules[1]:clone()
-   m.modules[2] = m.modules[2]:clone()
-   mlp8:backward(inputs, gradOutputs[#gradOutputs])
-
-   local gradInput2 = mlp2:backward(inputs, gradOutputs[#gradOutputs])
-   for step=1,nSteps-1 do
-      gradInput2 = gradInput2[2]
-   end
-
-   mytester:assertTensorEq(gradInput, gradInput2, 0.000001, "recurrent gradInput")
-   mytester:assertTensorEq(outputs[#outputs], output2, 0.000001, "recurrent output")
-   for step=1,nSteps do
-      local output, outputModule = outputs[step], outputModules[step]
-      mytester:assertTensorEq(output, outputModule.output, 0.000001, "recurrent output step="..step)
-   end
-
-   local mlp3 = nn.Sequential()
-   -- contains params and grads of mlp2 (the MLP version of the Recurrent)
-   mlp3:add(startModule):add(inputModule):add(feedbackModule)
-   local params2, gradParams2 = mlp3:parameters()
-   local params, gradParams = mlp:parameters()
-   mytester:assert(#params2 == #params, 'missing parameters')
-   mytester:assert(#gradParams == #params, 'missing gradParameters')
-   for i=1,#params do
-      if i > 1 then
-         gradParams2[i]:div(nSteps)
-      end
-      mytester:assertTensorEq(gradParams[i], gradParams2[i], 0.000001, 'gradParameter error ' .. i)
-   end
-
-   local mlp9 = nn.Sequential()
-   -- contains params and grads of mlp8
-   mlp9:add(startModule8):add(inputModule8):add(feedbackModule8)
-   local params9, gradParams9 = mlp9:parameters()
-   local params7, gradParams7 = mlp7:parameters()
-   mytester:assert(#params9 == #params7, 'missing parameters')
-   mytester:assert(#gradParams7 == #params7, 'missing gradParameters')
-   for i=1,#params do
-      if i > 1 then
-         gradParams9[i]:div(nSteps-1)
-      end
-      mytester:assertTensorEq(gradParams7[i], gradParams9[i], 0.00001, 'gradParameter error ' .. i)
-   end
-
-   -- already called backwardThroughTime()
-   mlp:updateParameters(0.1)
-   mlp4:updateParameters(0.1)
-
-   local params4 = mlp4:parameters()
-   local params5 = mlp5:parameters()
-   local params = mlp:parameters()
-   mytester:assert(#params4 == #params, 'missing parameters')
-   mytester:assert(#params5 == #params, 'missing parameters')
-   for i=1,#params do
-      mytester:assertTensorEq(params[i], params4[i], 0.000001, 'backwardThroughTime error ' .. i)
-      mytester:assertTensorNe(params[i], params5[i], 0.0000000001, 'backwardThroughTime error ' .. i)
-   end
-
-   -- should call backwardUpdateThroughTime()
-   mlp5:updateParameters(0.1)
-
-   local params5 = mlp5:parameters()
-   local params = mlp:parameters()
-   mytester:assert(#params5 == #params, 'missing parameters')
-   for i=1,#params do
-      mytester:assertTensorEq(params[i], params5[i], 0.000001, 'backwardUpdateThroughTime error ' .. i)
-   end
-end
-
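The deleted test exercises the nn.Recurrent constructor, whose arguments (per the calls above) are start, input, feedback, transfer, rho, and optionally merge; each step computes transfer(merge{input(x_t), feedback(output_{t-1})}), with merge defaulting to addition (the CAddTable in the mlp2 simulation). A minimal training-loop sketch with arbitrary sizes, assuming the pre-removal nnx API:

   local rnn = nn.Recurrent(
      7,                 -- start: here just the output size
      nn.Linear(10, 7),  -- input module, applied to x_t
      nn.Linear(7, 7),   -- feedback module, applied to output_{t-1}
      nn.Sigmoid(),      -- transfer
      5                  -- rho: maximum number of BPTT steps
   )
   for step = 1, 5 do
      local x = torch.randn(4, 10)
      rnn:forward(x)
      rnn:backward(x, torch.randn(4, 7))
   end
   rnn:backwardThroughTime()   -- accumulate gradients through time
   rnn:updateParameters(0.1)
   rnn:forget()                -- start a fresh sequence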
-function nnxtest.Recurrent_TestTable()
-   -- Set up RNN where internal state is a table.
-   -- Trivial example is same RNN from nnxtest.Recurrent test
-   -- but all layers are duplicated
-   local batchSize = 4
-   local inputSize = 10
-   local hiddenSize = 12
-   local outputSize = 7
-   local nSteps = 5
-   local inputModule = nn.Linear(inputSize, outputSize)
-   local transferModule = nn.Sigmoid()
-   local learningRate = 0.1
-   -- test MLP feedback Module
-   local feedbackModule = nn.Sequential()
-   feedbackModule:add(nn.Linear(outputSize, hiddenSize))
-   feedbackModule:add(nn.Sigmoid())
-   feedbackModule:add(nn.Linear(hiddenSize, outputSize))
-   -- rho = nSteps
-   local mlp = nn.Recurrent(
-      nn.ParallelTable()
-         :add(nn.Add(outputSize))
-         :add(nn.Add(outputSize)),
-      nn.ParallelTable()
-         :add(inputModule:clone())
-         :add(inputModule:clone()),
-      nn.ParallelTable()
-         :add(feedbackModule:clone())
-         :add(feedbackModule:clone()),
-      nn.ParallelTable()
-         :add(transferModule:clone())
-         :add(transferModule:clone()),
-      nSteps,
-      nn.ParallelTable()
-         :add(nn.CAddTable())
-         :add(nn.CAddTable())
-   )
-
-   local input = torch.randn(batchSize, inputSize)
-   local err = torch.randn(batchSize, outputSize)
-   for i=1,10 do
-      mlp:forward{input, input:clone()}
-      mlp:backward({input, input:clone()}, {err, err:clone()})
-   end
-   mlp:backwardThroughTime(learningRate)
-end
-
-function nnxtest.LSTM()
-   local batchSize = math.random(1,2)
-   local inputSize = math.random(3,4)
-   local outputSize = math.random(5,6)
-   local nStep = 3
-   local input = {}
-   local gradOutput = {}
-   for step=1,nStep do
-      input[step] = torch.randn(batchSize, inputSize)
-      if step == nStep then
-         -- only the last step will get a gradient from the output
-         gradOutput[step] = torch.randn(batchSize, outputSize)
-      else
-         -- for the sake of keeping this unit test simple,
-         gradOutput[step] = torch.zeros(batchSize, outputSize)
-      end
-   end
-   local lstm = nn.LSTM(inputSize, outputSize)
-
-   local isRecursable = nn.AbstractRecurrent.isRecursable
-   local inputTable = {torch.randn(batchSize, inputSize), torch.randn(batchSize, outputSize), torch.randn(batchSize, outputSize)}
-   mytester:assert(isRecursable(lstm.recurrentModule, inputTable), "LSTM isRecursable() error")
-
-   -- we will use this to build an LSTM step by step (with shared params)
-   local lstmStep = lstm.recurrentModule:clone()
-
-   -- forward/backward through LSTM
-   local output = {}
-   lstm:zeroGradParameters()
-   for step=1,nStep do
-      output[step] = lstm:forward(input[step])
-      assert(torch.isTensor(input[step]))
-      lstm:backward(input[step], gradOutput[step], 1)
-   end
-   local gradInput = lstm:backwardThroughTime()
-
-   local mlp2 -- this one will simulate rho = nSteps
-   local inputs
-   for step=1,nStep do
-      -- iteratively build an LSTM out of non-recurrent components
-      local lstm = lstmStep:clone()
-      lstm:share(lstmStep, 'weight', 'gradWeight', 'bias', 'gradBias')
-      if step == 1 then
-         mlp2 = lstm
-      else
-         local rnn = nn.Sequential()
-         local para = nn.ParallelTable()
-         para:add(nn.Identity()):add(mlp2)
-         rnn:add(para)
-         rnn:add(nn.FlattenTable())
-         rnn:add(lstm)
-         mlp2 = rnn
-      end
-
-      -- prepare inputs for mlp2
-      if inputs then
-         inputs = {input[step], inputs}
-      else
-         inputs = {input[step], torch.zeros(batchSize, outputSize), torch.zeros(batchSize, outputSize)}
-      end
-   end
-   mlp2:add(nn.SelectTable(1)) --just output the output (not cell)
-   local output2 = mlp2:forward(inputs)
-
-   mlp2:zeroGradParameters()
-   local gradInput2 = mlp2:backward(inputs, gradOutput[nStep], 1/nStep)
-   mytester:assertTensorEq(gradInput2[2][2][1], gradInput, 0.00001, "LSTM gradInput error")
-   mytester:assertTensorEq(output[nStep], output2, 0.00001, "LSTM output error")
-
-   local params, gradParams = lstm:parameters()
-   local params2, gradParams2 = lstmStep:parameters()
-   mytester:assert(#params == #params2, "LSTM parameters error "..#params.." ~= "..#params2)
-   for i, gradParam in ipairs(gradParams) do
-      local gradParam2 = gradParams2[i]
-      mytester:assertTensorEq(gradParam, gradParam2, 0.000001,
-         "LSTM gradParam "..i.." error "..tostring(gradParam).." "..tostring(gradParam2))
-   end
-
-   gradParams = lstm.recursiveCopy(nil, gradParams)
-   gradInput = gradInput:clone()
-   mytester:assert(lstm.zeroTensor:sum() == 0, "zeroTensor error")
-   lstm:forget()
-   output = lstm.recursiveCopy(nil, output)
-   local output3 = {}
-   lstm:zeroGradParameters()
-   for step=1,nStep do
-      output3[step] = lstm:forward(input[step])
-      lstm:backward(input[step], gradOutput[step], 1)
-   end
-   local gradInput3 = lstm:updateGradInputThroughTime()
-   lstm:accGradParametersThroughTime()
-
-   mytester:assert(#output == #output3, "LSTM output size error")
-   for i,output in ipairs(output) do
-      mytester:assertTensorEq(output, output3[i], 0.00001, "LSTM forget (updateOutput) error "..i)
-   end
-
-   mytester:assertTensorEq(gradInput, gradInput3, 0.00001, "LSTM updateGradInputThroughTime error")
-
-   local params3, gradParams3 = lstm:parameters()
-   mytester:assert(#params == #params3, "LSTM parameters error "..#params.." ~= "..#params3)
-   for i, gradParam in ipairs(gradParams) do
-      local gradParam3 = gradParams3[i]
-      mytester:assertTensorEq(gradParam, gradParam3, 0.000001,
-         "LSTM gradParam "..i.." error "..tostring(gradParam).." "..tostring(gradParam3))
-   end
-end
-
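As the inputTable above shows, the LSTM's recurrentModule consumes {x_t, h_{t-1}, c_{t-1}}; the wrapper keeps the hidden and cell state across calls, so it is driven step by step exactly like nn.Recurrent. A minimal sketch, assuming the pre-removal nnx API and arbitrary sizes:

   local lstm = nn.LSTM(3, 5)        -- inputSize = 3, outputSize = 5
   lstm:zeroGradParameters()
   for step = 1, 3 do
      local x = torch.randn(2, 3)    -- batch of 2
      lstm:forward(x)                -- returns h_t; c_t is kept internally
      lstm:backward(x, torch.randn(2, 5))
   end
   local gradInput = lstm:backwardThroughTime()  -- BPTT over the 3 steps
   lstm:forget()                                 -- reset h and c for a new sequence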
-function nnxtest.Sequencer()
-   local batchSize = 4
-   local inputSize = 10
-   local outputSize = 7
-   local nSteps = 5
-   local inputModule = nn.Linear(inputSize, outputSize)
-   local transferModule = nn.Sigmoid()
-   -- test MLP feedback Module (because of Module:representations())
-   local feedbackModule = nn.Linear(outputSize, outputSize)
-   -- rho = nSteps
-   local rnn = nn.Recurrent(outputSize, inputModule, feedbackModule, transferModule, nSteps)
-   local rnn2 = rnn:clone()
-
-   local inputs, outputs, gradOutputs = {}, {}, {}
-   for step=1,nSteps do
-      inputs[step] = torch.randn(batchSize, inputSize)
-      outputs[step] = rnn:forward(inputs[step])
-      gradOutputs[step] = torch.randn(batchSize, outputSize)
-      rnn:backward(inputs[step], gradOutputs[step])
-   end
-   rnn:backwardThroughTime()
-
-   local rnn3 = nn.Sequencer(rnn2)
-   local outputs3 = rnn3:forward(inputs)
-   local gradInputs3 = rnn3:backward(inputs, gradOutputs)
-   mytester:assert(#outputs3 == #outputs, "Sequencer output size err")
-   mytester:assert(#gradInputs3 == #rnn.gradInputs, "Sequencer gradInputs size err")
-   for step,output in ipairs(outputs) do
-      mytester:assertTensorEq(outputs3[step], output, 0.00001, "Sequencer output "..step)
-      mytester:assertTensorEq(gradInputs3[step], rnn.gradInputs[step], 0.00001, "Sequencer gradInputs "..step)
-   end
-end
-
-function nnxtest.Repeater()
-   local batchSize = 4
-   local inputSize = 10
-   local outputSize = 7
-   local nSteps = 5
-   local inputModule = nn.Linear(inputSize, outputSize)
-   local transferModule = nn.Sigmoid()
-   -- test MLP feedback Module (because of Module:representations())
-   local feedbackModule = nn.Linear(outputSize, outputSize)
-   -- rho = nSteps
-   local rnn = nn.Recurrent(outputSize, inputModule, feedbackModule, transferModule, nSteps)
-   local rnn2 = rnn:clone()
-
-   local inputs, outputs, gradOutputs = {}, {}, {}
-   local input = torch.randn(batchSize, inputSize)
-   for step=1,nSteps do
-      outputs[step] = rnn:forward(input)
-      gradOutputs[step] = torch.randn(batchSize, outputSize)
-      rnn:backward(input, gradOutputs[step])
-   end
-   rnn:backwardThroughTime()
-
-   local rnn3 = nn.Repeater(rnn2, nSteps)
-   local outputs3 = rnn3:forward(input)
-   local gradInput3 = rnn3:backward(input, gradOutputs)
-   mytester:assert(#outputs3 == #outputs, "Repeater output size err")
-   mytester:assert(#outputs3 == #rnn.gradInputs, "Repeater gradInputs size err")
-   local gradInput = rnn.gradInputs[1]:clone():zero()
-   for step,output in ipairs(outputs) do
-      mytester:assertTensorEq(outputs3[step], output, 0.00001, "Repeater output "..step)
-      gradInput:add(rnn.gradInputs[step])
-   end
-   mytester:assertTensorEq(gradInput3, gradInput, 0.00001, "Repeater gradInput err")
-end
-
 function nnxtest.SpatialNormalization_Gaussian2D()
    local inputSize = math.random(11,20)
    local kersize = 9
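Finally, a sketch of the removed nn.Repeater decorator, which fed one input through the wrapped RNN nSteps times; note that backward returns a single tensor, the sum of the per-step gradInputs (the quantity the deleted test accumulates). Assumes the pre-removal nnx API and arbitrary sizes:

   local rnn = nn.Recurrent(7, nn.Linear(10, 7), nn.Linear(7, 7), nn.Sigmoid(), 5)
   local rpt = nn.Repeater(rnn, 5)

   local input = torch.randn(4, 10)
   local outputs = rpt:forward(input)   -- table of 5 outputs from the same input

   local gradOutputs = {}
   for step = 1, 5 do gradOutputs[step] = torch.randn(4, 7) end
   local gradInput = rpt:backward(input, gradOutputs)  -- one tensor, summed over steps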