diff options
author | Natalia Gimelshein <ngimelshein@nvidia.com> | 2017-04-26 02:29:24 +0300 |
---|---|---|
committer | Natalia Gimelshein <ngimelshein@nvidia.com> | 2017-04-26 02:29:24 +0300 |
commit | 39d6c28b1eddb9632c90c18d1e2fc104ebdf259f (patch) | |
tree | 29eaa0dce7a89d74086f297a581c13fe6a7ee72c | |
parent | 4a50d76a79cf432e69e066f0348d085bc2d469c6 (diff) | |
parent | a418f1e530cee811ab10e97a5388b62a6865aa20 (diff) |
Merge branch 'master' into HEAD
-rw-r--r-- | RNN.lua | 336 | ||||
-rw-r--r-- | test/test_rnn.lua | 216 |
2 files changed, 486 insertions, 66 deletions
@@ -28,6 +28,7 @@ function RNN:__init(inputSize, hiddenSize, numLayers, batchFirst, dropout, remem self.batchFirst = batchFirst or false -- Set to true for batch x time x inputdim. self.rememberStates = rememberStates or false self.sync = true + self.inputPacked = false self.gradInput = torch.CudaTensor() self.output = torch.CudaTensor() self.weight = torch.CudaTensor() @@ -65,7 +66,8 @@ function RNN:reset(stdv) self:resetDropoutDescriptor() self:resetRNNDescriptor() - self:resetIODescriptors() + self:resetInputDescriptor() + self:resetOutputDescriptor() local weightSizePtr = ffi.new("size_t[1]") errcheck('cudnnGetRNNParamsSize', @@ -186,28 +188,60 @@ function RNN:resetWeightDescriptor() ) end -function RNN:resetIODescriptors() +function RNN:resetInputDescriptor(input, batchSizes) self.xDescs = self:createTensorDescriptors(self.seqLength) - self.yDescs = self:createTensorDescriptors(self.seqLength) - for i = 0, self.seqLength - 1 do - local dim = torch.IntTensor({ self.miniBatch,self.inputSize, 1}) - local stride = torch.IntTensor({dim[3] * dim[2], dim[3],1}) - errcheck('cudnnSetTensorNdDescriptor', - self.xDescs[i], - self.datatype, - 3, - dim:data(), - stride:data()) + if self.inputPacked and input ~= nil and batchSizes ~= nil then + assert(#batchSizes == self.seqLength) + for i = 0, self.seqLength - 1 do + -- tensor shape is (# of sequences in the batch at the timestep, inputSize, 1 (for cudnn)) + local dim = torch.IntTensor({batchSizes[i+1], input:size(2), 1}) + local stride = torch.IntTensor({dim[3] * dim[2], dim[3],1}) + errcheck('cudnnSetTensorNdDescriptor', + self.xDescs[i], + self.datatype, + 3, + dim:data(), + stride:data()) + end + else + for i = 0, self.seqLength - 1 do + local dim = torch.IntTensor({ self.miniBatch,self.inputSize, 1}) + local stride = torch.IntTensor({dim[3] * dim[2], dim[3],1}) + errcheck('cudnnSetTensorNdDescriptor', + self.xDescs[i], + self.datatype, + 3, + dim:data(), + stride:data()) + end + end +end - local dim = 
torch.IntTensor({self.miniBatch, self.hiddenSize * self.numDirections, 1}) - local stride = torch.IntTensor({dim[3] * dim[2], dim[3],1}) - errcheck('cudnnSetTensorNdDescriptor', - self.yDescs[i], - self.datatype, - 3, - dim:data(), - stride:data()) +function RNN:resetOutputDescriptor(output, batchSizes) + self.yDescs = self:createTensorDescriptors(self.seqLength) + if self.inputPacked and output ~= nil and batchSizes ~= nil then + for i = 0, self.seqLength - 1 do + local dim = torch.IntTensor({batchSizes[i+1], self.hiddenSize * self.numDirections, 1}) + local stride = torch.IntTensor({dim[3] * dim[2], dim[3],1}) + errcheck('cudnnSetTensorNdDescriptor', + self.yDescs[i], + self.datatype, + 3, + dim:data(), + stride:data()) + end + else + for i = 0, self.seqLength - 1 do + local dim = torch.IntTensor({self.miniBatch, self.hiddenSize * self.numDirections, 1}) + local stride = torch.IntTensor({dim[3] * dim[2], dim[3],1}) + errcheck('cudnnSetTensorNdDescriptor', + self.yDescs[i], + self.datatype, + 3, + dim:data(), + stride:data()) + end end end @@ -265,10 +299,6 @@ function RNN:makeContiguous(input, gradOutput) return input, gradOutput end -function RNN:resizeOutput(tensor) - return tensor:resize(self.seqLength, self.miniBatch, self.hiddenSize * self.numDirections) -end - function RNN:resizeHidden(tensor) return tensor:resize(self.numLayers * self.numDirections, self.miniBatch, self.hiddenSize) end @@ -292,14 +322,145 @@ function RNN:resetStates() end end +-- input a TxBx* tensor (or BxTx* if batchFirst) where T is the length +-- of the longest sequence, B is the batch size, and * is any number of +-- dimensions. +-- +-- lengths is a table of sequence lengths, which should be sorted in +-- decreasing order. +-- +-- returns a table containing a packed tensor of size (sum of lengths x *) +-- and a list of batch sizes per timestep, i.e. the number of sequences +-- with at least timestep elements. 
+function RNN:packPaddedSequence(input, lengths, batchFirst) + if batchFirst then + input = input:transpose(1, 2) + end + local batches = {} + local bszpts = {} + local lengthsIdx = #lengths + local currentLength = lengths[lengthsIdx] + local steps = input:size(1) + local bsz = input:size(2) + if bsz ~= #lengths then + error("lengths array has incorrect size (expected: " .. bsz .. " but found: " .. #lengths ..")") + end -function RNN:updateOutput(input) - if (self.batchFirst) then - input = input:transpose(1, 2) + for ts = 1, steps do + table.insert(batches, input[ts]:narrow(1, 1, bsz)) + table.insert(bszpts, bsz) + + while ts == currentLength do + if lengthsIdx == 0 then + currentLength = nil + break + else + lengthsIdx = lengthsIdx - 1 + bsz = bsz - 1 + local nextLength = lengths[lengthsIdx] + if currentLength ~= nil and nextLength ~= nil and currentLength > nextLength then + error("lengths array has to be sorted in decreasing order") + end + currentLength = lengths[lengthsIdx] + end + end + + if currentLength == nil then + break + end + end + + return {torch.cat(batches, 1), bszpts} +end + +-- An inverse operation to packPaddedSequence(...) above. Takes a sequence (i.e. 
+-- a Tensor, bszpts table with the format as returned by packPaddedSequence and +-- reconverts it into the TxBx* (or BxTx* if batchFirst) tensor and lengths array +function RNN:padPackedSequence(seq, batchFirst) + local data, bszpts = unpack(seq) + local maxBatchSize = bszpts[1] + local outputSize = torch.LongStorage(2 + data[1]:nDimension()) + outputSize[1] = #bszpts + outputSize[2] = maxBatchSize + for i = 1, data[1]:nDimension() do + outputSize[i + 2] = data[1]:size(i) + end + local output = torch.Tensor():typeAs(data):resize(outputSize):zero() + + local lengths = {} + local offset = 1 + local pbsz = bszpts[1] + local bsz = nil + + local i = 1 + while i <= #bszpts do + bsz = bszpts[i] + output[i]:narrow(1, 1, bsz):copy(data:narrow(1, offset, bsz)) + offset = offset + bsz + + local dec = pbsz - bsz + for j = 1, dec do + table.insert(lengths, i - 1) + end + pbsz = bsz + i = i + 1 + end + for j = 1, bsz do + table.insert(lengths, i - 1) end - assert(input:dim() == 3, 'input must have 3 dimensions: seqLength, miniBatch, inputSize') + + -- reverse lengths list + local reversed = {} + for i = #lengths, 1, -1 do + table.insert(reversed, lengths[i]) + end + + if batchFirst then + output = output:transpose(1, 2) + end + return output, reversed +end + +-- it feels a little dirty setting this function on the class as opposed +-- to having it be functional, but because we need to access class state, +-- here we are... +function RNN:deriveOutputSize(input) + if self.inputPacked then + return torch.LongStorage({input:size(1), self.hiddenSize * self.numDirections}) + else + return torch.LongStorage({self.seqLength, self.miniBatch, self.hiddenSize * self.numDirections}) + end +end + +-- updateOutput takes either of the following as inputs: +-- +-- 1. 
A seqLength x miniBatch x inputSize Tensor, where seqLength is the +-- length of the sequence for every input in the batch, miniBatch is the +-- number of elements in the batch, and inputSize is the size of the input vectors +-- at each time step +-- +-- OR +-- +-- 2. A table containing a packed tensor and a list of batch sizes per timestep. In this +-- case we are supporting variable length sequences for the forward pass. This table +-- is the output from packPaddedSequence(...) above +function RNN:updateOutput(input) + local inputPacked = (type(input) == 'table') + local switched = self.inputPacked ~= inputPacked + self.inputPacked = inputPacked + + if self.batchFirst and not self.inputPacked then + input = input:transpose(1, 2) + end + + if self.inputPacked then + assert(input[1]:dim() == 2, 'packed input must have two dimensions: sum(sequence lengths), inputSize') + else + assert(input:dim() == 3, 'input must have 3 dimensions: seqLength, miniBatch, inputSize') + end + assert(self.dropout == 0 or cudnn.version >= 5103, 'dropout supported only in cudnn v5.1 and above') -- Decide which descriptors/tensors need to be updated. local resetRNN = not self.dropoutDesc or not self.rnnDesc @@ -307,19 +468,58 @@ function RNN:updateOutput(input) local resetHC = not self.hxDesc or not self.hyDesc or not self.cxDesc or not self.cyDesc local resetWeight = not self.wDesc - if input:size(1) ~= self.seqLength then - self.seqLength = input:size(1) - resetIO = true - end + if self.inputPacked then + -- Handle resets for packed input - if input:size(2) ~= self.miniBatch then - self.miniBatch = input:size(2) - resetIO = true - resetHC = true - end + -- In the case of packed inputs, the sequence length is the length of the bsz per time list. + -- We need to reset the IO descriptors if this has changed. 
+ if #input[2] ~= self.seqLength then + self.seqLength = #input[2] + resetIO = true + end - assert(input:size(3) == self.inputSize, 'Incorrect input size!') + -- Similarly, the miniBatch "size" is the batch size at the first timestep (when all + -- sequences are in the batch, regardless of length). If this has changed then we need + -- to reset both the IO descriptors and the hidden/cell descriptors + if input[2][1] ~= self.miniBatch then + self.miniBatch = input[2][1] + resetIO = true + resetHC = true + end + assert(input[1]:size(2) == self.inputSize, 'Incorrect input size!') + else + -- Handle resets for standard (i.e. not packed) input + + -- If the length of the sequences in this input batch differ from the previous batch + -- we need to: reset the IO descriptors to describe the new size of the input and + -- output Tensors in the seqLength dimension + if input:size(1) ~= self.seqLength then + self.seqLength = input:size(1) + resetIO = true + end + -- If the batch size has changed we need to: + -- 1. Update the IO descritprs to describe the new size of the input and output Tensors in the + -- batchSize dimension + -- 2. 
Reset the size of the hidden/cell descriptors so they can store batchSize states + if input:size(2) ~= self.miniBatch then + self.miniBatch = input:size(2) + resetIO = true + resetHC = true + end + assert(input:size(3) == self.inputSize, 'Incorrect input size!') + end + + -- Make sure input is contiguous + local x = self:makeContiguous(self.inputPacked and input[1] or input) + local oSize = self:deriveOutputSize(x) + local oStride = self.inputPacked and + torch.LongStorage({oSize[2], 1}) or + torch.LongStorage({oSize[2] * oSize[3], oSize[3], 1}) + self.output:resize(oSize, oStride) + local y = self.output + local w = self.weight + local bszpts = self.inputPacked and input[2] -- Update descriptors/tensors if resetRNN then @@ -327,7 +527,8 @@ function RNN:updateOutput(input) self:resetRNNDescriptor() end if resetIO then - self:resetIODescriptors(input) + self:resetInputDescriptor(x, bszpts) + self:resetOutputDescriptor(y, bszpts) end if resetHC then self:resetHiddenDescriptors() @@ -337,13 +538,6 @@ function RNN:updateOutput(input) self:resetWeightDescriptor() end - local x = self:makeContiguous(input) - local oSize = torch.LongStorage({self.seqLength, self.miniBatch, self.hiddenSize * self.numDirections}) - local oStride = torch.LongStorage({self.miniBatch * self.hiddenSize * self.numDirections, self.hiddenSize * self.numDirections, 1}) - self.output:resize(oSize, oStride) - local y = self.output - local w = self.weight - -- Optionally use hiddenInput/cellInput parameters if self.rememberStates then if self.hiddenOutput:nDimension() == 3 and self.hiddenOutput:size(1) == self.numLayers * self.numDirections and @@ -400,6 +594,7 @@ function RNN:updateOutput(input) local elemSize = self.reserve:elementSize() reserveSize = math.floor((reserveSize + elemSize - 1) / elemSize) self.reserve:resize(reserveSize) + errcheck('cudnnRNNForwardTraining', cudnn.getHandle(), self.rnnDesc[0], @@ -430,31 +625,36 @@ function RNN:updateOutput(input) wsSize) end if self.sync then 
cutorch.synchronize() end - if (self.batchFirst) then + if self.batchFirst and not self.inputPacked then self.output = self.output:transpose(1, 2) end return self.output end function RNN:updateGradInput(input, gradOutput) - if (self.batchFirst) then - input = input:transpose(1, 2) - gradOutput = gradOutput:transpose(1, 2) - self.output = self.output:transpose(1, 2) - end + if self.batchFirst and not self.inputPacked then + input = input:transpose(1, 2) + gradOutput = gradOutput:transpose(1, 2) + self.output = self.output:transpose(1, 2) + end assert(self.dropout == 0 or cudnn.version >= 5103, 'dropout supported only in cudnn v 5.1 and above') - assert(input:dim() == 3, 'input should have 3 dimensions: seqLength, miniBatch, inputSize') - assert(input:size(1) == self.seqLength, 'input has incorrect sequence length!') - assert(input:size(2) == self.miniBatch, 'input has incorrect minibatch size!') - assert(input:size(3) == self.inputSize, 'input has incorrect size!') + + if self.inputPacked then + assert(input[1]:dim() == 2, 'packed input must have two dimensions: sum(sequence lengths), inputSize') + else + assert(input:dim() == 3, 'input should have 3 dimensions: seqLength, miniBatch, inputSize') + assert(input:size(1) == self.seqLength, 'input has incorrect sequence length!') + assert(input:size(2) == self.miniBatch, 'input has incorrect minibatch size!') + assert(input:size(3) == self.inputSize, 'input has incorrect size!') + end assert(gradOutput:isSameSizeAs(self.output), 'gradOutput has incorrect size!') assert(self.train, 'updateGradInput can only be called when training!') - local x, dy = self:makeContiguous(input, gradOutput) + local x, dy = self:makeContiguous(self.inputPacked and input[1] or input, gradOutput) local y = self.output local w = self.weight - local dx = self.gradInput:resizeAs(input) + local dx = self.gradInput:resizeAs(self.inputPacked and input[1] or input) local hx = self.hiddenInput local cx = self.cellInput local dhy = 
self.gradHiddenOutput @@ -524,7 +724,7 @@ function RNN:updateGradInput(input, gradOutput) wsPtr, wsSize, self.reserve:data(), self.reserve:size(1) * self.reserve:elementSize()) if self.sync then cutorch.synchronize() end - if (self.batchFirst) then + if self.batchFirst and not self.inputPacked then self.gradInput = self.gradInput:transpose(1, 2) self.output = self.output:transpose(1, 2) end @@ -532,7 +732,7 @@ function RNN:updateGradInput(input, gradOutput) end function RNN:accGradParameters(input, gradOutput, scale) - if (self.batchFirst) then + if self.batchFirst and not self.inputPacked then input = input:transpose(1, 2) gradOutput = gradOutput:transpose(1, 2) self.output = self.output:transpose(1, 2) @@ -540,15 +740,19 @@ function RNN:accGradParameters(input, gradOutput, scale) scale = scale or 1 if scale == 0 then return end assert(self.dropout == 0 or cudnn.version >= 5103, 'dropout supported only in cudnn 5.1 and above') - assert(input:dim() == 3, 'input should have 3 dimensions: seqLength, miniBatch, inputSize') - assert(input:size(1) == self.seqLength, 'input has incorrect sequence length!') - assert(input:size(2) == self.miniBatch, 'input has incorrect minibatch size!') - assert(input:size(3) == self.inputSize, 'input has incorrect size!') + if self.inputPacked then + assert(input[1]:dim() == 2, 'packed input must have two dimensions: sum(sequence lengths), inputSize') + else + assert(input:dim() == 3, 'input should have 3 dimensions: seqLength, miniBatch, inputSize') + assert(input:size(1) == self.seqLength, 'input has incorrect sequence length!') + assert(input:size(2) == self.miniBatch, 'input has incorrect minibatch size!') + assert(input:size(3) == self.inputSize, 'input has incorrect size!') + end assert(gradOutput:isSameSizeAs(self.output), 'gradOutput has incorrect size!') assert(self.train, 'accGradParameters can only be called when training!') - local x, dy = self:makeContiguous(input, gradOutput) + local x, dy = 
self:makeContiguous(self.inputPacked and input[1] or input, gradOutput) local hx = self.hiddenInput local y = self.output local dw = self.gradWeight @@ -604,7 +808,7 @@ function RNN:accGradParameters(input, gradOutput, scale) scaleTensor:data()) end - if (self.batchFirst) then + if self.batchFirst and not self.inputPacked then gradOutput = gradOutput:transpose(1, 2) self.output = self.output:transpose(1, 2) end diff --git a/test/test_rnn.lua b/test/test_rnn.lua index 63520b6..0372983 100644 --- a/test/test_rnn.lua +++ b/test/test_rnn.lua @@ -261,6 +261,222 @@ function getRNNCheckSums(miniBatch, seqLength, hiddenSize, numberOfLayers, numbe return checkSums end +function cudnntest.testPackPadSequences() + -- T is 4, B = 5, vector size = 3 + local input = torch.CudaIntTensor({ + {{101, 102, 103}, + {201, 202, 203}, + {301, 302, 303}, + {401, 402, 403}, + {501, 502, 503}}, + {{104, 105, 106}, + {204, 205, 206}, + {304, 305, 306}, + { 0, 0, 0}, + { 0, 0, 0}}, + {{107, 108, 109}, + {207, 208, 209}, + { 0, 0, 0}, + { 0, 0, 0}, + { 0, 0, 0}}, + {{110, 111, 112}, + { 0, 0, 0}, + { 0, 0, 0}, + { 0, 0, 0}, + { 0, 0, 0}}, + }) + local lengths = {4, 3, 2, 1, 1} + + local expectedPacked = torch.CudaIntTensor({ + {101, 102, 103}, {201, 202, 203}, {301, 302, 303}, {401, 402, 403}, {501, 502, 503}, + {104, 105, 106}, {204, 205, 206}, {304, 305, 306}, + {107, 108, 109}, {207, 208, 209}, + {110, 111, 112} + }) + local expectedBSPT = {5, 3, 2, 1} + + local result = cudnn.RNN:packPaddedSequence(input, lengths) + local actualPacked, actualBSPT = unpack(result) + mytester:assertTensorEq(expectedPacked, actualPacked) + mytester:assertTableEq(expectedBSPT, actualBSPT) + + local actualUnpacked, actualLengths = cudnn.RNN:padPackedSequence(result) + mytester:assertTensorEq(input, actualUnpacked) + mytester:assertTableEq(lengths, actualLengths) + + -- test again with batchFirst + input = input:transpose(1, 2) + + local result = cudnn.RNN:packPaddedSequence(input, lengths, true) + local 
actualPacked, actualBSPT = unpack(result) + mytester:assertTensorEq(expectedPacked, actualPacked) + mytester:assertTableEq(expectedBSPT, actualBSPT) + + local actualUnpacked, actualLengths = cudnn.RNN:padPackedSequence(result, true) + mytester:assertTensorEq(input, actualUnpacked) + mytester:assertTableEq(lengths, actualLengths) +end + +-- clone the parameters of src into dest, assumes both RNNs were created with +-- the same options (e.g. same input size, hidden size, layers, etc.) +local function deepcopyRNN(dest, src) + dest.weight = src.weight:clone() -- encompasses W_hh, W_xh etc. + dest.gradWeight = src.gradWeight:clone() +end + +function cudnntest.testVariableLengthSequences() + local input = torch.CudaTensor({ + {{1, 2, 2, 1}, + {2, 1, 2, 2}, + {1, 1, 1, 2}, + {2, 2, 2, 1}}, + {{4, 1, 3, 1}, + {3, 1, 2, 1}, + {1, 1, 2, 1}, + {0, 0, 0, 0}}, + {{1, 1, 2, 1}, + {2, 1, 2, 2}, + {1, 2, 2, 1}, + {0, 0, 0, 0}}, + {{1, 2, 1, 1}, + {0, 0, 0, 0}, + {0, 0, 0, 0}, + {0, 0, 0, 0}} + }) + + -- same as above + local indivInputs = { + torch.CudaTensor({ + {{1, 2, 2, 1}}, + {{4, 1, 3, 1}}, + {{1, 1, 2, 1}}, + {{1, 2, 1, 1}}, + }), + torch.CudaTensor({ + {{2, 1, 2, 2}}, + {{3, 1, 2, 1}}, + {{2, 1, 2, 2}}, + }), + torch.CudaTensor({ + {{1, 1, 1, 2}}, + {{1, 1, 2, 1}}, + {{1, 2, 2, 1}}, + }), + torch.CudaTensor({ + {{2, 2, 2, 1}}, + }), + } + + local lengths = {4, 3, 3, 1} + local maxLength = 4 + + -- Generate gradOutput based on input sizes + local gradOutput = torch.CudaTensor(11, 1, 10):uniform() + local indivGradOutputs = { + torch.cat({gradOutput:narrow(1, 1, 1), gradOutput:narrow(1, 5, 1), gradOutput:narrow(1, 8, 1), gradOutput:narrow(1, 11, 1)}, 1):clone(), + torch.cat({gradOutput:narrow(1, 2, 1), gradOutput:narrow(1, 6, 1), gradOutput:narrow(1, 9, 1)}, 1):clone(), + torch.cat({gradOutput:narrow(1, 3, 1), gradOutput:narrow(1, 7, 1), gradOutput:narrow(1, 10, 1)}, 1):clone(), + gradOutput:narrow(1, 4, 1):clone() + } + gradOutput = gradOutput:squeeze() + + local inputSize 
= 4 + local hiddenSize = 10 + local numLayers = 1 + local batchFirst = false + local dropout = false + local rememberStates = false + + local lstm = cudnn.LSTM( + inputSize, + hiddenSize, + numLayers, + batchFirst, + dropout, + rememberStates) + + local lstm2 = cudnn.LSTM( + inputSize, + hiddenSize, + numLayers, + batchFirst, + dropout, + rememberStates) + + deepcopyRNN(lstm2, lstm) + + -- Step 1: Pass Sequences as batch and individually, verify weights, outputs + -- are the same in both instances + + -- batched + local packed = cudnn.RNN:packPaddedSequence(input, lengths) + local packedOutput = lstm:updateOutput(packed) + local packedHiddenOutput = lstm.hiddenOutput:clone() + -- could use padPackedSequence here, but for testing simplicity, we'll just + -- operate on the returned results + + local separate = {} + local hids = {} + local indivGradInputs = {} + + for i, length in ipairs(lengths) do + local inp = indivInputs[i] + local output = lstm2:updateOutput(inp):clone() + table.insert(separate, output) + local hid = lstm2.hiddenOutput:clone() + table.insert(hids, hid) + + -- need to do backwards pass here too + local gradOutput = indivGradOutputs[i] + local gradInp = lstm2:updateGradInput(inp, gradOutput):clone() + table.insert(indivGradInputs, gradInp) + end + separate = torch.cat(separate, 1):squeeze() + hids = torch.cat(hids, 1):squeeze() + + mytester:asserteq(packedOutput:size(1), separate:size(1)) + mytester:asserteq(packedOutput:size(2), separate:size(2)) + + -- packedOutput has format where all 4 from first batch, then all 3 from + -- second batch, etc. while separate has all 4 from first sequence, + -- all 3 from next sequence, etc. 
I manually map the matches here + local corresponding = { + {1, 1}, + {2, 5}, + {3, 8}, + {4, 11}, + {5, 2}, + {6, 6}, + {7, 9}, + {8, 3}, + {9, 7}, + {10, 10}, + {11, 4} + } + for _, pair in ipairs(corresponding) do + local sep, batched = unpack(pair) + local diff = torch.csub(separate[sep], packedOutput[batched]):abs():sum() + mytester:assert(diff < 1e-7) + end + + local hdiff = torch.csub(packedHiddenOutput, hids):abs():sum() + mytester:assert(hdiff < 1e-7) + + -- Step 2: update grad input as batch and individually + + local packedGradInput = lstm:updateGradInput(packed, gradOutput) + local igiTestable = torch.cat(indivGradInputs, 1):squeeze(2) + + for _, pair in ipairs(corresponding) do + sep, batched = unpack(pair) + local diff = torch.csub(igiTestable[sep], packedGradInput[batched]):abs():sum() + mytester:assert(diff < 1e-7) + end + + -- Step 3: Basically verify that accGradParameters works for batch + lstm:accGradParameters(packed, gradOutput) +end + + mytester = torch.Tester() mytester:add(cudnntest) mytester:run() |