github.com/soumith/cudnn.torch.git
author    Natalia Gimelshein <ngimelshein@nvidia.com>    2017-04-26 02:29:24 +0300
committer Natalia Gimelshein <ngimelshein@nvidia.com>    2017-04-26 02:29:24 +0300
commit    39d6c28b1eddb9632c90c18d1e2fc104ebdf259f (patch)
tree      29eaa0dce7a89d74086f297a581c13fe6a7ee72c
parent    4a50d76a79cf432e69e066f0348d085bc2d469c6 (diff)
parent    a418f1e530cee811ab10e97a5388b62a6865aa20 (diff)
Merge branch 'master' into HEAD
 RNN.lua           | 336
 test/test_rnn.lua | 216
 2 files changed, 486 insertions(+), 66 deletions(-)
diff --git a/RNN.lua b/RNN.lua
index 0145da0..f75f505 100644
--- a/RNN.lua
+++ b/RNN.lua
@@ -28,6 +28,7 @@ function RNN:__init(inputSize, hiddenSize, numLayers, batchFirst, dropout, remem
self.batchFirst = batchFirst or false -- Set to true for batch x time x inputdim.
self.rememberStates = rememberStates or false
self.sync = true
+ self.inputPacked = false
self.gradInput = torch.CudaTensor()
self.output = torch.CudaTensor()
self.weight = torch.CudaTensor()
@@ -65,7 +66,8 @@ function RNN:reset(stdv)
self:resetDropoutDescriptor()
self:resetRNNDescriptor()
- self:resetIODescriptors()
+ self:resetInputDescriptor()
+ self:resetOutputDescriptor()
local weightSizePtr = ffi.new("size_t[1]")
errcheck('cudnnGetRNNParamsSize',
@@ -186,28 +188,60 @@ function RNN:resetWeightDescriptor()
)
end
-function RNN:resetIODescriptors()
+function RNN:resetInputDescriptor(input, batchSizes)
self.xDescs = self:createTensorDescriptors(self.seqLength)
- self.yDescs = self:createTensorDescriptors(self.seqLength)
- for i = 0, self.seqLength - 1 do
- local dim = torch.IntTensor({ self.miniBatch,self.inputSize, 1})
- local stride = torch.IntTensor({dim[3] * dim[2], dim[3],1})
- errcheck('cudnnSetTensorNdDescriptor',
- self.xDescs[i],
- self.datatype,
- 3,
- dim:data(),
- stride:data())
+ if self.inputPacked and input ~= nil and batchSizes ~= nil then
+ assert(#batchSizes == self.seqLength)
+ for i = 0, self.seqLength - 1 do
+ -- tensor shape is (# of sequences in the batch at the timestep, inputSize, 1 (for cudnn))
+ local dim = torch.IntTensor({batchSizes[i+1], input:size(2), 1})
+ local stride = torch.IntTensor({dim[3] * dim[2], dim[3],1})
+ errcheck('cudnnSetTensorNdDescriptor',
+ self.xDescs[i],
+ self.datatype,
+ 3,
+ dim:data(),
+ stride:data())
+ end
+ else
+ for i = 0, self.seqLength - 1 do
+ local dim = torch.IntTensor({ self.miniBatch,self.inputSize, 1})
+ local stride = torch.IntTensor({dim[3] * dim[2], dim[3],1})
+ errcheck('cudnnSetTensorNdDescriptor',
+ self.xDescs[i],
+ self.datatype,
+ 3,
+ dim:data(),
+ stride:data())
+ end
+ end
+end
- local dim = torch.IntTensor({self.miniBatch, self.hiddenSize * self.numDirections, 1})
- local stride = torch.IntTensor({dim[3] * dim[2], dim[3],1})
- errcheck('cudnnSetTensorNdDescriptor',
- self.yDescs[i],
- self.datatype,
- 3,
- dim:data(),
- stride:data())
+function RNN:resetOutputDescriptor(output, batchSizes)
+ self.yDescs = self:createTensorDescriptors(self.seqLength)
+ if self.inputPacked and output ~= nil and batchSizes ~= nil then
+ for i = 0, self.seqLength - 1 do
+ local dim = torch.IntTensor({batchSizes[i+1], self.hiddenSize * self.numDirections, 1})
+ local stride = torch.IntTensor({dim[3] * dim[2], dim[3],1})
+ errcheck('cudnnSetTensorNdDescriptor',
+ self.yDescs[i],
+ self.datatype,
+ 3,
+ dim:data(),
+ stride:data())
+ end
+ else
+ for i = 0, self.seqLength - 1 do
+ local dim = torch.IntTensor({self.miniBatch, self.hiddenSize * self.numDirections, 1})
+ local stride = torch.IntTensor({dim[3] * dim[2], dim[3],1})
+ errcheck('cudnnSetTensorNdDescriptor',
+ self.yDescs[i],
+ self.datatype,
+ 3,
+ dim:data(),
+ stride:data())
+ end
end
end
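
For reference, with packed input each per-timestep cuDNN descriptor covers only the sequences that are still active at that timestep. Below is a minimal sketch of the dims and strides the loop above hands to cudnnSetTensorNdDescriptor, assuming illustrative values of inputSize = 3 and batchSizes = {5, 3, 2, 1}; only torch is required to run it.

require 'torch'

local batchSizes = {5, 3, 2, 1}   -- number of active sequences at each timestep
local inputSize  = 3

for i = 0, #batchSizes - 1 do
   -- same layout as the descriptor loop above: (active batch, inputSize, 1)
   local dim    = torch.IntTensor({batchSizes[i + 1], inputSize, 1})
   local stride = torch.IntTensor({dim[3] * dim[2], dim[3], 1})
   print(string.format("timestep %d: dim = %d x %d x %d, stride = %d, %d, %d",
                       i, dim[1], dim[2], dim[3], stride[1], stride[2], stride[3]))
end
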
@@ -265,10 +299,6 @@ function RNN:makeContiguous(input, gradOutput)
return input, gradOutput
end
-function RNN:resizeOutput(tensor)
- return tensor:resize(self.seqLength, self.miniBatch, self.hiddenSize * self.numDirections)
-end
-
function RNN:resizeHidden(tensor)
return tensor:resize(self.numLayers * self.numDirections, self.miniBatch, self.hiddenSize)
end
@@ -292,14 +322,145 @@ function RNN:resetStates()
end
end
+-- input: a TxBx* tensor (or BxTx* if batchFirst) where T is the length
+-- of the longest sequence, B is the batch size, and * is any number of
+-- dimensions.
+--
+-- lengths is a table of sequence lengths, which should be sorted in
+-- decreasing order.
+--
+-- returns a table containing a packed tensor of size (sum of lengths x *)
+-- and a list of batch sizes per timestep, i.e. the number of sequences
+-- with at least timestep elements.
+function RNN:packPaddedSequence(input, lengths, batchFirst)
+ if batchFirst then
+ input = input:transpose(1, 2)
+ end
+ local batches = {}
+ local bszpts = {}
+ local lengthsIdx = #lengths
+ local currentLength = lengths[lengthsIdx]
+ local steps = input:size(1)
+ local bsz = input:size(2)
+ if bsz ~= #lengths then
+      error("lengths array has incorrect size (expected: " .. bsz .. " but found: " .. #lengths .. ")")
+ end
-function RNN:updateOutput(input)
- if (self.batchFirst) then
- input = input:transpose(1, 2)
+ for ts = 1, steps do
+ table.insert(batches, input[ts]:narrow(1, 1, bsz))
+ table.insert(bszpts, bsz)
+
+ while ts == currentLength do
+ if lengthsIdx == 0 then
+ currentLength = nil
+ break
+ else
+ lengthsIdx = lengthsIdx - 1
+ bsz = bsz - 1
+ local nextLength = lengths[lengthsIdx]
+ if currentLength ~= nil and nextLength ~= nil and currentLength > nextLength then
+ error("lengths array has to be sorted in decreasing order")
+ end
+ currentLength = lengths[lengthsIdx]
+ end
+ end
+
+ if currentLength == nil then
+ break
+ end
+ end
+
+ return {torch.cat(batches, 1), bszpts}
+end
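
A usage sketch for packPaddedSequence with made-up sizes (it assumes cudnn.torch and its dependencies are installed; the names and values are illustrative only): three sequences of lengths 4, 2 and 1 are zero-padded into a 4 x 3 x inputSize tensor and packed into a 7 x inputSize tensor plus a batch-sizes-per-timestep list.

require 'cudnn'

local inputSize = 5
-- zero-padded batch: T = 4 timesteps, B = 3 sequences, lengths sorted in decreasing order
local padded = torch.Tensor(4, 3, inputSize):zero()
padded:narrow(1, 1, 4):select(2, 1):uniform()   -- sequence 1: length 4
padded:narrow(1, 1, 2):select(2, 2):uniform()   -- sequence 2: length 2
padded:narrow(1, 1, 1):select(2, 3):uniform()   -- sequence 3: length 1

local packed = cudnn.RNN:packPaddedSequence(padded, {4, 2, 1})
-- packed[1] is a 7 x inputSize tensor (7 = 4 + 2 + 1)
-- packed[2] is {3, 2, 1, 1}: the number of active sequences at each timestep
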
+
+-- The inverse operation to packPaddedSequence(...) above. Takes a sequence (i.e. a
+-- {Tensor, bszpts} table in the format returned by packPaddedSequence) and converts it
+-- back into the TxBx* (or BxTx* if batchFirst) tensor and lengths table.
+function RNN:padPackedSequence(seq, batchFirst)
+ local data, bszpts = unpack(seq)
+ local maxBatchSize = bszpts[1]
+ local outputSize = torch.LongStorage(2 + data[1]:nDimension())
+ outputSize[1] = #bszpts
+ outputSize[2] = maxBatchSize
+ for i = 1, data[1]:nDimension() do
+ outputSize[i + 2] = data[1]:size(i)
+ end
+ local output = torch.Tensor():typeAs(data):resize(outputSize):zero()
+
+ local lengths = {}
+ local offset = 1
+ local pbsz = bszpts[1]
+ local bsz = nil
+
+ local i = 1
+ while i <= #bszpts do
+ bsz = bszpts[i]
+ output[i]:narrow(1, 1, bsz):copy(data:narrow(1, offset, bsz))
+ offset = offset + bsz
+
+ local dec = pbsz - bsz
+ for j = 1, dec do
+ table.insert(lengths, i - 1)
+ end
+ pbsz = bsz
+ i = i + 1
+ end
+ for j = 1, bsz do
+ table.insert(lengths, i - 1)
end
- assert(input:dim() == 3, 'input must have 3 dimensions: seqLength, miniBatch, inputSize')
+
+ -- reverse lengths list
+ local reversed = {}
+ for i = #lengths, 1, -1 do
+ table.insert(reversed, lengths[i])
+ end
+
+ if batchFirst then
+ output = output:transpose(1, 2)
+ end
+ return output, reversed
+end
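
And the inverse direction, a minimal round-trip sketch under the same assumptions: packing and then unpacking returns the original zero-padded tensor together with the sorted lengths.

require 'cudnn'

local padded = torch.Tensor(3, 2, 4):zero()      -- T = 3, B = 2, inputSize = 4
padded:select(2, 1):uniform()                    -- sequence 1: length 3
padded:narrow(1, 1, 1):select(2, 2):uniform()    -- sequence 2: length 1

local seq = cudnn.RNN:packPaddedSequence(padded, {3, 1})
local unpacked, lengths = cudnn.RNN:padPackedSequence(seq)
-- unpacked:equal(padded) is true and lengths is {3, 1}
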
+
+-- it feels a little dirty setting this function on the class as opposed
+-- to having it be functional, but because we need to access class state,
+-- here we are...
+function RNN:deriveOutputSize(input)
+ if self.inputPacked then
+ return torch.LongStorage({input:size(1), self.hiddenSize * self.numDirections})
+ else
+ return torch.LongStorage({self.seqLength, self.miniBatch, self.hiddenSize * self.numDirections})
+ end
+end
+
+-- updateOutput takes either of the following as inputs:
+--
+-- 1. A seqLength x miniBatch x inputSize Tensor, where seqLength is the
+-- length of the sequence for every input in the batch, miniBatch is the
+-- number of elements in the batch, and inputSize is the size of the input vectors
+-- at each time step
+--
+-- OR
+--
+-- 2. A table containing a packed tensor and a list of batch sizes per timestep. In this
+-- case we are supporting variable length sequences for the forward pass. This table
+-- is the output from packPaddedSequence(...) above
+function RNN:updateOutput(input)
+ local inputPacked = (type(input) == 'table')
+ local switched = self.inputPacked ~= inputPacked
+ self.inputPacked = inputPacked
+
+ if self.batchFirst and not self.inputPacked then
+ input = input:transpose(1, 2)
+ end
+
+ if self.inputPacked then
+ assert(input[1]:dim() == 2, 'packed input must have two dimensions: sum(sequence lengths), inputSize')
+ else
+ assert(input:dim() == 3, 'input must have 3 dimensions: seqLength, miniBatch, inputSize')
+ end
+
assert(self.dropout == 0 or cudnn.version >= 5103, 'dropout supported only in cudnn v5.1 and above')
-- Decide which descriptors/tensors need to be updated.
local resetRNN = not self.dropoutDesc or not self.rnnDesc
@@ -307,19 +468,58 @@ function RNN:updateOutput(input)
local resetHC = not self.hxDesc or not self.hyDesc or not self.cxDesc or not self.cyDesc
local resetWeight = not self.wDesc
- if input:size(1) ~= self.seqLength then
- self.seqLength = input:size(1)
- resetIO = true
- end
+ if self.inputPacked then
+ -- Handle resets for packed input
- if input:size(2) ~= self.miniBatch then
- self.miniBatch = input:size(2)
- resetIO = true
- resetHC = true
- end
+      -- In the case of packed inputs, the sequence length is the length of the
+      -- batch-sizes-per-timestep list. We need to reset the IO descriptors if this has changed.
+ if #input[2] ~= self.seqLength then
+ self.seqLength = #input[2]
+ resetIO = true
+ end
- assert(input:size(3) == self.inputSize, 'Incorrect input size!')
+ -- Similarly, the miniBatch "size" is the batch size at the first timestep (when all
+ -- sequences are in the batch, regardless of length). If this has changed then we need
+ -- to reset both the IO descriptors and the hidden/cell descriptors
+ if input[2][1] ~= self.miniBatch then
+ self.miniBatch = input[2][1]
+ resetIO = true
+ resetHC = true
+ end
+ assert(input[1]:size(2) == self.inputSize, 'Incorrect input size!')
+ else
+ -- Handle resets for standard (i.e. not packed) input
+
+      -- If the length of the sequences in this input batch differs from the previous batch,
+      -- we need to reset the IO descriptors to describe the new size of the input and
+      -- output Tensors in the seqLength dimension
+ if input:size(1) ~= self.seqLength then
+ self.seqLength = input:size(1)
+ resetIO = true
+ end
+ -- If the batch size has changed we need to:
+      -- 1. Update the IO descriptors to describe the new size of the input and output Tensors in the
+ -- batchSize dimension
+ -- 2. Reset the size of the hidden/cell descriptors so they can store batchSize states
+ if input:size(2) ~= self.miniBatch then
+ self.miniBatch = input:size(2)
+ resetIO = true
+ resetHC = true
+ end
+ assert(input:size(3) == self.inputSize, 'Incorrect input size!')
+ end
+
+ -- Make sure input is contiguous
+ local x = self:makeContiguous(self.inputPacked and input[1] or input)
+ local oSize = self:deriveOutputSize(x)
+ local oStride = self.inputPacked and
+ torch.LongStorage({oSize[2], 1}) or
+ torch.LongStorage({oSize[2] * oSize[3], oSize[3], 1})
+ self.output:resize(oSize, oStride)
+ local y = self.output
+ local w = self.weight
+ local bszpts = self.inputPacked and input[2]
-- Update descriptors/tensors
if resetRNN then
@@ -327,7 +527,8 @@ function RNN:updateOutput(input)
self:resetRNNDescriptor()
end
if resetIO then
- self:resetIODescriptors(input)
+ self:resetInputDescriptor(x, bszpts)
+ self:resetOutputDescriptor(y, bszpts)
end
if resetHC then
self:resetHiddenDescriptors()
@@ -337,13 +538,6 @@ function RNN:updateOutput(input)
self:resetWeightDescriptor()
end
- local x = self:makeContiguous(input)
- local oSize = torch.LongStorage({self.seqLength, self.miniBatch, self.hiddenSize * self.numDirections})
- local oStride = torch.LongStorage({self.miniBatch * self.hiddenSize * self.numDirections, self.hiddenSize * self.numDirections, 1})
- self.output:resize(oSize, oStride)
- local y = self.output
- local w = self.weight
-
-- Optionally use hiddenInput/cellInput parameters
if self.rememberStates then
if self.hiddenOutput:nDimension() == 3 and self.hiddenOutput:size(1) == self.numLayers * self.numDirections and
@@ -400,6 +594,7 @@ function RNN:updateOutput(input)
local elemSize = self.reserve:elementSize()
reserveSize = math.floor((reserveSize + elemSize - 1) / elemSize)
self.reserve:resize(reserveSize)
+
errcheck('cudnnRNNForwardTraining',
cudnn.getHandle(),
self.rnnDesc[0],
@@ -430,31 +625,36 @@ function RNN:updateOutput(input)
wsSize)
end
if self.sync then cutorch.synchronize() end
- if (self.batchFirst) then
+ if self.batchFirst and not self.inputPacked then
self.output = self.output:transpose(1, 2)
end
return self.output
end
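
A forward-pass sketch tying the two input forms together (illustrative sizes; assumes cudnn.torch and a CUDA device): the table produced by packPaddedSequence is passed straight to updateOutput, and the packed output can be re-padded by pairing it with the same batch-sizes list.

require 'cudnn'

local inputSize, hiddenSize, numLayers = 4, 10, 1
local lstm = cudnn.LSTM(inputSize, hiddenSize, numLayers)

-- padded input: T = 4 timesteps, B = 3 sequences with lengths {4, 2, 1}
local padded = torch.CudaTensor(4, 3, inputSize):uniform()
padded:narrow(1, 3, 2):select(2, 2):zero()   -- pad sequence 2 beyond its length of 2
padded:narrow(1, 2, 3):select(2, 3):zero()   -- pad sequence 3 beyond its length of 1

local packed = cudnn.RNN:packPaddedSequence(padded, {4, 2, 1})
local output = lstm:updateOutput(packed)
-- output is sum(lengths) x hiddenSize = 7 x 10, in the same row order as packed[1]

-- to recover a T x B x hiddenSize view, reuse the batch-sizes list from packing
local paddedOutput, lengths = cudnn.RNN:padPackedSequence({output, packed[2]})
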
function RNN:updateGradInput(input, gradOutput)
- if (self.batchFirst) then
- input = input:transpose(1, 2)
- gradOutput = gradOutput:transpose(1, 2)
- self.output = self.output:transpose(1, 2)
- end
+ if self.batchFirst and not self.inputPacked then
+ input = input:transpose(1, 2)
+ gradOutput = gradOutput:transpose(1, 2)
+ self.output = self.output:transpose(1, 2)
+ end
assert(self.dropout == 0 or cudnn.version >= 5103, 'dropout supported only in cudnn v 5.1 and above')
- assert(input:dim() == 3, 'input should have 3 dimensions: seqLength, miniBatch, inputSize')
- assert(input:size(1) == self.seqLength, 'input has incorrect sequence length!')
- assert(input:size(2) == self.miniBatch, 'input has incorrect minibatch size!')
- assert(input:size(3) == self.inputSize, 'input has incorrect size!')
+
+ if self.inputPacked then
+ assert(input[1]:dim() == 2, 'packed input must have two dimensions: sum(sequence lengths), inputSize')
+ else
+ assert(input:dim() == 3, 'input should have 3 dimensions: seqLength, miniBatch, inputSize')
+ assert(input:size(1) == self.seqLength, 'input has incorrect sequence length!')
+ assert(input:size(2) == self.miniBatch, 'input has incorrect minibatch size!')
+ assert(input:size(3) == self.inputSize, 'input has incorrect size!')
+ end
assert(gradOutput:isSameSizeAs(self.output), 'gradOutput has incorrect size!')
assert(self.train, 'updateGradInput can only be called when training!')
- local x, dy = self:makeContiguous(input, gradOutput)
+ local x, dy = self:makeContiguous(self.inputPacked and input[1] or input, gradOutput)
local y = self.output
local w = self.weight
- local dx = self.gradInput:resizeAs(input)
+ local dx = self.gradInput:resizeAs(self.inputPacked and input[1] or input)
local hx = self.hiddenInput
local cx = self.cellInput
local dhy = self.gradHiddenOutput
@@ -524,7 +724,7 @@ function RNN:updateGradInput(input, gradOutput)
wsPtr, wsSize,
self.reserve:data(), self.reserve:size(1) * self.reserve:elementSize())
if self.sync then cutorch.synchronize() end
- if (self.batchFirst) then
+ if self.batchFirst and not self.inputPacked then
self.gradInput = self.gradInput:transpose(1, 2)
self.output = self.output:transpose(1, 2)
end
@@ -532,7 +732,7 @@ function RNN:updateGradInput(input, gradOutput)
end
function RNN:accGradParameters(input, gradOutput, scale)
- if (self.batchFirst) then
+ if self.batchFirst and not self.inputPacked then
input = input:transpose(1, 2)
gradOutput = gradOutput:transpose(1, 2)
self.output = self.output:transpose(1, 2)
@@ -540,15 +740,19 @@ function RNN:accGradParameters(input, gradOutput, scale)
scale = scale or 1
if scale == 0 then return end
assert(self.dropout == 0 or cudnn.version >= 5103, 'dropout supported only in cudnn 5.1 and above')
- assert(input:dim() == 3, 'input should have 3 dimensions: seqLength, miniBatch, inputSize')
- assert(input:size(1) == self.seqLength, 'input has incorrect sequence length!')
- assert(input:size(2) == self.miniBatch, 'input has incorrect minibatch size!')
- assert(input:size(3) == self.inputSize, 'input has incorrect size!')
+ if self.inputPacked then
+ assert(input[1]:dim() == 2, 'packed input must have two dimensions: sum(sequence lengths), inputSize')
+ else
+ assert(input:dim() == 3, 'input should have 3 dimensions: seqLength, miniBatch, inputSize')
+ assert(input:size(1) == self.seqLength, 'input has incorrect sequence length!')
+ assert(input:size(2) == self.miniBatch, 'input has incorrect minibatch size!')
+ assert(input:size(3) == self.inputSize, 'input has incorrect size!')
+ end
assert(gradOutput:isSameSizeAs(self.output), 'gradOutput has incorrect size!')
assert(self.train, 'accGradParameters can only be called when training!')
- local x, dy = self:makeContiguous(input, gradOutput)
+ local x, dy = self:makeContiguous(self.inputPacked and input[1] or input, gradOutput)
local hx = self.hiddenInput
local y = self.output
local dw = self.gradWeight
@@ -604,7 +808,7 @@ function RNN:accGradParameters(input, gradOutput, scale)
scaleTensor:data())
end
- if (self.batchFirst) then
+ if self.batchFirst and not self.inputPacked then
gradOutput = gradOutput:transpose(1, 2)
self.output = self.output:transpose(1, 2)
end
diff --git a/test/test_rnn.lua b/test/test_rnn.lua
index 63520b6..0372983 100644
--- a/test/test_rnn.lua
+++ b/test/test_rnn.lua
@@ -261,6 +261,222 @@ function getRNNCheckSums(miniBatch, seqLength, hiddenSize, numberOfLayers, numbe
return checkSums
end
+function cudnntest.testPackPadSequences()
+ -- T is 4, B = 5, vector size = 3
+ local input = torch.CudaIntTensor({
+ {{101, 102, 103},
+ {201, 202, 203},
+ {301, 302, 303},
+ {401, 402, 403},
+ {501, 502, 503}},
+ {{104, 105, 106},
+ {204, 205, 206},
+ {304, 305, 306},
+ { 0, 0, 0},
+ { 0, 0, 0}},
+ {{107, 108, 109},
+ {207, 208, 209},
+ { 0, 0, 0},
+ { 0, 0, 0},
+ { 0, 0, 0}},
+ {{110, 111, 112},
+ { 0, 0, 0},
+ { 0, 0, 0},
+ { 0, 0, 0},
+ { 0, 0, 0}},
+ })
+ local lengths = {4, 3, 2, 1, 1}
+
+ local expectedPacked = torch.CudaIntTensor({
+ {101, 102, 103}, {201, 202, 203}, {301, 302, 303}, {401, 402, 403}, {501, 502, 503},
+ {104, 105, 106}, {204, 205, 206}, {304, 305, 306},
+ {107, 108, 109}, {207, 208, 209},
+ {110, 111, 112}
+ })
+ local expectedBSPT = {5, 3, 2, 1}
+
+ local result = cudnn.RNN:packPaddedSequence(input, lengths)
+ local actualPacked, actualBSPT = unpack(result)
+ mytester:assertTensorEq(expectedPacked, actualPacked)
+ mytester:assertTableEq(expectedBSPT, actualBSPT)
+
+ local actualUnpacked, actualLengths = cudnn.RNN:padPackedSequence(result)
+ mytester:assertTensorEq(input, actualUnpacked)
+ mytester:assertTableEq(lengths, actualLengths)
+
+ -- test again with batchFirst
+ input = input:transpose(1, 2)
+
+ local result = cudnn.RNN:packPaddedSequence(input, lengths, true)
+ local actualPacked, actualBSPT = unpack(result)
+ mytester:assertTensorEq(expectedPacked, actualPacked)
+ mytester:assertTableEq(expectedBSPT, actualBSPT)
+
+ local actualUnpacked, actualLengths = cudnn.RNN:padPackedSequence(result, true)
+ mytester:assertTensorEq(input, actualUnpacked)
+ mytester:assertTableEq(lengths, actualLengths)
+end
+
+-- clone the parameters of src into dest, assumes both RNNs were created with
+-- the same options (e.g. same input size, hidden size, layers, etc.)
+local function deepcopyRNN(dest, src)
+ dest.weight = src.weight:clone() -- encompasses W_hh, W_xh etc.
+ dest.gradWeight = src.gradWeight:clone()
+end
+
+function cudnntest.testVariableLengthSequences()
+ local input = torch.CudaTensor({
+ {{1, 2, 2, 1},
+ {2, 1, 2, 2},
+ {1, 1, 1, 2},
+ {2, 2, 2, 1}},
+ {{4, 1, 3, 1},
+ {3, 1, 2, 1},
+ {1, 1, 2, 1},
+ {0, 0, 0, 0}},
+ {{1, 1, 2, 1},
+ {2, 1, 2, 2},
+ {1, 2, 2, 1},
+ {0, 0, 0, 0}},
+ {{1, 2, 1, 1},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0},
+ {0, 0, 0, 0}}
+ })
+
+ -- same as above
+ local indivInputs = {
+ torch.CudaTensor({
+ {{1, 2, 2, 1}},
+ {{4, 1, 3, 1}},
+ {{1, 1, 2, 1}},
+ {{1, 2, 1, 1}},
+ }),
+ torch.CudaTensor({
+ {{2, 1, 2, 2}},
+ {{3, 1, 2, 1}},
+ {{2, 1, 2, 2}},
+ }),
+ torch.CudaTensor({
+ {{1, 1, 1, 2}},
+ {{1, 1, 2, 1}},
+ {{1, 2, 2, 1}},
+ }),
+ torch.CudaTensor({
+ {{2, 2, 2, 1}},
+ }),
+ }
+
+ local lengths = {4, 3, 3, 1}
+ local maxLength = 4
+
+ -- Generate gradOutput based on input sizes
+ local gradOutput = torch.CudaTensor(11, 1, 10):uniform()
+ local indivGradOutputs = {
+ torch.cat({gradOutput:narrow(1, 1, 1), gradOutput:narrow(1, 5, 1), gradOutput:narrow(1, 8, 1), gradOutput:narrow(1, 11, 1)}, 1):clone(),
+ torch.cat({gradOutput:narrow(1, 2, 1), gradOutput:narrow(1, 6, 1), gradOutput:narrow(1, 9, 1)}, 1):clone(),
+ torch.cat({gradOutput:narrow(1, 3, 1), gradOutput:narrow(1, 7, 1), gradOutput:narrow(1, 10, 1)}, 1):clone(),
+ gradOutput:narrow(1, 4, 1):clone()
+ }
+ gradOutput = gradOutput:squeeze()
+
+ local inputSize = 4
+ local hiddenSize = 10
+ local numLayers = 1
+ local batchFirst = false
+ local dropout = false
+ local rememberStates = false
+
+ local lstm = cudnn.LSTM(
+ inputSize,
+ hiddenSize,
+ numLayers,
+ batchFirst,
+ dropout,
+ rememberStates)
+
+ local lstm2 = cudnn.LSTM(
+ inputSize,
+ hiddenSize,
+ numLayers,
+ batchFirst,
+ dropout,
+ rememberStates)
+
+ deepcopyRNN(lstm2, lstm)
+
+ -- Step 1: Pass Sequences as batch and individually, verify weights, outputs
+ -- are the same in both instances
+
+ -- batched
+ local packed = cudnn.RNN:packPaddedSequence(input, lengths)
+ local packedOutput = lstm:updateOutput(packed)
+ local packedHiddenOutput = lstm.hiddenOutput:clone()
+ -- could use padPackedSequence here, but for testing simplicity, we'll just
+ -- operate on the returned results
+
+ local separate = {}
+ local hids = {}
+ local indivGradInputs = {}
+
+ for i, length in ipairs(lengths) do
+ local inp = indivInputs[i]
+ local output = lstm2:updateOutput(inp):clone()
+ table.insert(separate, output)
+ local hid = lstm2.hiddenOutput:clone()
+ table.insert(hids, hid)
+
+ -- need to do backwards pass here too
+ local gradOutput = indivGradOutputs[i]
+ local gradInp = lstm2:updateGradInput(inp, gradOutput):clone()
+ table.insert(indivGradInputs, gradInp)
+ end
+ separate = torch.cat(separate, 1):squeeze()
+ hids = torch.cat(hids, 1):squeeze()
+
+ mytester:asserteq(packedOutput:size(1), separate:size(1))
+ mytester:asserteq(packedOutput:size(2), separate:size(2))
+
+ -- packedOutput has format where all 4 from first batch, then all 3 from
+ -- second batch, etc. while separate has all 4 from first sequence,
+ -- all 3 from next sequence, etc. I manually map the matches here
+ local corresponding = {
+ {1, 1},
+ {2, 5},
+ {3, 8},
+ {4, 11},
+ {5, 2},
+ {6, 6},
+ {7, 9},
+ {8, 3},
+ {9, 7},
+ {10, 10},
+ {11, 4}
+ }
+ for _, pair in ipairs(corresponding) do
+ local sep, batched = unpack(pair)
+ local diff = torch.csub(separate[sep], packedOutput[batched]):abs():sum()
+ mytester:assert(diff < 1e-7)
+ end
+
+ local hdiff = torch.csub(packedHiddenOutput, hids):abs():sum()
+   mytester:assert(hdiff < 1e-7)
+
+ -- Step 2: update grad input as batch and individually
+
+ local packedGradInput = lstm:updateGradInput(packed, gradOutput)
+ local igiTestable = torch.cat(indivGradInputs, 1):squeeze(2)
+
+ for _, pair in ipairs(corresponding) do
+      local sep, batched = unpack(pair)
+ local diff = torch.csub(igiTestable[sep], packedGradInput[batched]):abs():sum()
+ mytester:assert(diff < 1e-7)
+ end
+
+ -- Step 3: Basically verify that accGradParameters works for batch
+ lstm:accGradParameters(packed, gradOutput)
+end
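
The hand-written corresponding table above can also be derived from the batch sizes per timestep; here is a hedged helper sketch (not part of the test suite) exploiting the fact that packed rows are grouped timestep by timestep:

-- Row index into the packed tensor for sequence b at timestep t, given the
-- batch-sizes-per-timestep list bszpts (illustrative helper, unused by the tests).
local function packedIndex(bszpts, t, b)
   local offset = 0
   for i = 1, t - 1 do
      offset = offset + bszpts[i]
   end
   return offset + b
end

-- e.g. with bszpts = {4, 3, 3, 1} (from lengths {4, 3, 3, 1} above):
-- packedIndex(bszpts, 1, 1) == 1, packedIndex(bszpts, 2, 1) == 5,
-- packedIndex(bszpts, 4, 1) == 11, matching the corresponding table.
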
+
mytester = torch.Tester()
mytester:add(cudnntest)
mytester:run()