author | SeanNaren <taz838@hotmail.co.uk> | 2016-04-11 22:21:40 +0300
committer | Boris Fomitchev <bfomitchev@nvidia.com> | 2016-04-18 23:19:26 +0300
commit | 96f13324d07b0b80fb429f6dbe35fa5402234968 (patch)
tree | 1440a87a19522ef62d5b28ba98e327591c4796f0
parent | 60a66872730eceaf4f769c281e2ad7289272323e (diff)
Added tests, modified README and added RNN modules
-rw-r--r-- | BLSTM.lua | 9
-rw-r--r-- | GRU.lua | 7
-rw-r--r-- | LSTM.lua | 7
-rw-r--r-- | README.md | 9
-rw-r--r-- | RNN.lua | 103
-rw-r--r-- | RNNReLU.lua | 7
-rw-r--r-- | RNNTanh.lua | 7
-rw-r--r-- | init.lua | 5
-rw-r--r-- | test/test_rnn.lua | 316
9 files changed, 431 insertions, 39 deletions
diff --git a/BLSTM.lua b/BLSTM.lua
new file mode 100644
index 0000000..8feebf1
--- /dev/null
+++ b/BLSTM.lua
@@ -0,0 +1,9 @@
+local BLSTM, parent = torch.class('cudnn.BLSTM', 'cudnn.RNN')
+
+function BLSTM:__init(inputSize, hiddenSize, numLayers, batchFirst)
+    parent.__init(self,inputSize, hiddenSize, numLayers, batchFirst)
+    self.bidirectional = 'CUDNN_BIDIRECTIONAL'
+    self.mode = 'CUDNN_LSTM'
+    self.numDirections = 2
+    self:reset()
+end
diff --git a/GRU.lua b/GRU.lua
new file mode 100644
--- /dev/null
+++ b/GRU.lua
@@ -0,0 +1,7 @@
+local GRU, parent = torch.class('cudnn.GRU', 'cudnn.RNN')
+
+function GRU:__init(inputSize, hiddenSize, numLayers, batchFirst)
+    parent.__init(self,inputSize, hiddenSize, numLayers, batchFirst)
+    self.mode = 'CUDNN_GRU'
+    self:reset()
+end
diff --git a/LSTM.lua b/LSTM.lua
new file mode 100644
index 0000000..29c199c
--- /dev/null
+++ b/LSTM.lua
@@ -0,0 +1,7 @@
+local LSTM, parent = torch.class('cudnn.LSTM', 'cudnn.RNN')
+
+function LSTM:__init(inputSize, hiddenSize, numLayers, batchFirst)
+    parent.__init(self,inputSize, hiddenSize, numLayers, batchFirst)
+    self.mode = 'CUDNN_LSTM'
+    self:reset()
+end
diff --git a/README.md b/README.md
--- a/README.md
+++ b/README.md
@@ -37,6 +37,15 @@ cudnn.SpatialCrossEntropyCriterion() -- A spatial version of LogSoftMax
 cudnn.VolumetricConvolution(nInputPlane, nOutputPlane, kT, kW, kH, dT, dW, dH, padT, padW, padH)
 cudnn.VolumetricMaxPooling(kT, kW, kH, dT, dW, dH, padT, padW, padH)
 cudnn.VolumetricAveragePooling(kT, kW, kH, dT, dW, dH, padT, padW, padH)
+
+-- Recurrent Modules
+
+-- All inputs have to be 3D. Accepts input of seqLength x batch x inputDim, or batch x seqLength x inputDim if batchFirst set to true.
+cudnn.RNNReLU(inputDim, outputDim, numberOfLayers, [batchFirst = false])
+cudnn.RNNTanh(inputDim, outputDim, numberOfLayers, [batchFirst = false])
+cudnn.LSTM(inputDim, outputDim, numberOfLayers, [batchFirst = false])
+cudnn.GRU(inputDim, outputDim, numberOfLayers, [batchFirst = false])
+cudnn.BLSTM(inputDim, outputDim, numberOfLayers, [batchFirst = false])
 ```
 
 ### Modes
diff --git a/RNN.lua b/RNN.lua
--- a/RNN.lua
+++ b/RNN.lua
@@ -2,7 +2,7 @@ local RNN, parent = torch.class('cudnn.RNN', 'nn.Module')
 local ffi = require 'ffi'
 local errcheck = cudnn.errcheck
 
-function RNN:__init(inputSize, hiddenSize, numLayers)
+function RNN:__init(inputSize, hiddenSize, numLayers, batchFirst)
    parent.__init(self)
 
    self.datatype = 'CUDNN_DATA_FLOAT'
@@ -12,10 +12,12 @@ function RNN:__init(inputSize, hiddenSize, numLayers)
    self.miniBatch = 1
    self.numLayers = numLayers
    self.bidirectional = 'CUDNN_UNIDIRECTIONAL'
+   self.numDirections = 1 -- set to 2 for bi-directional.
    self.inputMode = 'CUDNN_LINEAR_INPUT'
    self.mode = 'CUDNN_RNN_RELU'
    self.dropout = 0
    self.seed = 0x01234567
+   self.batchFirst = batchFirst or false -- Set to true for batch x time x inputdim.
 
    self.gradInput = torch.CudaTensor()
    self.output = torch.CudaTensor()
@@ -50,7 +52,7 @@ function RNN:reset(stdv)
    self.gradWeight:resizeAs(self.weight):zero()
 end
 
-local function createDescriptors(count, descs_type, create_func, destroy_func)
+function RNN:createDescriptors(count, descs_type, create_func, destroy_func)
    local ds = ffi.new(descs_type, count)
    for i = 0, count - 1 do
       errcheck(create_func, ds + i)
@@ -64,29 +66,29 @@ local function createDescriptors(count, descs_type, create_func, destroy_func)
    return ds
 end
 
-local function createDropoutDescriptors(count)
-   return createDescriptors(count,
+function RNN:createDropoutDescriptors(count)
+   return self:createDescriptors(count,
                             'cudnnDropoutDescriptor_t[?]',
                             'cudnnCreateDropoutDescriptor',
                             'cudnnDestroyDropoutDescriptor')
 end
 
-local function createFilterDescriptors(count)
-   return createDescriptors(count,
+function RNN:createFilterDescriptors(count)
+   return self:createDescriptors(count,
                             'cudnnFilterDescriptor_t[?]',
                             'cudnnCreateFilterDescriptor',
                             'cudnnDestroyFilterDescriptor')
 end
 
-local function createRNNDescriptors(count)
-   return createDescriptors(count,
+function RNN:createRNNDescriptors(count)
+   return self:createDescriptors(count,
                             'cudnnRNNDescriptor_t[?]',
                             'cudnnCreateRNNDescriptor',
                             'cudnnDestroyRNNDescriptor')
 end
 
-local function createTensorDescriptors(count)
-   return createDescriptors(count,
+function RNN:createTensorDescriptors(count)
+   return self:createDescriptors(count,
                             'cudnnTensorDescriptor_t[?]',
                             'cudnnCreateTensorDescriptor',
                             'cudnnDestroyTensorDescriptor')
@@ -94,7 +96,7 @@ end
 
 function RNN:resetDropoutDescriptor()
    if not self.dropoutDesc then
-      self.dropoutDesc = createDropoutDescriptors(1)
+      self.dropoutDesc = self:createDropoutDescriptors(1)
    end
 
    self.dropoutStatesSize = torch.LongTensor(1)
@@ -113,7 +115,7 @@ end
 
 function RNN:resetRNNDescriptor()
    if not self.rnnDesc then
-      self.rnnDesc = createRNNDescriptors(1)
+      self.rnnDesc = self:createRNNDescriptors(1)
    end
 
    errcheck('cudnnSetRNNDescriptor',
@@ -130,7 +132,7 @@ end
 
 function RNN:resetWeightDescriptor()
    if not self.wDesc then
-      self.wDesc = createFilterDescriptors(1)
+      self.wDesc = self:createFilterDescriptors(1)
    end
 
    local dim = torch.IntTensor({self.weight:size(1), 1, 1})
@@ -144,8 +146,8 @@ function RNN:resetWeightDescriptor()
 end
 
 function RNN:resetIODescriptors()
-   self.xDescs = createTensorDescriptors(self.seqLength)
-   self.yDescs = createTensorDescriptors(self.seqLength)
+   self.xDescs = self:createTensorDescriptors(self.seqLength)
+   self.yDescs = self:createTensorDescriptors(self.seqLength)
 
    for i = 0, self.seqLength - 1 do
       local dim = torch.IntTensor({self.inputSize, self.miniBatch, self.seqLength})
@@ -157,7 +159,7 @@ function RNN:resetIODescriptors()
                dim:data(),
                stride:data())
 
-      local dim = torch.IntTensor({self.hiddenSize, self.miniBatch, self.seqLength})
+      local dim = torch.IntTensor({self.hiddenSize * self.numDirections, self.miniBatch, self.seqLength})
       local stride = torch.IntTensor({1, dim[1], dim[1] * dim[2]})
       errcheck('cudnnSetTensorNdDescriptor',
                self.yDescs[i],
@@ -169,8 +171,8 @@ end
 
 function RNN:resetHiddenDescriptors()
-   self.hxDesc = createTensorDescriptors(1)
-   self.hyDesc = createTensorDescriptors(1)
+   self.hxDesc = self:createTensorDescriptors(1)
+   self.hyDesc = self:createTensorDescriptors(1)
 
    local dim = torch.IntTensor({self.hiddenSize, self.miniBatch, self.numLayers})
    local stride = torch.IntTensor({1, dim[1], dim[1] * dim[2]})
@@ -190,8 +192,8 @@ function RNN:resetHiddenDescriptors()
 end
 
 function RNN:resetCellDescriptors()
-   self.cxDesc = createTensorDescriptors(1)
-   self.cyDesc = createTensorDescriptors(1)
+   self.cxDesc = self:createTensorDescriptors(1)
+   self.cyDesc = self:createTensorDescriptors(1)
 
    local dim = torch.IntTensor({self.hiddenSize, self.miniBatch, self.numLayers})
    local stride = torch.IntTensor({1, dim[1], dim[1] * dim[2]})
@@ -210,7 +212,7 @@ function RNN:resetCellDescriptors()
             stride:data())
 end
 
-local function makeContiguous(self, input, gradOutput)
+function RNN:makeContiguous(input, gradOutput)
    if not input:isContiguous() then
       self._input = self._input or input.new()
       self._input:typeAs(input):resizeAs(input):copy(input)
@@ -224,9 +226,19 @@ local function makeContiguous(self, input, gradOutput)
    return input, gradOutput
 end
 
+function RNN:resizeOutput(tensor)
+   return tensor:resize(self.seqLength, self.miniBatch, self.hiddenSize * self.numDirections)
+end
+
+function RNN:resizeHidden(tensor)
+   return tensor:resize(self.numLayers * self.numDirections, self.miniBatch, self.hiddenSize)
+end
+
 function RNN:updateOutput(input)
+   if (self.batchFirst) then
+      input = input:transpose(1, 2)
+   end
    assert(input:dim() == 3, 'input must have 3 dimensions: seqLength, miniBatch, inputSize')
-
    -- Decide which descriptors/tensors need to be updated.
    local resetRNN = not self.dropoutDesc or not self.rnnDesc
    local resetIO = not self.xDescs or not self.yDescs
@@ -263,11 +275,11 @@ function RNN:updateOutput(input)
       self:resetWeightDescriptor()
    end
 
-   local x = makeContiguous(self, input)
-   local y = self.output:resize(self.seqLength, self.miniBatch, self.hiddenSize)
+   local x = self:makeContiguous(input)
+   local y = self:resizeOutput(self.output)
    local w = self.weight
-   local hy = self.hiddenOutput:resize(self.numLayers, self.miniBatch, self.hiddenSize):zero()
-   local cy = self.cellOutput:resize(self.numLayers, self.miniBatch, self.hiddenSize):zero()
+   local hy = self:resizeHidden(self.hiddenOutput):zero()
+   local cy = self:resizeHidden(self.cellOutput):zero()
 
    -- Optionally use hiddenInput/cellInput parameters
    local hx = self.hiddenInput
@@ -275,14 +287,14 @@ function RNN:updateOutput(input)
   if hx then
      assert(hx:dim() == 3, 'hiddenInput must have 3 dimensions: numLayers, miniBatch, hiddenSize')
-      assert(hx:size(1) == self.numLayers, 'hiddenInput has incorrect number of layers!')
+      assert(hx:size(1) == self.numLayers * self.numDirections, 'hiddenInput has incorrect number of layers!')
      assert(hx:size(2) == self.miniBatch, 'hiddenInput has incorrect number of minibathes!')
      assert(hx:size(3) == self.hiddenSize, 'hiddenIinput has incorrect size!')
      assert(hx:isContiguous(), 'hiddenInput must be contiguous!')
   end
 
   if cx then
      assert(cx:dim() == 3, 'cellInput must have 3 dimensions: numLayers, miniBatch, hiddenSize')
-      assert(cx:size(1) == self.numLayers, 'cellInput has incorrect number of layers!')
+      assert(cx:size(1) == self.numLayers * self.numDirections, 'cellInput has incorrect number of layers!')
      assert(cx:size(2) == self.miniBatch, 'cellInput has incorrect number of minibathes!')
      assert(cx:size(3) == self.hiddenSize, 'cellInput has incorrect size!')
      assert(cx:isContiguous(), 'cellInput must be contiguous!')
@@ -338,11 +350,18 @@ function RNN:updateOutput(input)
                self.cyDesc[0], cy:data(),
                self.workspace:data(), self.workspace:size(1) * 4) -- sizeof(float)
    end
-
+   if (self.batchFirst) then
+      self.output = self.output:transpose(1, 2)
+   end
    return self.output
 end
 
 function RNN:updateGradInput(input, gradOutput)
+   if (self.batchFirst) then
+      input = input:transpose(1, 2)
+      gradOutput = gradOutput:transpose(1, 2)
+      self.output = self.output:transpose(1, 2)
+   end
    assert(input:dim() == 3, 'input should have 3 dimensions: seqLength, miniBatch, inputSize')
    assert(input:size(1) == self.seqLength, 'input has incorrect sequence length!')
    assert(input:size(2) == self.miniBatch, 'input has incorrect minibatch size!')
@@ -351,7 +370,7 @@ function RNN:updateGradInput(input, gradOutput)
    assert(gradOutput:isSameSizeAs(self.output), 'gradOutput has incorrect size!')
    assert(self.train, 'updateGradInput can only be called when training!')
 
-   local x, dy = makeContiguous(self, input, gradOutput)
+   local x, dy = self:makeContiguous(input, gradOutput)
    local y = self.output
    local w = self.weight
    local dx = self.gradInput:resizeAs(input)
@@ -359,13 +378,13 @@ function RNN:updateGradInput(input, gradOutput)
    local cx = self.cellInput
    local dhy = self.gradHiddenOutput
    local dcy = self.gradCellOutput
-   local dhx = self.gradHiddenInput:resize(self.numLayers, self.miniBatch, self.hiddenSize):zero()
-   local dcx = self.gradCellInput:resize(self.numLayers, self.miniBatch, self.hiddenSize):zero()
+   local dhx = self:resizeHidden(self.gradHiddenInput):zero()
+   local dcx = self:resizeHidden(self.gradCellInput):zero()
 
    if hx then
       assert(hx:dim() == 3, 'hiddenInput must have 3 dimensions: numLayers, miniBatch, hiddenSize')
-      assert(hx:size(1) == self.numLayers, 'hiddenInput has incorrect number of layers!')
+      assert(hx:size(1) == self.numLayers * self.numDirections, 'hiddenInput has incorrect number of layers!')
       assert(hx:size(2) == self.miniBatch, 'hiddenInput has incorrect minibatch size!')
       assert(hx:size(3) == self.hiddenSize, 'hiddenInput has incorrect size!')
       assert(hx:isContiguous(), 'hiddenInput must be contiguous!')
@@ -373,7 +392,7 @@ function RNN:updateGradInput(input, gradOutput)
 
    if cx then
       assert(cx:dim() == 3, 'cellInput must have 3 dimensions: numLayers, miniBatch, hiddenSize')
-      assert(cx:size(1) == self.numLayers, 'cellInput has incorrect number of layers!')
+      assert(cx:size(1) == self.numLayers * self.numDirections, 'cellInput has incorrect number of layers!')
       assert(cx:size(2) == self.miniBatch, 'cellInput has incorrect minibatch size!')
       assert(cx:size(3) == self.hiddenSize, 'cellInput has incorrect size!')
       assert(cx:isContiguous(), 'cellInput must be contiguous!')
@@ -382,7 +401,7 @@ function RNN:updateGradInput(input, gradOutput)
    if dhy then
       assert(dhy:dim() == 3, 'gradHiddenOutput must have 3 dimensions: ' ..
                              'numLayers, miniBatch, hiddenSize')
-      assert(dhy:size(1) == self.numLayers, 'gradHiddenOutput has incorrect number of layers!')
+      assert(dhy:size(1) == self.numLayers * self.numDirections, 'gradHiddenOutput has incorrect number of layers!')
       assert(dhy:size(2) == self.miniBatch, 'gradHiddenOutput has incorrect minibatch size!')
       assert(dhy:size(3) == self.hiddenSize, 'gradHiddenOutput has incorrect size!')
       assert(dhy:isContiguous(), 'gradHiddenOutput must be contiguous!')
@@ -391,7 +410,7 @@ function RNN:updateGradInput(input, gradOutput)
    if dcy then
       assert(dcy:dim() == 3, 'gradCellOutput must have 3 dimensions: ' ..
                              'numLayers, miniBatch, hiddenSize')
-      assert(dcy:size(1) == self.numLayers, 'gradCellOutput has incorrect number of layers!')
+      assert(dcy:size(1) == self.numLayers * self.numDirections, 'gradCellOutput has incorrect number of layers!')
       assert(dcy:size(2) == self.miniBatch, 'gradCellOutput has incorrect minibatch size!')
       assert(dcy:size(3) == self.hiddenSize, 'gradCellOutput has incorrect size!')
       assert(dcy:isContiguous(), 'gradCellOutput must be contiguous!')
@@ -412,11 +431,17 @@ function RNN:updateGradInput(input, gradOutput)
                self.cxDesc[0], dcx:data(),
                self.workspace:data(), self.workspace:size(1) * 4, -- sizeof(float)
                self.reserve:data(), self.reserve:size(1) * 4) -- sizeof(float)
-
+   if (self.batchFirst) then
+      self.gradInput = self.gradInput:transpose(1, 2)
+   end
    return self.gradInput
 end
 
 function RNN:accGradParameters(input, gradOutput, scale)
+   if (self.batchFirst) then
+      input = input:transpose(1, 2)
+      gradOutput = gradOutput:transpose(1, 2)
+   end
    scale = scale or 1
    if scale == 0 then return end
 
@@ -428,14 +453,14 @@ function RNN:accGradParameters(input, gradOutput, scale)
    assert(gradOutput:isSameSizeAs(self.output), 'gradOutput has incorrect size!')
    assert(self.train, 'accGradParameters can only be called when training!')
 
-   local x, dy = makeContiguous(self, input, gradOutput)
+   local x, dy = self:makeContiguous(input, gradOutput)
    local hx = self.hiddenInput
    local y = self.output
    local dw = self.gradWeight
 
    if hx then
      assert(hx:dim() == 3, 'hiddenInput must have 3 dimensions: numLayers, miniBatch, hiddenSize')
-      assert(hx:size(1) == self.numLayers, 'hiddenInput has incorrect number of layers!')
+      assert(hx:size(1) == self.numLayers * self.numDirections, 'hiddenInput has incorrect number of layers!')
      assert(hx:size(2) == self.miniBatch, 'hiddenInput has incorrect minibatch size!')
      assert(hx:size(3) == self.hiddenSize, 'hiddenIinput has incorrect size!')
      assert(hx:isContiguous(), 'hiddenInput must be contiguous!')
diff --git a/RNNReLU.lua b/RNNReLU.lua
new file mode 100644
index 0000000..3aa8ee9
--- /dev/null
+++ b/RNNReLU.lua
@@ -0,0 +1,7 @@
+local RNNReLU, parent = torch.class('cudnn.RNNReLU', 'cudnn.RNN')
+
+function RNNReLU:__init(inputSize, hiddenSize, numLayers, batchFirst)
+    parent.__init(self,inputSize, hiddenSize, numLayers, batchFirst)
+    self.mode = 'CUDNN_RNN_RELU'
+    self:reset()
+end
diff --git a/RNNTanh.lua b/RNNTanh.lua
new file mode 100644
index 0000000..98fa87c
--- /dev/null
+++ b/RNNTanh.lua
@@ -0,0 +1,7 @@
+local RNNTanh, parent = torch.class('cudnn.RNNTanh', 'cudnn.RNN')
+
+function RNNTanh:__init(inputSize, hiddenSize, numLayers, batchFirst)
+    parent.__init(self,inputSize, hiddenSize, numLayers, batchFirst)
+    self.mode = 'CUDNN_RNN_TANH'
+    self:reset()
+end
diff --git a/init.lua b/init.lua
--- a/init.lua
+++ b/init.lua
@@ -123,6 +123,11 @@ require('cudnn.VolumetricBatchNormalization')
 require('cudnn.SpatialCrossEntropyCriterion')
 require('cudnn.TemporalConvolution')
 require('cudnn.RNN')
+require('cudnn.RNNTanh')
+require('cudnn.RNNReLU')
+require('cudnn.BLSTM')
+require('cudnn.LSTM')
+require('cudnn.GRU')
 require('cudnn.functional')
 require('cudnn.convert')
diff --git a/test/test_rnn.lua b/test/test_rnn.lua
new file mode 100644
index 0000000..e7ee3de
--- /dev/null
+++ b/test/test_rnn.lua
@@ -0,0 +1,316 @@
+--[[
+-- Tests the implementation of RNN binding using the cudnn v5 library. Cross-check the checksums with cudnn reference
+-- sample checksums.
+-- ]]
+
+require 'cudnn'
+require 'cunn'
+local ffi = require 'ffi'
+local errcheck = cudnn.errcheck
+
+local cudnntest = torch.TestSuite()
+local mytester
+
+local tolerance = 300
+
+function cudnntest.testRNNRELU()
+    local miniBatch = 64
+    local seqLength = 20
+    local hiddenSize = 512
+    local numberOfLayers = 2
+    local numberOfLinearLayers = 2
+    local rnn = cudnn.RNNReLU(hiddenSize, hiddenSize, numberOfLayers)
+    rnn.mode = 'CUDNN_RNN_RELU'
+    local checkSums = getRNNCheckSums(miniBatch, seqLength, hiddenSize, numberOfLayers, numberOfLinearLayers, rnn)
+
+    -- Checksums to check against are retrieved from cudnn RNN sample.
+    mytester:assertalmosteq(checkSums.localSumi, 1.315793E+06, tolerance, 'checkSum with reference for localsumi failed')
+    mytester:assertalmosteq(checkSums.localSumh, 1.315212E+05, tolerance, 'checkSum with reference for localSumh failed')
+    mytester:assertalmosteq(checkSums.localSumdi, 6.676003E+01, tolerance, 'checkSum with reference for localSumdi failed')
+    mytester:assertalmosteq(checkSums.localSumdh, 6.425067E+01, tolerance, 'checkSum with reference for localSumdh failed')
+    mytester:assertalmosteq(checkSums.localSumdw, 1.453750E+09, tolerance, 'checkSum with reference for localSumdw failed')
+end
+
+function cudnntest.testRNNBatchFirst()
+    local miniBatch = 64
+    local seqLength = 20
+    local hiddenSize = 512
+    local numberOfLayers = 2
+    local numberOfLinearLayers = 2
+    local batchFirst = true
+    local rnn = cudnn.RNNReLU(hiddenSize, hiddenSize, numberOfLayers, batchFirst)
+    rnn.mode = 'CUDNN_RNN_RELU'
+    local checkSums = getRNNCheckSums(miniBatch, seqLength, hiddenSize, numberOfLayers, numberOfLinearLayers, rnn, batchFirst)
+
+    -- Checksums to check against are retrieved from cudnn RNN sample.
+    mytester:assertalmosteq(checkSums.localSumi, 1.315793E+06, tolerance, 'checkSum with reference for localsumi failed')
+    mytester:assertalmosteq(checkSums.localSumh, 1.315212E+05, tolerance, 'checkSum with reference for localSumh failed')
+    mytester:assertalmosteq(checkSums.localSumdi, 6.676003E+01, tolerance, 'checkSum with reference for localSumdi failed')
+    mytester:assertalmosteq(checkSums.localSumdh, 6.425067E+01, tolerance, 'checkSum with reference for localSumdh failed')
+    mytester:assertalmosteq(checkSums.localSumdw, 1.453750E+09, tolerance, 'checkSum with reference for localSumdw failed')
+end
+
+function cudnntest.testRNNTANH()
+    local miniBatch = 64
+    local seqLength = 20
+    local hiddenSize = 512
+    local numberOfLayers = 2
+    local numberOfLinearLayers = 2
+    local rnn = cudnn.RNNTanh(hiddenSize, hiddenSize, numberOfLayers)
+    rnn.mode = 'CUDNN_RNN_TANH'
+    local checkSums = getRNNCheckSums(miniBatch, seqLength, hiddenSize, numberOfLayers, numberOfLinearLayers, rnn)
+
+    -- Checksums to check against are retrieved from cudnn RNN sample.
+    mytester:assertalmosteq(checkSums.localSumi, 6.319591E+05, tolerance, 'checkSum with reference for localsumi failed')
+    mytester:assertalmosteq(checkSums.localSumh, 6.319605E+04, tolerance, 'checkSum with reference for localSumh failed')
+    mytester:assertalmosteq(checkSums.localSumdi, 4.501830E+00, tolerance, 'checkSum with reference for localSumdi failed')
+    mytester:assertalmosteq(checkSums.localSumdh, 4.489546E+00, tolerance, 'checkSum with reference for localSumdh failed')
+    mytester:assertalmosteq(checkSums.localSumdw, 5.012598E+07, tolerance, 'checkSum with reference for localSumdw failed')
+end
+
+function cudnntest.testRNNLSTM()
+    local miniBatch = 64
+    local seqLength = 20
+    local hiddenSize = 512
+    local numberOfLayers = 2
+    local numberOfLinearLayers = 8
+    local rnn = cudnn.LSTM(hiddenSize, hiddenSize, numberOfLayers)
+    local checkSums = getRNNCheckSums(miniBatch, seqLength, hiddenSize, numberOfLayers, numberOfLinearLayers, rnn)
+
+    -- Checksums to check against are retrieved from cudnn RNN sample.
+    mytester:assertalmosteq(checkSums.localSumi, 5.749536E+05, tolerance, 'checkSum with reference for localsumi failed')
+    mytester:assertalmosteq(checkSums.localSumc, 4.365091E+05, tolerance, 'checkSum with reference for localSumc failed')
+    mytester:assertalmosteq(checkSums.localSumh, 5.774818E+04, tolerance, 'checkSum with reference for localSumh failed')
+    mytester:assertalmosteq(checkSums.localSumdi, 3.842206E+02, tolerance, 'checkSum with reference for localSumdi failed')
+    mytester:assertalmosteq(checkSums.localSumdc, 9.323785E+03, tolerance, 'checkSum with reference for localSumdc failed')
+    mytester:assertalmosteq(checkSums.localSumdh, 1.182566E+01, tolerance, 'checkSum with reference for localSumdh failed')
+    mytester:assertalmosteq(checkSums.localSumdw, 4.313461E+08, tolerance, 'checkSum with reference for localSumdw failed')
+end
+
+function cudnntest.testRNNGRU()
+    local miniBatch = 64
+    local seqLength = 20
+    local hiddenSize = 512
+    local numberOfLayers = 2
+    local numberOfLinearLayers = 6
+    local rnn = cudnn.GRU(hiddenSize, hiddenSize, numberOfLayers)
+    local checkSums = getRNNCheckSums(miniBatch, seqLength, hiddenSize, numberOfLayers, numberOfLinearLayers, rnn)
+    -- Checksums to check against are retrieved from cudnn RNN sample.
+    mytester:assertalmosteq(checkSums.localSumi, 6.358978E+05, tolerance, 'checkSum with reference for localsumi failed')
+    mytester:assertalmosteq(checkSums.localSumh, 6.281680E+04, tolerance, 'checkSum with reference for localSumh failed')
+    mytester:assertalmosteq(checkSums.localSumdi, 6.296622E+00, tolerance, 'checkSum with reference for localSumdi failed')
+    mytester:assertalmosteq(checkSums.localSumdh, 2.289960E+05, tolerance, 'checkSum with reference for localSumdh failed')
+    mytester:assertalmosteq(checkSums.localSumdw, 5.397419E+07, tolerance, 'checkSum with reference for localSumdw failed')
+end
+
+function cudnntest.testBiDirectionalRELURNN()
+    local miniBatch = 64
+    local seqLength = 20
+    local hiddenSize = 512
+    local numberOfLayers = 2
+    local numberOfLinearLayers = 2
+    local nbDirections = 2
+    local batchFirst = false
+    local rnn = cudnn.RNN(hiddenSize, hiddenSize, numberOfLayers)
+    rnn.bidirectional = 'CUDNN_BIDIRECTIONAL'
+    rnn.mode = 'CUDNN_RNN_RELU'
+    rnn.numDirections = 2
+
+    local checkSums = getRNNCheckSums(miniBatch, seqLength, hiddenSize, numberOfLayers, numberOfLinearLayers, rnn, batchFirst, nbDirections)
+    -- Checksums to check against are retrieved from cudnn RNN sample.
+    mytester:assertalmosteq(checkSums.localSumi, 1.388634E+01, tolerance, 'checkSum with reference for localsumi failed')
+    mytester:assertalmosteq(checkSums.localSumh, 1.288997E+01, tolerance, 'checkSum with reference for localSumh failed')
+    mytester:assertalmosteq(checkSums.localSumdi, 1.288729E+01, tolerance, 'checkSum with reference for localSumdi failed')
+    mytester:assertalmosteq(checkSums.localSumdh, 1.279004E+01, tolerance, 'checkSum with reference for localSumdh failed')
+    mytester:assertalmosteq(checkSums.localSumdw, 7.061081E+07, tolerance, 'checkSum with reference for localSumdw failed')
+end
+
+function cudnntest.testBiDirectionalTANHRNN()
+    local miniBatch = 64
+    local seqLength = 20
+    local hiddenSize = 512
+    local numberOfLayers = 2
+    local numberOfLinearLayers = 2
+    local nbDirections = 2
+    local batchFirst = false
+    local rnn = cudnn.RNN(hiddenSize, hiddenSize, numberOfLayers)
+    rnn.bidirectional = 'CUDNN_BIDIRECTIONAL'
+    rnn.mode = 'CUDNN_RNN_TANH'
+    rnn.numDirections = 2
+
+    local checkSums = getRNNCheckSums(miniBatch, seqLength, hiddenSize, numberOfLayers, numberOfLinearLayers, rnn, batchFirst, nbDirections)
+    -- Checksums to check against are retrieved from cudnn RNN sample.
+    mytester:assertalmosteq(checkSums.localSumi, 1.388634E+01, tolerance, 'checkSum with reference for localsumi failed')
+    mytester:assertalmosteq(checkSums.localSumh, 1.288997E+01, tolerance, 'checkSum with reference for localSumh failed')
+    mytester:assertalmosteq(checkSums.localSumdi, 1.288729E+01, tolerance, 'checkSum with reference for localSumdi failed')
+    mytester:assertalmosteq(checkSums.localSumdh, 1.279004E+01, tolerance, 'checkSum with reference for localSumdh failed')
+    mytester:assertalmosteq(checkSums.localSumdw, 7.061081E+07, tolerance, 'checkSum with reference for localSumdw failed')
+end
+
+function cudnntest.testBiDirectionalLSTMRNN()
+    local miniBatch = 64
+    local seqLength = 20
+    local hiddenSize = 512
+    local numberOfLayers = 2
+    local numberOfLinearLayers = 8
+    local nbDirections = 2
+    local batchFirst = false
+    local rnn = cudnn.BLSTM(hiddenSize, hiddenSize, numberOfLayers)
+
+    local checkSums = getRNNCheckSums(miniBatch, seqLength, hiddenSize, numberOfLayers, numberOfLinearLayers, rnn, batchFirst, nbDirections)
+    -- Checksums to check against are retrieved from cudnn RNN sample.
+    mytester:assertalmosteq(checkSums.localSumi, 3.134097E+04, tolerance, 'checkSum with reference for localsumi failed')
+    mytester:assertalmosteq(checkSums.localSumc, 3.845626E+00, tolerance, 'checkSum with reference for localSumc failed')
+    mytester:assertalmosteq(checkSums.localSumh, 1.922855E+00, tolerance, 'checkSum with reference for localSumh failed')
+    mytester:assertalmosteq(checkSums.localSumdi, 4.794993E+00, tolerance, 'checkSum with reference for localSumdi failed')
+    mytester:assertalmosteq(checkSums.localSumdc, 2.870925E+04, tolerance, 'checkSum with reference for localSumdc failed')
+    mytester:assertalmosteq(checkSums.localSumdh, 2.468645E+00, tolerance, 'checkSum with reference for localSumdh failed')
+    mytester:assertalmosteq(checkSums.localSumdw, 1.121568E+08, tolerance, 'checkSum with reference for localSumdw failed')
+end
+
+function cudnntest.testBiDirectionalGRURNN()
+    local miniBatch = 64
+    local seqLength = 20
+    local hiddenSize = 512
+    local numberOfLayers = 2
+    local numberOfLinearLayers = 6
+    local nbDirections = 2
+    local batchFirst = false
+    local rnn = cudnn.RNN(hiddenSize, hiddenSize, numberOfLayers)
+    rnn.bidirectional = 'CUDNN_BIDIRECTIONAL'
+    rnn.mode = 'CUDNN_GRU'
+    rnn.numDirections = 2
+
+    local checkSums = getRNNCheckSums(miniBatch, seqLength, hiddenSize, numberOfLayers, numberOfLinearLayers, rnn, batchFirst, nbDirections)
+    -- Checksums to check against are retrieved from cudnn RNN sample.
+    mytester:assertalmosteq(checkSums.localSumi, 6.555183E+04, tolerance, 'checkSum with reference for localsumi failed')
+    mytester:assertalmosteq(checkSums.localSumh, 5.830924E+00, tolerance, 'checkSum with reference for localSumh failed')
+    mytester:assertalmosteq(checkSums.localSumdi, 4.271801E+00, tolerance, 'checkSum with reference for localSumdi failed')
+    mytester:assertalmosteq(checkSums.localSumdh, 6.555744E+04, tolerance, 'checkSum with reference for localSumdh failed')
+    mytester:assertalmosteq(checkSums.localSumdw, 1.701796E+08, tolerance, 'checkSum with reference for localSumdw failed')
+end
+
+--[[
+-- Method gets Checksums of RNN to compare with ref Checksums in cudnn RNN C sample.
+-- ]]
+function getRNNCheckSums(miniBatch, seqLength, hiddenSize, numberOfLayers, numberOfLinearLayers, rnn, batchFirst, nbDirections)
+    local biDirectionalScale = nbDirections or 1
+    -- Reset the rnn and weight descriptor (since we are manually setting values for matrix/bias.
+    rnn:reset()
+    rnn:resetWeightDescriptor()
+    local input
+    if (batchFirst) then
+        input = torch.CudaTensor(miniBatch, seqLength, hiddenSize):fill(1)
+    else
+        input = torch.CudaTensor(seqLength, miniBatch, hiddenSize):fill(1) -- Input initialised to 1s.
+    end
+    if (biDirectionalScale == 2) then
+        rnn.weight:fill(1 / rnn.weight:size(1))
+    else
+        -- Matrices are initialised to 1 / matrixSize, biases to 1.
+        for layer = 0, numberOfLayers - 1 do
+            for layerId = 0, numberOfLinearLayers - 1 do
+                local linLayerMatDesc = rnn:createFilterDescriptors(1)
+                local matrixPointer = ffi.new("float*[1]")
+                errcheck('cudnnGetRNNLinLayerMatrixParams',
+                         cudnn.getHandle(),
+                         rnn.rnnDesc[0],
+                         layer,
+                         rnn.xDescs,
+                         rnn.wDesc[0],
+                         rnn.weight:data(),
+                         layerId,
+                         linLayerMatDesc[0],
+                         ffi.cast("void**", matrixPointer))
+
+                local dataType = 'CUDNN_DATA_FLOAT'
+                local format = 'CUDNN_TENSOR_NCHW'
+                local nbDims = torch.IntTensor(1)
+
+                local minDim = 3
+                local filterDimA = torch.ones(minDim):int()
+                errcheck('cudnnGetFilterNdDescriptor',
+                         linLayerMatDesc[0],
+                         minDim,
+                         ffi.cast("cudnnDataType_t*", dataType),
+                         ffi.cast("cudnnDataType_t*", format),
+                         nbDims:data(),
+                         filterDimA:data())
+
+                local offset = matrixPointer[0] - rnn.weight:data()
+                local weightTensor = torch.CudaTensor(rnn.weight:storage(), offset + 1, filterDimA:prod())
+                weightTensor:fill(1.0 / filterDimA:prod())
+
+                local linLayerBiasDesc = rnn:createFilterDescriptors(1)
+                local biasPointer = ffi.new("float*[1]")
+                errcheck('cudnnGetRNNLinLayerBiasParams',
+                         cudnn.getHandle(),
+                         rnn.rnnDesc[0],
+                         layer,
+                         rnn.xDescs,
+                         rnn.wDesc[0],
+                         rnn.weight:data(),
+                         layerId,
+                         linLayerBiasDesc[0],
+                         ffi.cast("void**", biasPointer))
+
+                local dataType = 'CUDNN_DATA_FLOAT'
+                local format = 'CUDNN_TENSOR_NCHW'
+                local nbDims = torch.IntTensor(1)
+                local filterDimA = torch.ones(minDim):int()
+
+                errcheck('cudnnGetFilterNdDescriptor',
+                         linLayerBiasDesc[0],
+                         minDim,
+                         ffi.cast("cudnnDataType_t*", dataType),
+                         ffi.cast("cudnnDataType_t*", format),
+                         nbDims:data(),
+                         filterDimA:data())
+
+                local offset = biasPointer[0] - rnn.weight:data()
+                local biasTensor = torch.CudaTensor(rnn.weight:storage(), offset + 1, filterDimA:prod())
+                biasTensor:fill(1)
+            end
+        end
+    end
+    -- Set hx/cx/dhy/dcy data to 1s.
+    rnn.hiddenInput = torch.CudaTensor(numberOfLayers * biDirectionalScale, miniBatch, hiddenSize):fill(1)
+    rnn.cellInput = torch.CudaTensor(numberOfLayers * biDirectionalScale, miniBatch, hiddenSize):fill(1)
+    rnn.gradHiddenOutput = torch.CudaTensor(numberOfLayers * biDirectionalScale, miniBatch, hiddenSize):fill(1)
+    rnn.gradCellOutput = torch.CudaTensor(numberOfLayers * biDirectionalScale, miniBatch, hiddenSize):fill(1)
+    local testOutputi = rnn:forward(input)
+    -- gradInput set to 1s.
+    local gradInput
+    if(batchFirst) then
+        gradInput = torch.CudaTensor(miniBatch, seqLength, hiddenSize * biDirectionalScale):fill(1)
+    else
+        gradInput = torch.CudaTensor(seqLength, miniBatch, hiddenSize * biDirectionalScale):fill(1)
+    end
+    rnn:backward(input, gradInput)
+
+    -- Sum up all values for each.
+    local localSumi = torch.sum(testOutputi)
+    local localSumh = torch.sum(rnn.hiddenOutput)
+    local localSumc = torch.sum(rnn.cellOutput)
+
+    local localSumdi = torch.sum(rnn.gradInput)
+    local localSumdh = torch.sum(rnn.gradHiddenInput)
+    local localSumdc = torch.sum(rnn.gradCellInput)
+
+    local localSumdw = torch.sum(rnn.gradWeight)
+
+    local checkSums = {
+        localSumi = localSumi,
+        localSumh = localSumh,
+        localSumc = localSumc,
+        localSumdi = localSumdi,
+        localSumdh = localSumdh,
+        localSumdc = localSumdc,
+        localSumdw = localSumdw
+    }
+    return checkSums
+end
+
+mytester = torch.Tester()
+mytester:add(cudnntest)
+mytester:run()
\ No newline at end of file
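For context, here is a minimal usage sketch of the recurrent modules introduced by this commit, following the constructor signatures documented in the README hunk above. The layer sizes, sequence length, and batch size below are illustrative assumptions, not values taken from the commit.

```lua
require 'cudnn'
require 'cunn'

-- Hypothetical dimensions, chosen only for illustration.
local inputDim, hiddenDim, numLayers = 128, 256, 2

-- Default convention: time-major input of shape seqLength x batch x inputDim.
local lstm = cudnn.LSTM(inputDim, hiddenDim, numLayers)
local input = torch.CudaTensor(20, 32, inputDim):fill(1)
local output = lstm:forward(input)          -- 20 x 32 x hiddenDim

-- With batchFirst = true the module expects batch x seqLength x inputDim.
local blstm = cudnn.BLSTM(inputDim, hiddenDim, numLayers, true)
local batchInput = torch.CudaTensor(32, 20, inputDim):fill(1)
local biOutput = blstm:forward(batchInput)  -- 32 x 20 x (2 * hiddenDim): both directions concatenated
```

As in the tests above, gradients flow through the usual `nn` interface (`backward(input, gradOutput)`), with `gradOutput` shaped like the module's output.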