author | SeanNaren <taz838@hotmail.co.uk> | 2016-04-11 22:21:40 +0300
committer | Boris Fomitchev <bfomitchev@nvidia.com> | 2016-04-18 23:19:26 +0300
commit | 96f13324d07b0b80fb429f6dbe35fa5402234968 (patch)
tree | 1440a87a19522ef62d5b28ba98e327591c4796f0
parent | 60a66872730eceaf4f769c281e2ad7289272323e (diff)
Added tests, modified README and added RNN modules
-rw-r--r-- | BLSTM.lua | 9
-rw-r--r-- | GRU.lua | 7
-rw-r--r-- | LSTM.lua | 7
-rw-r--r-- | README.md | 9
-rw-r--r-- | RNN.lua | 103
-rw-r--r-- | RNNReLU.lua | 7
-rw-r--r-- | RNNTanh.lua | 7
-rw-r--r-- | init.lua | 5
-rw-r--r-- | test/test_rnn.lua | 316
9 files changed, 431 insertions, 39 deletions
diff --git a/BLSTM.lua b/BLSTM.lua
new file mode 100644
index 0000000..8feebf1
--- /dev/null
+++ b/BLSTM.lua
@@ -0,0 +1,9 @@
+local BLSTM, parent = torch.class('cudnn.BLSTM', 'cudnn.RNN')
+
+function BLSTM:__init(inputSize, hiddenSize, numLayers, batchFirst)
+    parent.__init(self,inputSize, hiddenSize, numLayers, batchFirst)
+    self.bidirectional = 'CUDNN_BIDIRECTIONAL'
+    self.mode = 'CUDNN_LSTM'
+    self.numDirections = 2
+    self:reset()
+end
diff --git a/GRU.lua b/GRU.lua
new file mode 100644
--- /dev/null
+++ b/GRU.lua
@@ -0,0 +1,7 @@
+local GRU, parent = torch.class('cudnn.GRU', 'cudnn.RNN')
+
+function GRU:__init(inputSize, hiddenSize, numLayers, batchFirst)
+    parent.__init(self,inputSize, hiddenSize, numLayers, batchFirst)
+    self.mode = 'CUDNN_GRU'
+    self:reset()
+end
diff --git a/LSTM.lua b/LSTM.lua
new file mode 100644
index 0000000..29c199c
--- /dev/null
+++ b/LSTM.lua
@@ -0,0 +1,7 @@
+local LSTM, parent = torch.class('cudnn.LSTM', 'cudnn.RNN')
+
+function LSTM:__init(inputSize, hiddenSize, numLayers, batchFirst)
+    parent.__init(self,inputSize, hiddenSize, numLayers, batchFirst)
+    self.mode = 'CUDNN_LSTM'
+    self:reset()
+end
diff --git a/README.md b/README.md
--- a/README.md
+++ b/README.md
@@ -37,6 +37,15 @@ cudnn.SpatialCrossEntropyCriterion() -- A spatial version of LogSoftMax
 cudnn.VolumetricConvolution(nInputPlane, nOutputPlane, kT, kW, kH, dT, dW, dH, padT, padW, padH)
 cudnn.VolumetricMaxPooling(kT, kW, kH, dT, dW, dH, padT, padW, padH)
 cudnn.VolumetricAveragePooling(kT, kW, kH, dT, dW, dH, padT, padW, padH)
+
+-- Recurrent Modules
+
+-- All inputs have to be 3D. Accepts input of seqLength x batch x inputDim, or batch x seqLength x inputDim if batchFirst set to true.
+cudnn.RNNReLU(inputDim, outputDim, numberOfLayers, [batchFirst = false])
+cudnn.RNNTanh(inputDim, outputDim, numberOfLayers, [batchFirst = false])
+cudnn.LSTM(inputDim, outputDim, numberOfLayers, [batchFirst = false])
+cudnn.GRU(inputDim, outputDim, numberOfLayers, [batchFirst = false])
+cudnn.BLSTM(inputDim, outputDim, numberOfLayers, [batchFirst = false])
 ```
 
 ### Modes
diff --git a/RNN.lua b/RNN.lua
--- a/RNN.lua
+++ b/RNN.lua
@@ -2,7 +2,7 @@ local RNN, parent = torch.class('cudnn.RNN', 'nn.Module')
 local ffi = require 'ffi'
 local errcheck = cudnn.errcheck
 
-function RNN:__init(inputSize, hiddenSize, numLayers)
+function RNN:__init(inputSize, hiddenSize, numLayers, batchFirst)
    parent.__init(self)
 
    self.datatype = 'CUDNN_DATA_FLOAT'
@@ -12,10 +12,12 @@ function RNN:__init(inputSize, hiddenSize, numLayers)
    self.miniBatch = 1
    self.numLayers = numLayers
    self.bidirectional = 'CUDNN_UNIDIRECTIONAL'
+   self.numDirections = 1 -- set to 2 for bi-directional.
    self.inputMode = 'CUDNN_LINEAR_INPUT'
    self.mode = 'CUDNN_RNN_RELU'
    self.dropout = 0
    self.seed = 0x01234567
+   self.batchFirst = batchFirst or false -- Set to true for batch x time x inputdim.
 
    self.gradInput = torch.CudaTensor()
    self.output = torch.CudaTensor()
@@ -50,7 +52,7 @@ function RNN:reset(stdv)
    self.gradWeight:resizeAs(self.weight):zero()
 end
 
-local function createDescriptors(count, descs_type, create_func, destroy_func)
+function RNN:createDescriptors(count, descs_type, create_func, destroy_func)
    local ds = ffi.new(descs_type, count)
    for i = 0, count - 1 do
       errcheck(create_func, ds + i)
@@ -64,29 +66,29 @@ local function createDescriptors(count, descs_type, create_func, destroy_func)
    return ds
 end
 
-local function createDropoutDescriptors(count)
-   return createDescriptors(count,
+function RNN:createDropoutDescriptors(count)
+   return self:createDescriptors(count,
                             'cudnnDropoutDescriptor_t[?]',
                             'cudnnCreateDropoutDescriptor',
                             'cudnnDestroyDropoutDescriptor')
 end
 
-local function createFilterDescriptors(count)
-   return createDescriptors(count,
+function RNN:createFilterDescriptors(count)
+   return self:createDescriptors(count,
                             'cudnnFilterDescriptor_t[?]',
                             'cudnnCreateFilterDescriptor',
                             'cudnnDestroyFilterDescriptor')
 end
 
-local function createRNNDescriptors(count)
-   return createDescriptors(count,
+function RNN:createRNNDescriptors(count)
+   return self:createDescriptors(count,
                             'cudnnRNNDescriptor_t[?]',
                             'cudnnCreateRNNDescriptor',
                             'cudnnDestroyRNNDescriptor')
 end
 
-local function createTensorDescriptors(count)
-   return createDescriptors(count,
+function RNN:createTensorDescriptors(count)
+   return self:createDescriptors(count,
                             'cudnnTensorDescriptor_t[?]',
                             'cudnnCreateTensorDescriptor',
                             'cudnnDestroyTensorDescriptor')
@@ -94,7 +96,7 @@ end
 
 function RNN:resetDropoutDescriptor()
    if not self.dropoutDesc then
-      self.dropoutDesc = createDropoutDescriptors(1)
+      self.dropoutDesc = self:createDropoutDescriptors(1)
    end
 
    self.dropoutStatesSize = torch.LongTensor(1)
@@ -113,7 +115,7 @@ end
 
 function RNN:resetRNNDescriptor()
    if not self.rnnDesc then
-      self.rnnDesc = createRNNDescriptors(1)
+      self.rnnDesc = self:createRNNDescriptors(1)
    end
 
    errcheck('cudnnSetRNNDescriptor',
@@ -130,7 +132,7 @@ end
 
 function RNN:resetWeightDescriptor()
    if not self.wDesc then
-      self.wDesc = createFilterDescriptors(1)
+      self.wDesc = self:createFilterDescriptors(1)
    end
 
    local dim = torch.IntTensor({self.weight:size(1), 1, 1})
@@ -144,8 +146,8 @@ function RNN:resetWeightDescriptor()
 end
 
 function RNN:resetIODescriptors()
-   self.xDescs = createTensorDescriptors(self.seqLength)
-   self.yDescs = createTensorDescriptors(self.seqLength)
+   self.xDescs = self:createTensorDescriptors(self.seqLength)
+   self.yDescs = self:createTensorDescriptors(self.seqLength)
 
    for i = 0, self.seqLength - 1 do
       local dim = torch.IntTensor({self.inputSize, self.miniBatch, self.seqLength})
@@ -157,7 +159,7 @@ function RNN:resetIODescriptors()
                dim:data(),
                stride:data())
 
-      local dim = torch.IntTensor({self.hiddenSize, self.miniBatch, self.seqLength})
+      local dim = torch.IntTensor({self.hiddenSize * self.numDirections, self.miniBatch, self.seqLength})
       local stride = torch.IntTensor({1, dim[1], dim[1] * dim[2]})
       errcheck('cudnnSetTensorNdDescriptor',
                self.yDescs[i],
@@ -169,8 +171,8 @@ end
 
 function RNN:resetHiddenDescriptors()
-   self.hxDesc = createTensorDescriptors(1)
-   self.hyDesc = createTensorDescriptors(1)
+   self.hxDesc = self:createTensorDescriptors(1)
+   self.hyDesc = self:createTensorDescriptors(1)
 
    local dim = torch.IntTensor({self.hiddenSize, self.miniBatch, self.numLayers})
    local stride = torch.IntTensor({1, dim[1], dim[1] * dim[2]})
@@ -190,8 +192,8 @@ function RNN:resetHiddenDescriptors()
 end
 
 function RNN:resetCellDescriptors()
-   self.cxDesc = createTensorDescriptors(1)
-   self.cyDesc = createTensorDescriptors(1)
+   self.cxDesc = self:createTensorDescriptors(1)
+   self.cyDesc = self:createTensorDescriptors(1)
 
    local dim = torch.IntTensor({self.hiddenSize, self.miniBatch, self.numLayers})
    local stride = torch.IntTensor({1, dim[1], dim[1] * dim[2]})
@@ -210,7 +212,7 @@ function RNN:resetCellDescriptors()
             stride:data())
 end
 
-local function makeContiguous(self, input, gradOutput)
+function RNN:makeContiguous(input, gradOutput)
    if not input:isContiguous() then
       self._input = self._input or input.new()
       self._input:typeAs(input):resizeAs(input):copy(input)
@@ -224,9 +226,19 @@ local function makeContiguous(self, input, gradOutput)
    return input, gradOutput
 end
 
+function RNN:resizeOutput(tensor)
+   return tensor:resize(self.seqLength, self.miniBatch, self.hiddenSize * self.numDirections)
+end
+
+function RNN:resizeHidden(tensor)
+   return tensor:resize(self.numLayers * self.numDirections, self.miniBatch, self.hiddenSize)
+end
+
 function RNN:updateOutput(input)
+   if (self.batchFirst) then
+      input = input:transpose(1, 2)
+   end
    assert(input:dim() == 3, 'input must have 3 dimensions: seqLength, miniBatch, inputSize')
-
    -- Decide which descriptors/tensors need to be updated.
    local resetRNN = not self.dropoutDesc or not self.rnnDesc
    local resetIO = not self.xDescs or not self.yDescs
@@ -263,11 +275,11 @@ function RNN:updateOutput(input)
       self:resetWeightDescriptor()
    end
 
-   local x = makeContiguous(self, input)
-   local y = self.output:resize(self.seqLength, self.miniBatch, self.hiddenSize)
+   local x = self:makeContiguous(input)
+   local y = self:resizeOutput(self.output)
    local w = self.weight
-   local hy = self.hiddenOutput:resize(self.numLayers, self.miniBatch, self.hiddenSize):zero()
-   local cy = self.cellOutput:resize(self.numLayers, self.miniBatch, self.hiddenSize):zero()
+   local hy = self:resizeHidden(self.hiddenOutput):zero()
+   local cy = self:resizeHidden(self.cellOutput):zero()
 
    -- Optionally use hiddenInput/cellInput parameters
    local hx = self.hiddenInput
@@ -275,14 +287,14 @@ function RNN:updateOutput(input)
   if hx then
      assert(hx:dim() == 3, 'hiddenInput must have 3 dimensions: numLayers, miniBatch, hiddenSize')
-      assert(hx:size(1) == self.numLayers, 'hiddenInput has incorrect number of layers!')
+      assert(hx:size(1) == self.numLayers * self.numDirections, 'hiddenInput has incorrect number of layers!')
      assert(hx:size(2) == self.miniBatch, 'hiddenInput has incorrect number of minibathes!')
      assert(hx:size(3) == self.hiddenSize, 'hiddenIinput has incorrect size!')
      assert(hx:isContiguous(), 'hiddenInput must be contiguous!')
   end
 
   if cx then
      assert(cx:dim() == 3, 'cellInput must have 3 dimensions: numLayers, miniBatch, hiddenSize')
-      assert(cx:size(1) == self.numLayers, 'cellInput has incorrect number of layers!')
+      assert(cx:size(1) == self.numLayers * self.numDirections, 'cellInput has incorrect number of layers!')
      assert(cx:size(2) == self.miniBatch, 'cellInput has incorrect number of minibathes!')
      assert(cx:size(3) == self.hiddenSize, 'cellInput has incorrect size!')
      assert(cx:isContiguous(), 'cellInput must be contiguous!')
@@ -338,11 +350,18 @@ function RNN:updateOutput(input)
                self.cyDesc[0], cy:data(),
                self.workspace:data(), self.workspace:size(1) * 4) -- sizeof(float)
    end
-
+   if (self.batchFirst) then
+      self.output = self.output:transpose(1, 2)
+   end
    return self.output
 end
 
 function RNN:updateGradInput(input, gradOutput)
+   if (self.batchFirst) then
+      input = input:transpose(1, 2)
+      gradOutput = gradOutput:transpose(1, 2)
+      self.output = self.output:transpose(1, 2)
+   end
    assert(input:dim() == 3, 'input should have 3 dimensions: seqLength, miniBatch, inputSize')
    assert(input:size(1) == self.seqLength, 'input has incorrect sequence length!')
    assert(input:size(2) == self.miniBatch, 'input has incorrect minibatch size!')
@@ -351,7 +370,7 @@ function RNN:updateGradInput(input, gradOutput)
    assert(gradOutput:isSameSizeAs(self.output), 'gradOutput has incorrect size!')
    assert(self.train, 'updateGradInput can only be called when training!')
 
-   local x, dy = makeContiguous(self, input, gradOutput)
+   local x, dy = self:makeContiguous(input, gradOutput)
    local y = self.output
    local w = self.weight
    local dx = self.gradInput:resizeAs(input)
@@ -359,13 +378,13 @@ function RNN:updateGradInput(input, gradOutput)
    local cx = self.cellInput
    local dhy = self.gradHiddenOutput
    local dcy = self.gradCellOutput
-   local dhx = self.gradHiddenInput:resize(self.numLayers, self.miniBatch, self.hiddenSize):zero()
-   local dcx = self.gradCellInput:resize(self.numLayers, self.miniBatch, self.hiddenSize):zero()
+   local dhx = self:resizeHidden(self.gradHiddenInput):zero()
+   local dcx = self:resizeHidden(self.gradCellInput):zero()
 
    if hx then
       assert(hx:dim() == 3, 'hiddenInput must have 3 dimensions: numLayers, miniBatch, hiddenSize')
-      assert(hx:size(1) == self.numLayers, 'hiddenInput has incorrect number of layers!')
+      assert(hx:size(1) == self.numLayers * self.numDirections, 'hiddenInput has incorrect number of layers!')
       assert(hx:size(2) == self.miniBatch, 'hiddenInput has incorrect minibatch size!')
       assert(hx:size(3) == self.hiddenSize, 'hiddenInput has incorrect size!')
       assert(hx:isContiguous(), 'hiddenInput must be contiguous!')
@@ -373,7 +392,7 @@ function RNN:updateGradInput(input, gradOutput)
 
    if cx then
       assert(cx:dim() == 3, 'cellInput must have 3 dimensions: numLayers, miniBatch, hiddenSize')
-      assert(cx:size(1) == self.numLayers, 'cellInput has incorrect number of layers!')
+      assert(cx:size(1) == self.numLayers * self.numDirections, 'cellInput has incorrect number of layers!')
       assert(cx:size(2) == self.miniBatch, 'cellInput has incorrect minibatch size!')
       assert(cx:size(3) == self.hiddenSize, 'cellInput has incorrect size!')
       assert(cx:isContiguous(), 'cellInput must be contiguous!')
@@ -382,7 +401,7 @@ function RNN:updateGradInput(input, gradOutput)
    if dhy then
       assert(dhy:dim() == 3, 'gradHiddenOutput must have 3 dimensions: ' ..
                              'numLayers, miniBatch, hiddenSize')
-      assert(dhy:size(1) == self.numLayers, 'gradHiddenOutput has incorrect number of layers!')
+      assert(dhy:size(1) == self.numLayers * self.numDirections, 'gradHiddenOutput has incorrect number of layers!')
       assert(dhy:size(2) == self.miniBatch, 'gradHiddenOutput has incorrect minibatch size!')
       assert(dhy:size(3) == self.hiddenSize, 'gradHiddenOutput has incorrect size!')
       assert(dhy:isContiguous(), 'gradHiddenOutput must be contiguous!')
@@ -391,7 +410,7 @@ function RNN:updateGradInput(input, gradOutput)
    if dcy then
       assert(dcy:dim() == 3, 'gradCellOutput must have 3 dimensions: ' ..
                              'numLayers, miniBatch, hiddenSize')
-      assert(dcy:size(1) == self.numLayers, 'gradCellOutput has incorrect number of layers!')
+      assert(dcy:size(1) == self.numLayers * self.numDirections, 'gradCellOutput has incorrect number of layers!')
       assert(dcy:size(2) == self.miniBatch, 'gradCellOutput has incorrect minibatch size!')
       assert(dcy:size(3) == self.hiddenSize, 'gradCellOutput has incorrect size!')
       assert(dcy:isContiguous(), 'gradCellOutput must be contiguous!')
@@ -412,11 +431,17 @@ function RNN:updateGradInput(input, gradOutput)
                self.cxDesc[0], dcx:data(),
                self.workspace:data(), self.workspace:size(1) * 4, -- sizeof(float)
                self.reserve:data(), self.reserve:size(1) * 4) -- sizeof(float)
-
+   if (self.batchFirst) then
+      self.gradInput = self.gradInput:transpose(1, 2)
+   end
    return self.gradInput
 end
 
 function RNN:accGradParameters(input, gradOutput, scale)
+   if (self.batchFirst) then
+      input = input:transpose(1, 2)
+      gradOutput = gradOutput:transpose(1, 2)
+   end
    scale = scale or 1
    if scale == 0 then return end
 
@@ -428,14 +453,14 @@ function RNN:accGradParameters(input, gradOutput, scale)
    assert(gradOutput:isSameSizeAs(self.output), 'gradOutput has incorrect size!')
    assert(self.train, 'accGradParameters can only be called when training!')
 
-   local x, dy = makeContiguous(self, input, gradOutput)
+   local x, dy = self:makeContiguous(input, gradOutput)
    local hx = self.hiddenInput
    local y = self.output
    local dw = self.gradWeight
 
    if hx then
      assert(hx:dim() == 3, 'hiddenInput must have 3 dimensions: numLayers, miniBatch, hiddenSize')
-      assert(hx:size(1) == self.numLayers, 'hiddenInput has incorrect number of layers!')
+      assert(hx:size(1) == self.numLayers * self.numDirections, 'hiddenInput has incorrect number of layers!')
      assert(hx:size(2) == self.miniBatch, 'hiddenInput has incorrect minibatch size!')
      assert(hx:size(3) == self.hiddenSize, 'hiddenIinput has incorrect size!')
      assert(hx:isContiguous(), 'hiddenInput must be contiguous!')
diff --git a/RNNReLU.lua b/RNNReLU.lua
new file mode 100644
index 0000000..3aa8ee9
--- /dev/null
+++ b/RNNReLU.lua
@@ -0,0 +1,7 @@
+local RNNReLU, parent = torch.class('cudnn.RNNReLU', 'cudnn.RNN')
+
+function RNNReLU:__init(inputSize, hiddenSize, numLayers, batchFirst)
+    parent.__init(self,inputSize, hiddenSize, numLayers, batchFirst)
+    self.mode = 'CUDNN_RNN_RELU'
+    self:reset()
+end
diff --git a/RNNTanh.lua b/RNNTanh.lua
new file mode 100644
index 0000000..98fa87c
--- /dev/null
+++ b/RNNTanh.lua
@@ -0,0 +1,7 @@
+local RNNTanh, parent = torch.class('cudnn.RNNTanh', 'cudnn.RNN')
+
+function RNNTanh:__init(inputSize, hiddenSize, numLayers, batchFirst)
+    parent.__init(self,inputSize, hiddenSize, numLayers, batchFirst)
+    self.mode = 'CUDNN_RNN_TANH'
+    self:reset()
+end
diff --git a/init.lua b/init.lua
--- a/init.lua
+++ b/init.lua
@@ -123,6 +123,11 @@ require('cudnn.VolumetricBatchNormalization')
 require('cudnn.SpatialCrossEntropyCriterion')
 require('cudnn.TemporalConvolution')
 require('cudnn.RNN')
+require('cudnn.RNNTanh')
+require('cudnn.RNNReLU')
+require('cudnn.BLSTM')
+require('cudnn.LSTM')
+require('cudnn.GRU')
 require('cudnn.functional')
 require('cudnn.convert')
diff --git a/test/test_rnn.lua b/test/test_rnn.lua
new file mode 100644
index 0000000..e7ee3de
--- /dev/null
+++ b/test/test_rnn.lua
@@ -0,0 +1,316 @@
+--[[
+-- Tests the implementation of RNN binding using the cudnn v5 library. Cross-check the checksums with cudnn reference
+-- sample checksums.
+-- ]]
+
+require 'cudnn'
+require 'cunn'
+local ffi = require 'ffi'
+local errcheck = cudnn.errcheck
+
+local cudnntest = torch.TestSuite()
+local mytester
+
+local tolerance = 300
+
+function cudnntest.testRNNRELU()
+    local miniBatch = 64
+    local seqLength = 20
+    local hiddenSize = 512
+    local numberOfLayers = 2
+    local numberOfLinearLayers = 2
+    local rnn = cudnn.RNNReLU(hiddenSize, hiddenSize, numberOfLayers)
+    rnn.mode = 'CUDNN_RNN_RELU'
+    local checkSums = getRNNCheckSums(miniBatch, seqLength, hiddenSize, numberOfLayers, numberOfLinearLayers, rnn)
+
+    -- Checksums to check against are retrieved from cudnn RNN sample.
+    mytester:assertalmosteq(checkSums.localSumi, 1.315793E+06, tolerance, 'checkSum with reference for localsumi failed')
+    mytester:assertalmosteq(checkSums.localSumh, 1.315212E+05, tolerance, 'checkSum with reference for localSumh failed')
+    mytester:assertalmosteq(checkSums.localSumdi, 6.676003E+01, tolerance, 'checkSum with reference for localSumdi failed')
+    mytester:assertalmosteq(checkSums.localSumdh, 6.425067E+01, tolerance, 'checkSum with reference for localSumdh failed')
+    mytester:assertalmosteq(checkSums.localSumdw, 1.453750E+09, tolerance, 'checkSum with reference for localSumdw failed')
+end
+
+function cudnntest.testRNNBatchFirst()
+    local miniBatch = 64
+    local seqLength = 20
+    local hiddenSize = 512
+    local numberOfLayers = 2
+    local numberOfLinearLayers = 2
+    local batchFirst = true
+    local rnn = cudnn.RNNReLU(hiddenSize, hiddenSize, numberOfLayers, batchFirst)
+    rnn.mode = 'CUDNN_RNN_RELU'
+    local checkSums = getRNNCheckSums(miniBatch, seqLength, hiddenSize, numberOfLayers, numberOfLinearLayers, rnn, batchFirst)
+
+    -- Checksums to check against are retrieved from cudnn RNN sample.
+    mytester:assertalmosteq(checkSums.localSumi, 1.315793E+06, tolerance, 'checkSum with reference for localsumi failed')
+    mytester:assertalmosteq(checkSums.localSumh, 1.315212E+05, tolerance, 'checkSum with reference for localSumh failed')
+    mytester:assertalmosteq(checkSums.localSumdi, 6.676003E+01, tolerance, 'checkSum with reference for localSumdi failed')
+    mytester:assertalmosteq(checkSums.localSumdh, 6.425067E+01, tolerance, 'checkSum with reference for localSumdh failed')
+    mytester:assertalmosteq(checkSums.localSumdw, 1.453750E+09, tolerance, 'checkSum with reference for localSumdw failed')
+end
+
+function cudnntest.testRNNTANH()
+    local miniBatch = 64
+    local seqLength = 20
+    local hiddenSize = 512
+    local numberOfLayers = 2
+    local numberOfLinearLayers = 2
+    local rnn = cudnn.RNNTanh(hiddenSize, hiddenSize, numberOfLayers)
+    rnn.mode = 'CUDNN_RNN_TANH'
+    local checkSums = getRNNCheckSums(miniBatch, seqLength, hiddenSize, numberOfLayers, numberOfLinearLayers, rnn)
+
+    -- Checksums to check against are retrieved from cudnn RNN sample.
+    mytester:assertalmosteq(checkSums.localSumi, 6.319591E+05, tolerance, 'checkSum with reference for localsumi failed')
+    mytester:assertalmosteq(checkSums.localSumh, 6.319605E+04, tolerance, 'checkSum with reference for localSumh failed')
+    mytester:assertalmosteq(checkSums.localSumdi, 4.501830E+00, tolerance, 'checkSum with reference for localSumdi failed')
+    mytester:assertalmosteq(checkSums.localSumdh, 4.489546E+00, tolerance, 'checkSum with reference for localSumdh failed')
+    mytester:assertalmosteq(checkSums.localSumdw, 5.012598E+07, tolerance, 'checkSum with reference for localSumdw failed')
+end
+
+function cudnntest.testRNNLSTM()
+    local miniBatch = 64
+    local seqLength = 20
+    local hiddenSize = 512
+    local numberOfLayers = 2
+    local numberOfLinearLayers = 8
+    local rnn = cudnn.LSTM(hiddenSize, hiddenSize, numberOfLayers)
+    local checkSums = getRNNCheckSums(miniBatch, seqLength, hiddenSize, numberOfLayers, numberOfLinearLayers, rnn)
+
+    -- Checksums to check against are retrieved from cudnn RNN sample.
+    mytester:assertalmosteq(checkSums.localSumi, 5.749536E+05, tolerance, 'checkSum with reference for localsumi failed')
+    mytester:assertalmosteq(checkSums.localSumc, 4.365091E+05, tolerance, 'checkSum with reference for localSumc failed')
+    mytester:assertalmosteq(checkSums.localSumh, 5.774818E+04, tolerance, 'checkSum with reference for localSumh failed')
+    mytester:assertalmosteq(checkSums.localSumdi, 3.842206E+02, tolerance, 'checkSum with reference for localSumdi failed')
+    mytester:assertalmosteq(checkSums.localSumdc, 9.323785E+03, tolerance, 'checkSum with reference for localSumdc failed')
+    mytester:assertalmosteq(checkSums.localSumdh, 1.182566E+01, tolerance, 'checkSum with reference for localSumdh failed')
+    mytester:assertalmosteq(checkSums.localSumdw, 4.313461E+08, tolerance, 'checkSum with reference for localSumdw failed')
+end
+
+function cudnntest.testRNNGRU()
+    local miniBatch = 64
+    local seqLength = 20
+    local hiddenSize = 512
+    local numberOfLayers = 2
+    local numberOfLinearLayers = 6
+    local rnn = cudnn.GRU(hiddenSize, hiddenSize, numberOfLayers)
+    local checkSums = getRNNCheckSums(miniBatch, seqLength, hiddenSize, numberOfLayers, numberOfLinearLayers, rnn)
+    -- Checksums to check against are retrieved from cudnn RNN sample.
+    mytester:assertalmosteq(checkSums.localSumi, 6.358978E+05, tolerance, 'checkSum with reference for localsumi failed')
+    mytester:assertalmosteq(checkSums.localSumh, 6.281680E+04, tolerance, 'checkSum with reference for localSumh failed')
+    mytester:assertalmosteq(checkSums.localSumdi, 6.296622E+00, tolerance, 'checkSum with reference for localSumdi failed')
+    mytester:assertalmosteq(checkSums.localSumdh, 2.289960E+05, tolerance, 'checkSum with reference for localSumdh failed')
+    mytester:assertalmosteq(checkSums.localSumdw, 5.397419E+07, tolerance, 'checkSum with reference for localSumdw failed')
+end
+
+function cudnntest.testBiDirectionalRELURNN()
+    local miniBatch = 64
+    local seqLength = 20
+    local hiddenSize = 512
+    local numberOfLayers = 2
+    local numberOfLinearLayers = 2
+    local nbDirections = 2
+    local batchFirst = false
+    local rnn = cudnn.RNN(hiddenSize, hiddenSize, numberOfLayers)
+    rnn.bidirectional = 'CUDNN_BIDIRECTIONAL'
+    rnn.mode = 'CUDNN_RNN_RELU'
+    rnn.numDirections = 2
+
+    local checkSums = getRNNCheckSums(miniBatch, seqLength, hiddenSize, numberOfLayers, numberOfLinearLayers, rnn, batchFirst, nbDirections)
+    -- Checksums to check against are retrieved from cudnn RNN sample.
+    mytester:assertalmosteq(checkSums.localSumi, 1.388634E+01, tolerance, 'checkSum with reference for localsumi failed')
+    mytester:assertalmosteq(checkSums.localSumh, 1.288997E+01, tolerance, 'checkSum with reference for localSumh failed')
+    mytester:assertalmosteq(checkSums.localSumdi, 1.288729E+01, tolerance, 'checkSum with reference for localSumdi failed')
+    mytester:assertalmosteq(checkSums.localSumdh, 1.279004E+01, tolerance, 'checkSum with reference for localSumdh failed')
+    mytester:assertalmosteq(checkSums.localSumdw, 7.061081E+07, tolerance, 'checkSum with reference for localSumdw failed')
+end
+
+function cudnntest.testBiDirectionalTANHRNN()
+    local miniBatch = 64
+    local seqLength = 20
+    local hiddenSize = 512
+    local numberOfLayers = 2
+    local numberOfLinearLayers = 2
+    local nbDirections = 2
+    local batchFirst = false
+    local rnn = cudnn.RNN(hiddenSize, hiddenSize, numberOfLayers)
+    rnn.bidirectional = 'CUDNN_BIDIRECTIONAL'
+    rnn.mode = 'CUDNN_RNN_TANH'
+    rnn.numDirections = 2
+
+    local checkSums = getRNNCheckSums(miniBatch, seqLength, hiddenSize, numberOfLayers, numberOfLinearLayers, rnn, batchFirst, nbDirections)
+    -- Checksums to check against are retrieved from cudnn RNN sample.
+    mytester:assertalmosteq(checkSums.localSumi, 1.388634E+01, tolerance, 'checkSum with reference for localsumi failed')
+    mytester:assertalmosteq(checkSums.localSumh, 1.288997E+01, tolerance, 'checkSum with reference for localSumh failed')
+    mytester:assertalmosteq(checkSums.localSumdi, 1.288729E+01, tolerance, 'checkSum with reference for localSumdi failed')
+    mytester:assertalmosteq(checkSums.localSumdh, 1.279004E+01, tolerance, 'checkSum with reference for localSumdh failed')
+    mytester:assertalmosteq(checkSums.localSumdw, 7.061081E+07, tolerance, 'checkSum with reference for localSumdw failed')
+end
+
+function cudnntest.testBiDirectionalLSTMRNN()
+    local miniBatch = 64
+    local seqLength = 20
+    local hiddenSize = 512
+    local numberOfLayers = 2
+    local numberOfLinearLayers = 8
+    local nbDirections = 2
+    local batchFirst = false
+    local rnn = cudnn.BLSTM(hiddenSize, hiddenSize, numberOfLayers)
+
+    local checkSums = getRNNCheckSums(miniBatch, seqLength, hiddenSize, numberOfLayers, numberOfLinearLayers, rnn, batchFirst, nbDirections)
+    -- Checksums to check against are retrieved from cudnn RNN sample.
+    mytester:assertalmosteq(checkSums.localSumi, 3.134097E+04, tolerance, 'checkSum with reference for localsumi failed')
+    mytester:assertalmosteq(checkSums.localSumc, 3.845626E+00, tolerance, 'checkSum with reference for localSumc failed')
+    mytester:assertalmosteq(checkSums.localSumh, 1.922855E+00, tolerance, 'checkSum with reference for localSumh failed')
+    mytester:assertalmosteq(checkSums.localSumdi, 4.794993E+00, tolerance, 'checkSum with reference for localSumdi failed')
+    mytester:assertalmosteq(checkSums.localSumdc, 2.870925E+04, tolerance, 'checkSum with reference for localSumdc failed')
+    mytester:assertalmosteq(checkSums.localSumdh, 2.468645E+00, tolerance, 'checkSum with reference for localSumdh failed')
+    mytester:assertalmosteq(checkSums.localSumdw, 1.121568E+08, tolerance, 'checkSum with reference for localSumdw failed')
+end
+
+function cudnntest.testBiDirectionalGRURNN()
+    local miniBatch = 64
+    local seqLength = 20
+    local hiddenSize = 512
+    local numberOfLayers = 2
+    local numberOfLinearLayers = 6
+    local nbDirections = 2
+    local batchFirst = false
+    local rnn = cudnn.RNN(hiddenSize, hiddenSize, numberOfLayers)
+    rnn.bidirectional = 'CUDNN_BIDIRECTIONAL'
+    rnn.mode = 'CUDNN_GRU'
+    rnn.numDirections = 2
+
+    local checkSums = getRNNCheckSums(miniBatch, seqLength, hiddenSize, numberOfLayers, numberOfLinearLayers, rnn, batchFirst, nbDirections)
+    -- Checksums to check against are retrieved from cudnn RNN sample.
+    mytester:assertalmosteq(checkSums.localSumi, 6.555183E+04, tolerance, 'checkSum with reference for localsumi failed')
+    mytester:assertalmosteq(checkSums.localSumh, 5.830924E+00, tolerance, 'checkSum with reference for localSumh failed')
+    mytester:assertalmosteq(checkSums.localSumdi, 4.271801E+00, tolerance, 'checkSum with reference for localSumdi failed')
+    mytester:assertalmosteq(checkSums.localSumdh, 6.555744E+04, tolerance, 'checkSum with reference for localSumdh failed')
+    mytester:assertalmosteq(checkSums.localSumdw, 1.701796E+08, tolerance, 'checkSum with reference for localSumdw failed')
+end
+
+--[[
+-- Method gets Checksums of RNN to compare with ref Checksums in cudnn RNN C sample.
+-- ]]
+function getRNNCheckSums(miniBatch, seqLength, hiddenSize, numberOfLayers, numberOfLinearLayers, rnn, batchFirst, nbDirections)
+    local biDirectionalScale = nbDirections or 1
+    -- Reset the rnn and weight descriptor (since we are manually setting values for matrix/bias.
+    rnn:reset()
+    rnn:resetWeightDescriptor()
+    local input
+    if (batchFirst) then
+        input = torch.CudaTensor(miniBatch, seqLength, hiddenSize):fill(1)
+    else
+        input = torch.CudaTensor(seqLength, miniBatch, hiddenSize):fill(1) -- Input initialised to 1s.
+    end
+    if (biDirectionalScale == 2) then
+        rnn.weight:fill(1 / rnn.weight:size(1))
+    else
+        -- Matrices are initialised to 1 / matrixSize, biases to 1.
+        for layer = 0, numberOfLayers - 1 do
+            for layerId = 0, numberOfLinearLayers - 1 do
+                local linLayerMatDesc = rnn:createFilterDescriptors(1)
+                local matrixPointer = ffi.new("float*[1]")
+                errcheck('cudnnGetRNNLinLayerMatrixParams',
+                         cudnn.getHandle(),
+                         rnn.rnnDesc[0],
+                         layer,
+                         rnn.xDescs,
+                         rnn.wDesc[0],
+                         rnn.weight:data(),
+                         layerId,
+                         linLayerMatDesc[0],
+                         ffi.cast("void**", matrixPointer))
+
+                local dataType = 'CUDNN_DATA_FLOAT'
+                local format = 'CUDNN_TENSOR_NCHW'
+                local nbDims = torch.IntTensor(1)
+
+                local minDim = 3
+                local filterDimA = torch.ones(minDim):int()
+                errcheck('cudnnGetFilterNdDescriptor',
+                         linLayerMatDesc[0],
+                         minDim,
+                         ffi.cast("cudnnDataType_t*", dataType),
+                         ffi.cast("cudnnDataType_t*", format),
+                         nbDims:data(),
+                         filterDimA:data())
+
+                local offset = matrixPointer[0] - rnn.weight:data()
+                local weightTensor = torch.CudaTensor(rnn.weight:storage(), offset + 1, filterDimA:prod())
+                weightTensor:fill(1.0 / filterDimA:prod())
+
+                local linLayerBiasDesc = rnn:createFilterDescriptors(1)
+                local biasPointer = ffi.new("float*[1]")
+                errcheck('cudnnGetRNNLinLayerBiasParams',
+                         cudnn.getHandle(),
+                         rnn.rnnDesc[0],
+                         layer,
+                         rnn.xDescs,
+                         rnn.wDesc[0],
+                         rnn.weight:data(),
+                         layerId,
+                         linLayerBiasDesc[0],
+                         ffi.cast("void**", biasPointer))
+
+                local dataType = 'CUDNN_DATA_FLOAT'
+                local format = 'CUDNN_TENSOR_NCHW'
+                local nbDims = torch.IntTensor(1)
+                local filterDimA = torch.ones(minDim):int()
+
+                errcheck('cudnnGetFilterNdDescriptor',
+                         linLayerBiasDesc[0],
+                         minDim,
+                         ffi.cast("cudnnDataType_t*", dataType),
+                         ffi.cast("cudnnDataType_t*", format),
+                         nbDims:data(),
+                         filterDimA:data())
+
+                local offset = biasPointer[0] - rnn.weight:data()
+                local biasTensor = torch.CudaTensor(rnn.weight:storage(), offset + 1, filterDimA:prod())
+                biasTensor:fill(1)
+            end
+        end
+    end
+    -- Set hx/cx/dhy/dcy data to 1s.
+    rnn.hiddenInput = torch.CudaTensor(numberOfLayers * biDirectionalScale, miniBatch, hiddenSize):fill(1)
+    rnn.cellInput = torch.CudaTensor(numberOfLayers * biDirectionalScale, miniBatch, hiddenSize):fill(1)
+    rnn.gradHiddenOutput = torch.CudaTensor(numberOfLayers * biDirectionalScale, miniBatch, hiddenSize):fill(1)
+    rnn.gradCellOutput = torch.CudaTensor(numberOfLayers * biDirectionalScale, miniBatch, hiddenSize):fill(1)
+    local testOutputi = rnn:forward(input)
+    -- gradInput set to 1s.
+    local gradInput
+    if(batchFirst) then
+        gradInput = torch.CudaTensor(miniBatch, seqLength, hiddenSize * biDirectionalScale):fill(1)
+    else
+        gradInput = torch.CudaTensor(seqLength, miniBatch, hiddenSize * biDirectionalScale):fill(1)
+    end
+    rnn:backward(input, gradInput)
+
+    -- Sum up all values for each.
+    local localSumi = torch.sum(testOutputi)
+    local localSumh = torch.sum(rnn.hiddenOutput)
+    local localSumc = torch.sum(rnn.cellOutput)
+
+    local localSumdi = torch.sum(rnn.gradInput)
+    local localSumdh = torch.sum(rnn.gradHiddenInput)
+    local localSumdc = torch.sum(rnn.gradCellInput)
+
+    local localSumdw = torch.sum(rnn.gradWeight)
+
+    local checkSums = {
+        localSumi = localSumi,
+        localSumh = localSumh,
+        localSumc = localSumc,
+        localSumdi = localSumdi,
+        localSumdh = localSumdh,
+        localSumdc = localSumdc,
+        localSumdw = localSumdw
+    }
+    return checkSums
+end
+
+mytester = torch.Tester()
+mytester:add(cudnntest)
+mytester:run()
\ No newline at end of file
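For context, here is a minimal usage sketch of the recurrent modules introduced by this commit, following the constructor signatures documented in the README hunk above. The layer sizes, sequence length, and batch size below are illustrative assumptions, not values taken from the commit.

```lua
require 'cudnn'
require 'cunn'

-- Hypothetical dimensions, chosen only for illustration.
local inputDim, hiddenDim, numLayers = 128, 256, 2

-- Default convention: time-major input of shape seqLength x batch x inputDim.
local lstm = cudnn.LSTM(inputDim, hiddenDim, numLayers)
local input = torch.CudaTensor(20, 32, inputDim):fill(1)
local output = lstm:forward(input)          -- 20 x 32 x hiddenDim

-- With batchFirst = true the module expects batch x seqLength x inputDim.
local blstm = cudnn.BLSTM(inputDim, hiddenDim, numLayers, true)
local batchInput = torch.CudaTensor(32, 20, inputDim):fill(1)
local biOutput = blstm:forward(batchInput)  -- 32 x 20 x (2 * hiddenDim): both directions concatenated
```

As in the tests above, gradients flow through the usual `nn` interface (`backward(input, gradOutput)`), with `gradOutput` shaped like the module's output.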