github.com/soumith/cudnn.torch.git
author     SeanNaren <taz838@hotmail.co.uk>          2016-04-11 22:21:40 +0300
committer  Boris Fomitchev <bfomitchev@nvidia.com>   2016-04-18 23:19:26 +0300
commit     96f13324d07b0b80fb429f6dbe35fa5402234968 (patch)
tree       1440a87a19522ef62d5b28ba98e327591c4796f0
parent     60a66872730eceaf4f769c281e2ad7289272323e (diff)
Added tests, modified README and added RNN modules
-rw-r--r--  BLSTM.lua            9
-rw-r--r--  GRU.lua              7
-rw-r--r--  LSTM.lua             7
-rw-r--r--  README.md            9
-rw-r--r--  RNN.lua            103
-rw-r--r--  RNNReLU.lua          7
-rw-r--r--  RNNTanh.lua          7
-rw-r--r--  init.lua             5
-rw-r--r--  test/test_rnn.lua  316
9 files changed, 431 insertions, 39 deletions
diff --git a/BLSTM.lua b/BLSTM.lua
new file mode 100644
index 0000000..8feebf1
--- /dev/null
+++ b/BLSTM.lua
@@ -0,0 +1,9 @@
+local BLSTM, parent = torch.class('cudnn.BLSTM', 'cudnn.RNN')
+
+function BLSTM:__init(inputSize, hiddenSize, numLayers, batchFirst)
+ parent.__init(self,inputSize, hiddenSize, numLayers, batchFirst)
+ self.bidirectional = 'CUDNN_BIDIRECTIONAL'
+ self.mode = 'CUDNN_LSTM'
+ self.numDirections = 2
+ self:reset()
+end
diff --git a/GRU.lua b/GRU.lua
new file mode 100644
index 0000000..615bdf7
--- /dev/null
+++ b/GRU.lua
@@ -0,0 +1,7 @@
+local GRU, parent = torch.class('cudnn.GRU', 'cudnn.RNN')
+
+function GRU:__init(inputSize, hiddenSize, numLayers, batchFirst)
+ parent.__init(self,inputSize, hiddenSize, numLayers, batchFirst)
+ self.mode = 'CUDNN_GRU'
+ self:reset()
+end
diff --git a/LSTM.lua b/LSTM.lua
new file mode 100644
index 0000000..29c199c
--- /dev/null
+++ b/LSTM.lua
@@ -0,0 +1,7 @@
+local LSTM, parent = torch.class('cudnn.LSTM', 'cudnn.RNN')
+
+function LSTM:__init(inputSize, hiddenSize, numLayers, batchFirst)
+ parent.__init(self,inputSize, hiddenSize, numLayers, batchFirst)
+ self.mode = 'CUDNN_LSTM'
+ self:reset()
+end
diff --git a/README.md b/README.md
index 37db4d0..b67ced3 100644
--- a/README.md
+++ b/README.md
@@ -37,6 +37,15 @@ cudnn.SpatialCrossEntropyCriterion() -- A spatial version of LogSoftMax +
cudnn.VolumetricConvolution(nInputPlane, nOutputPlane, kT, kW, kH, dT, dW, dH, padT, padW, padH)
cudnn.VolumetricMaxPooling(kT, kW, kH, dT, dW, dH, padT, padW, padH)
cudnn.VolumetricAveragePooling(kT, kW, kH, dT, dW, dH, padT, padW, padH)
+
+-- Recurrent Modules
+
+-- All inputs have to be 3D. Accepts input of seqLength x batch x inputDim, or batch x seqLength x inputDim if batchFirst set to true.
+cudnn.RNNReLU(inputDim, outputDim, numberOfLayers, [batchFirst = false])
+cudnn.RNNTanh(inputDim, outputDim, numberOfLayers, [batchFirst = false])
+cudnn.LSTM(inputDim, outputDim, numberOfLayers, [batchFirst = false])
+cudnn.GRU(inputDim, outputDim, numberOfLayers, [batchFirst = false])
+cudnn.BLSTM(inputDim, outputDim, numberOfLayers, [batchFirst = false])
```
### Modes
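As a usage note for the README additions above: all of the new modules take 3D input, time-major by default and batch-major when batchFirst is true. The sketch below is only an illustration of those constructors; the sizes (4 time steps, batch of 8, 16-in/32-out features) are made up for the example and are not part of the patch.

require 'cudnn'
require 'cunn'

-- Illustrative sizes, not taken from the patch.
local seqLength, batch, inputDim, outputDim = 4, 8, 16, 32

-- Default layout: seqLength x batch x inputDim.
local lstm = cudnn.LSTM(inputDim, outputDim, 1)
local out = lstm:forward(torch.CudaTensor(seqLength, batch, inputDim):uniform())
-- out has size seqLength x batch x outputDim.

-- With batchFirst = true the layout is batch x seqLength x inputDim.
local gru = cudnn.GRU(inputDim, outputDim, 1, true)
local outBF = gru:forward(torch.CudaTensor(batch, seqLength, inputDim):uniform())
-- outBF has size batch x seqLength x outputDim.

-- cudnn.BLSTM is bidirectional, so its output feature dimension is 2 * outputDim.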
diff --git a/RNN.lua b/RNN.lua
index 0b7ac09..a4840f7 100644
--- a/RNN.lua
+++ b/RNN.lua
@@ -2,7 +2,7 @@ local RNN, parent = torch.class('cudnn.RNN', 'nn.Module')
local ffi = require 'ffi'
local errcheck = cudnn.errcheck
-function RNN:__init(inputSize, hiddenSize, numLayers)
+function RNN:__init(inputSize, hiddenSize, numLayers, batchFirst)
parent.__init(self)
self.datatype = 'CUDNN_DATA_FLOAT'
@@ -12,10 +12,12 @@ function RNN:__init(inputSize, hiddenSize, numLayers)
self.miniBatch = 1
self.numLayers = numLayers
self.bidirectional = 'CUDNN_UNIDIRECTIONAL'
+ self.numDirections = 1 -- set to 2 for bi-directional.
self.inputMode = 'CUDNN_LINEAR_INPUT'
self.mode = 'CUDNN_RNN_RELU'
self.dropout = 0
self.seed = 0x01234567
+ self.batchFirst = batchFirst or false -- Set to true for batch x time x inputdim.
self.gradInput = torch.CudaTensor()
self.output = torch.CudaTensor()
@@ -50,7 +52,7 @@ function RNN:reset(stdv)
self.gradWeight:resizeAs(self.weight):zero()
end
-local function createDescriptors(count, descs_type, create_func, destroy_func)
+function RNN:createDescriptors(count, descs_type, create_func, destroy_func)
local ds = ffi.new(descs_type, count)
for i = 0, count - 1 do
errcheck(create_func, ds + i)
@@ -64,29 +66,29 @@ local function createDescriptors(count, descs_type, create_func, destroy_func)
return ds
end
-local function createDropoutDescriptors(count)
- return createDescriptors(count,
+function RNN:createDropoutDescriptors(count)
+ return self:createDescriptors(count,
'cudnnDropoutDescriptor_t[?]',
'cudnnCreateDropoutDescriptor',
'cudnnDestroyDropoutDescriptor')
end
-local function createFilterDescriptors(count)
- return createDescriptors(count,
+function RNN:createFilterDescriptors(count)
+ return self:createDescriptors(count,
'cudnnFilterDescriptor_t[?]',
'cudnnCreateFilterDescriptor',
'cudnnDestroyFilterDescriptor')
end
-local function createRNNDescriptors(count)
- return createDescriptors(count,
+function RNN:createRNNDescriptors(count)
+ return self:createDescriptors(count,
'cudnnRNNDescriptor_t[?]',
'cudnnCreateRNNDescriptor',
'cudnnDestroyRNNDescriptor')
end
-local function createTensorDescriptors(count)
- return createDescriptors(count,
+function RNN:createTensorDescriptors(count)
+ return self:createDescriptors(count,
'cudnnTensorDescriptor_t[?]',
'cudnnCreateTensorDescriptor',
'cudnnDestroyTensorDescriptor')
@@ -94,7 +96,7 @@ end
function RNN:resetDropoutDescriptor()
if not self.dropoutDesc then
- self.dropoutDesc = createDropoutDescriptors(1)
+ self.dropoutDesc = self:createDropoutDescriptors(1)
end
self.dropoutStatesSize = torch.LongTensor(1)
@@ -113,7 +115,7 @@ end
function RNN:resetRNNDescriptor()
if not self.rnnDesc then
- self.rnnDesc = createRNNDescriptors(1)
+ self.rnnDesc = self:createRNNDescriptors(1)
end
errcheck('cudnnSetRNNDescriptor',
@@ -130,7 +132,7 @@ end
function RNN:resetWeightDescriptor()
if not self.wDesc then
- self.wDesc = createFilterDescriptors(1)
+ self.wDesc = self:createFilterDescriptors(1)
end
local dim = torch.IntTensor({self.weight:size(1), 1, 1})
@@ -144,8 +146,8 @@ function RNN:resetWeightDescriptor()
end
function RNN:resetIODescriptors()
- self.xDescs = createTensorDescriptors(self.seqLength)
- self.yDescs = createTensorDescriptors(self.seqLength)
+ self.xDescs = self:createTensorDescriptors(self.seqLength)
+ self.yDescs = self:createTensorDescriptors(self.seqLength)
for i = 0, self.seqLength - 1 do
local dim = torch.IntTensor({self.inputSize, self.miniBatch, self.seqLength})
@@ -157,7 +159,7 @@ function RNN:resetIODescriptors()
dim:data(),
stride:data())
- local dim = torch.IntTensor({self.hiddenSize, self.miniBatch, self.seqLength})
+ local dim = torch.IntTensor({self.hiddenSize * self.numDirections, self.miniBatch, self.seqLength})
local stride = torch.IntTensor({1, dim[1], dim[1] * dim[2]})
errcheck('cudnnSetTensorNdDescriptor',
self.yDescs[i],
@@ -169,8 +171,8 @@ function RNN:resetIODescriptors()
end
function RNN:resetHiddenDescriptors()
- self.hxDesc = createTensorDescriptors(1)
- self.hyDesc = createTensorDescriptors(1)
+ self.hxDesc = self:createTensorDescriptors(1)
+ self.hyDesc = self:createTensorDescriptors(1)
local dim = torch.IntTensor({self.hiddenSize, self.miniBatch, self.numLayers})
local stride = torch.IntTensor({1, dim[1], dim[1] * dim[2]})
@@ -190,8 +192,8 @@ function RNN:resetHiddenDescriptors()
end
function RNN:resetCellDescriptors()
- self.cxDesc = createTensorDescriptors(1)
- self.cyDesc = createTensorDescriptors(1)
+ self.cxDesc = self:createTensorDescriptors(1)
+ self.cyDesc = self:createTensorDescriptors(1)
local dim = torch.IntTensor({self.hiddenSize, self.miniBatch, self.numLayers})
local stride = torch.IntTensor({1, dim[1], dim[1] * dim[2]})
@@ -210,7 +212,7 @@ function RNN:resetCellDescriptors()
stride:data())
end
-local function makeContiguous(self, input, gradOutput)
+function RNN:makeContiguous(input, gradOutput)
if not input:isContiguous() then
self._input = self._input or input.new()
self._input:typeAs(input):resizeAs(input):copy(input)
@@ -224,9 +226,19 @@ local function makeContiguous(self, input, gradOutput)
return input, gradOutput
end
+function RNN:resizeOutput(tensor)
+ return tensor:resize(self.seqLength, self.miniBatch, self.hiddenSize * self.numDirections)
+end
+
+function RNN:resizeHidden(tensor)
+ return tensor:resize(self.numLayers * self.numDirections, self.miniBatch, self.hiddenSize)
+end
+
function RNN:updateOutput(input)
+ if (self.batchFirst) then
+ input = input:transpose(1, 2)
+ end
assert(input:dim() == 3, 'input must have 3 dimensions: seqLength, miniBatch, inputSize')
-
-- Decide which descriptors/tensors need to be updated.
local resetRNN = not self.dropoutDesc or not self.rnnDesc
local resetIO = not self.xDescs or not self.yDescs
@@ -263,11 +275,11 @@ function RNN:updateOutput(input)
self:resetWeightDescriptor()
end
- local x = makeContiguous(self, input)
- local y = self.output:resize(self.seqLength, self.miniBatch, self.hiddenSize)
+ local x = self:makeContiguous(input)
+ local y = self:resizeOutput(self.output)
local w = self.weight
- local hy = self.hiddenOutput:resize(self.numLayers, self.miniBatch, self.hiddenSize):zero()
- local cy = self.cellOutput:resize(self.numLayers, self.miniBatch, self.hiddenSize):zero()
+ local hy = self:resizeHidden(self.hiddenOutput):zero()
+ local cy = self:resizeHidden(self.cellOutput):zero()
-- Optionally use hiddenInput/cellInput parameters
local hx = self.hiddenInput
@@ -275,14 +287,14 @@ function RNN:updateOutput(input)
if hx then
assert(hx:dim() == 3, 'hiddenInput must have 3 dimensions: numLayers, miniBatch, hiddenSize')
- assert(hx:size(1) == self.numLayers, 'hiddenInput has incorrect number of layers!')
+ assert(hx:size(1) == self.numLayers * self.numDirections, 'hiddenInput has incorrect number of layers!')
assert(hx:size(2) == self.miniBatch, 'hiddenInput has incorrect number of minibathes!')
assert(hx:size(3) == self.hiddenSize, 'hiddenIinput has incorrect size!')
assert(hx:isContiguous(), 'hiddenInput must be contiguous!') end
if cx then
assert(cx:dim() == 3, 'cellInput must have 3 dimensions: numLayers, miniBatch, hiddenSize')
- assert(cx:size(1) == self.numLayers, 'cellInput has incorrect number of layers!')
+ assert(cx:size(1) == self.numLayers * self.numDirections, 'cellInput has incorrect number of layers!')
assert(cx:size(2) == self.miniBatch, 'cellInput has incorrect number of minibathes!')
assert(cx:size(3) == self.hiddenSize, 'cellInput has incorrect size!')
assert(cx:isContiguous(), 'cellInput must be contiguous!')
@@ -338,11 +350,18 @@ function RNN:updateOutput(input)
self.cyDesc[0], cy:data(),
self.workspace:data(), self.workspace:size(1) * 4) -- sizeof(float)
end
-
+ if (self.batchFirst) then
+ self.output = self.output:transpose(1, 2)
+ end
return self.output
end
function RNN:updateGradInput(input, gradOutput)
+ if (self.batchFirst) then
+ input = input:transpose(1, 2)
+ gradOutput = gradOutput:transpose(1, 2)
+ self.output = self.output:transpose(1, 2)
+ end
assert(input:dim() == 3, 'input should have 3 dimensions: seqLength, miniBatch, inputSize')
assert(input:size(1) == self.seqLength, 'input has incorrect sequence length!')
assert(input:size(2) == self.miniBatch, 'input has incorrect minibatch size!')
@@ -351,7 +370,7 @@ function RNN:updateGradInput(input, gradOutput)
assert(gradOutput:isSameSizeAs(self.output), 'gradOutput has incorrect size!')
assert(self.train, 'updateGradInput can only be called when training!')
- local x, dy = makeContiguous(self, input, gradOutput)
+ local x, dy = self:makeContiguous(input, gradOutput)
local y = self.output
local w = self.weight
local dx = self.gradInput:resizeAs(input)
@@ -359,13 +378,13 @@ function RNN:updateGradInput(input, gradOutput)
local cx = self.cellInput
local dhy = self.gradHiddenOutput
local dcy = self.gradCellOutput
- local dhx = self.gradHiddenInput:resize(self.numLayers, self.miniBatch, self.hiddenSize):zero()
- local dcx = self.gradCellInput:resize(self.numLayers, self.miniBatch, self.hiddenSize):zero()
+ local dhx = self:resizeHidden(self.gradHiddenInput):zero()
+ local dcx = self:resizeHidden(self.gradCellInput):zero()
if hx then
assert(hx:dim() == 3, 'hiddenInput must have 3 dimensions: numLayers, miniBatch, hiddenSize')
- assert(hx:size(1) == self.numLayers, 'hiddenInput has incorrect number of layers!')
+ assert(hx:size(1) == self.numLayers * self.numDirections, 'hiddenInput has incorrect number of layers!')
assert(hx:size(2) == self.miniBatch, 'hiddenInput has incorrect minibatch size!')
assert(hx:size(3) == self.hiddenSize, 'hiddenInput has incorrect size!')
assert(hx:isContiguous(), 'hiddenInput must be contiguous!')
@@ -373,7 +392,7 @@ function RNN:updateGradInput(input, gradOutput)
if cx then
assert(cx:dim() == 3, 'cellInput must have 3 dimensions: numLayers, miniBatch, hiddenSize')
- assert(cx:size(1) == self.numLayers, 'cellInput has incorrect number of layers!')
+ assert(cx:size(1) == self.numLayers * self.numDirections, 'cellInput has incorrect number of layers!')
assert(cx:size(2) == self.miniBatch, 'cellInput has incorrect minibatch size!')
assert(cx:size(3) == self.hiddenSize, 'cellInput has incorrect size!')
assert(cx:isContiguous(), 'cellInput must be contiguous!')
@@ -382,7 +401,7 @@ function RNN:updateGradInput(input, gradOutput)
if dhy then
assert(dhy:dim() == 3, 'gradHiddenOutput must have 3 dimensions: ' ..
'numLayers, miniBatch, hiddenSize')
- assert(dhy:size(1) == self.numLayers, 'gradHiddenOutput has incorrect number of layers!')
+ assert(dhy:size(1) == self.numLayers * self.numDirections, 'gradHiddenOutput has incorrect number of layers!')
assert(dhy:size(2) == self.miniBatch, 'gradHiddenOutput has incorrect minibatch size!')
assert(dhy:size(3) == self.hiddenSize, 'gradHiddenOutput has incorrect size!')
assert(dhy:isContiguous(), 'gradHiddenOutput must be contiguous!')
@@ -391,7 +410,7 @@ function RNN:updateGradInput(input, gradOutput)
if dcy then
assert(dcy:dim() == 3, 'gradCellOutput must have 3 dimensions: ' ..
'numLayers, miniBatch, hiddenSize')
- assert(dcy:size(1) == self.numLayers, 'gradCellOutput has incorrect number of layers!')
+ assert(dcy:size(1) == self.numLayers * self.numDirections, 'gradCellOutput has incorrect number of layers!')
assert(dcy:size(2) == self.miniBatch, 'gradCellOutput has incorrect minibatch size!')
assert(dcy:size(3) == self.hiddenSize, 'gradCellOutput has incorrect size!')
assert(dcy:isContiguous(), 'gradCellOutput must be contiguous!')
@@ -412,11 +431,17 @@ function RNN:updateGradInput(input, gradOutput)
self.cxDesc[0], dcx:data(),
self.workspace:data(), self.workspace:size(1) * 4, -- sizeof(float)
self.reserve:data(), self.reserve:size(1) * 4) -- sizeof(float)
-
+ if (self.batchFirst) then
+ self.gradInput = self.gradInput:transpose(1, 2)
+ end
return self.gradInput
end
function RNN:accGradParameters(input, gradOutput, scale)
+ if (self.batchFirst) then
+ input = input:transpose(1, 2)
+ gradOutput = gradOutput:transpose(1, 2)
+ end
scale = scale or 1
if scale == 0 then return end
@@ -428,14 +453,14 @@ function RNN:accGradParameters(input, gradOutput, scale)
assert(gradOutput:isSameSizeAs(self.output), 'gradOutput has incorrect size!')
assert(self.train, 'accGradParameters can only be called when training!')
- local x, dy = makeContiguous(self, input, gradOutput)
+ local x, dy = self:makeContiguous(input, gradOutput)
local hx = self.hiddenInput
local y = self.output
local dw = self.gradWeight
if hx then
assert(hx:dim() == 3, 'hiddenInput must have 3 dimensions: numLayers, miniBatch, hiddenSize')
- assert(hx:size(1) == self.numLayers, 'hiddenInput has incorrect number of layers!')
+ assert(hx:size(1) == self.numLayers * self.numDirections, 'hiddenInput has incorrect number of layers!')
assert(hx:size(2) == self.miniBatch, 'hiddenInput has incorrect minibatch size!')
assert(hx:size(3) == self.hiddenSize, 'hiddenIinput has incorrect size!')
assert(hx:isContiguous(), 'hiddenInput must be contiguous!')
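A side note on the shape bookkeeping introduced in RNN.lua above: batchFirst only transposes between batch-major and time-major layouts around the cuDNN calls, while numDirections widens the feature dimension when the RNN is bidirectional. The helper below is a hypothetical illustration of that logic, not part of the patch.

-- Hypothetical helper (illustration only): the output size a cudnn.RNN-style
-- module is expected to produce, given its batchFirst/bidirectional settings.
local function expectedOutputSize(rnn, seqLength, miniBatch)
   local dirs = (rnn.bidirectional == 'CUDNN_BIDIRECTIONAL') and 2 or 1
   if rnn.batchFirst then
      return torch.LongStorage({miniBatch, seqLength, rnn.hiddenSize * dirs})  -- batch x time x features
   end
   return torch.LongStorage({seqLength, miniBatch, rnn.hiddenSize * dirs})     -- time x batch x features
end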
diff --git a/RNNReLU.lua b/RNNReLU.lua
new file mode 100644
index 0000000..3aa8ee9
--- /dev/null
+++ b/RNNReLU.lua
@@ -0,0 +1,7 @@
+local RNNReLU, parent = torch.class('cudnn.RNNReLU', 'cudnn.RNN')
+
+function RNNReLU:__init(inputSize, hiddenSize, numLayers, batchFirst)
+ parent.__init(self,inputSize, hiddenSize, numLayers, batchFirst)
+ self.mode = 'CUDNN_RNN_RELU'
+ self:reset()
+end
diff --git a/RNNTanh.lua b/RNNTanh.lua
new file mode 100644
index 0000000..98fa87c
--- /dev/null
+++ b/RNNTanh.lua
@@ -0,0 +1,7 @@
+local RNNTanh, parent = torch.class('cudnn.RNNTanh', 'cudnn.RNN')
+
+function RNNTanh:__init(inputSize, hiddenSize, numLayers, batchFirst)
+ parent.__init(self,inputSize, hiddenSize, numLayers, batchFirst)
+ self.mode = 'CUDNN_RNN_TANH'
+ self:reset()
+end
diff --git a/init.lua b/init.lua
index d7d2cf7..318570b 100644
--- a/init.lua
+++ b/init.lua
@@ -123,6 +123,11 @@ require('cudnn.VolumetricBatchNormalization')
require('cudnn.SpatialCrossEntropyCriterion')
require('cudnn.TemporalConvolution')
require('cudnn.RNN')
+require('cudnn.RNNTanh')
+require('cudnn.RNNReLU')
+require('cudnn.BLSTM')
+require('cudnn.LSTM')
+require('cudnn.GRU')
require('cudnn.functional')
require('cudnn.convert')
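With these requires added to init.lua, loading the package is enough to expose the new classes. A quick, illustrative check (not part of the patch):

require 'cudnn'
-- The RNN variants registered by this commit should now be non-nil.
print(cudnn.RNNReLU, cudnn.RNNTanh, cudnn.LSTM, cudnn.BLSTM, cudnn.GRU)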
diff --git a/test/test_rnn.lua b/test/test_rnn.lua
new file mode 100644
index 0000000..e7ee3de
--- /dev/null
+++ b/test/test_rnn.lua
@@ -0,0 +1,316 @@
+--[[
+-- Tests the implementation of RNN binding using the cudnn v5 library. Cross-check the checksums with cudnn reference
+-- sample checksums.
+-- ]]
+
+require 'cudnn'
+require 'cunn'
+local ffi = require 'ffi'
+local errcheck = cudnn.errcheck
+
+local cudnntest = torch.TestSuite()
+local mytester
+
+local tolerance = 300
+
+function cudnntest.testRNNRELU()
+ local miniBatch = 64
+ local seqLength = 20
+ local hiddenSize = 512
+ local numberOfLayers = 2
+ local numberOfLinearLayers = 2
+ local rnn = cudnn.RNNReLU(hiddenSize, hiddenSize, numberOfLayers)
+ rnn.mode = 'CUDNN_RNN_RELU'
+ local checkSums = getRNNCheckSums(miniBatch, seqLength, hiddenSize, numberOfLayers, numberOfLinearLayers, rnn)
+
+ -- Checksums to check against are retrieved from cudnn RNN sample.
+ mytester:assertalmosteq(checkSums.localSumi, 1.315793E+06, tolerance, 'checkSum with reference for localsumi failed')
+ mytester:assertalmosteq(checkSums.localSumh, 1.315212E+05, tolerance, 'checkSum with reference for localSumh failed')
+ mytester:assertalmosteq(checkSums.localSumdi, 6.676003E+01, tolerance, 'checkSum with reference for localSumdi failed')
+ mytester:assertalmosteq(checkSums.localSumdh, 6.425067E+01, tolerance, 'checkSum with reference for localSumdh failed')
+ mytester:assertalmosteq(checkSums.localSumdw, 1.453750E+09, tolerance, 'checkSum with reference for localSumdw failed')
+end
+
+function cudnntest.testRNNBatchFirst()
+ local miniBatch = 64
+ local seqLength = 20
+ local hiddenSize = 512
+ local numberOfLayers = 2
+ local numberOfLinearLayers = 2
+ local batchFirst = true
+ local rnn = cudnn.RNNReLU(hiddenSize, hiddenSize, numberOfLayers, batchFirst)
+ rnn.mode = 'CUDNN_RNN_RELU'
+ local checkSums = getRNNCheckSums(miniBatch, seqLength, hiddenSize, numberOfLayers, numberOfLinearLayers, rnn, batchFirst)
+
+ -- Checksums to check against are retrieved from cudnn RNN sample.
+ mytester:assertalmosteq(checkSums.localSumi, 1.315793E+06, tolerance, 'checkSum with reference for localsumi failed')
+ mytester:assertalmosteq(checkSums.localSumh, 1.315212E+05, tolerance, 'checkSum with reference for localSumh failed')
+ mytester:assertalmosteq(checkSums.localSumdi, 6.676003E+01, tolerance, 'checkSum with reference for localSumdi failed')
+ mytester:assertalmosteq(checkSums.localSumdh, 6.425067E+01, tolerance, 'checkSum with reference for localSumdh failed')
+ mytester:assertalmosteq(checkSums.localSumdw, 1.453750E+09, tolerance, 'checkSum with reference for localSumdw failed')
+end
+
+function cudnntest.testRNNTANH()
+ local miniBatch = 64
+ local seqLength = 20
+ local hiddenSize = 512
+ local numberOfLayers = 2
+ local numberOfLinearLayers = 2
+ local rnn = cudnn.RNNTanh(hiddenSize, hiddenSize, numberOfLayers)
+ rnn.mode = 'CUDNN_RNN_TANH'
+ local checkSums = getRNNCheckSums(miniBatch, seqLength, hiddenSize, numberOfLayers, numberOfLinearLayers, rnn)
+
+ -- Checksums to check against are retrieved from cudnn RNN sample.
+ mytester:assertalmosteq(checkSums.localSumi, 6.319591E+05, tolerance, 'checkSum with reference for localsumi failed')
+ mytester:assertalmosteq(checkSums.localSumh, 6.319605E+04, tolerance, 'checkSum with reference for localSumh failed')
+ mytester:assertalmosteq(checkSums.localSumdi, 4.501830E+00, tolerance, 'checkSum with reference for localSumdi failed')
+ mytester:assertalmosteq(checkSums.localSumdh, 4.489546E+00, tolerance, 'checkSum with reference for localSumdh failed')
+ mytester:assertalmosteq(checkSums.localSumdw, 5.012598E+07, tolerance, 'checkSum with reference for localSumdw failed')
+end
+
+function cudnntest.testRNNLSTM()
+ local miniBatch = 64
+ local seqLength = 20
+ local hiddenSize = 512
+ local numberOfLayers = 2
+ local numberOfLinearLayers = 8
+ local rnn = cudnn.LSTM(hiddenSize, hiddenSize, numberOfLayers)
+ local checkSums = getRNNCheckSums(miniBatch, seqLength, hiddenSize, numberOfLayers, numberOfLinearLayers, rnn)
+
+ -- Checksums to check against are retrieved from cudnn RNN sample.
+ mytester:assertalmosteq(checkSums.localSumi, 5.749536E+05, tolerance, 'checkSum with reference for localsumi failed')
+ mytester:assertalmosteq(checkSums.localSumc, 4.365091E+05, tolerance, 'checkSum with reference for localSumc failed')
+ mytester:assertalmosteq(checkSums.localSumh, 5.774818E+04, tolerance, 'checkSum with reference for localSumh failed')
+ mytester:assertalmosteq(checkSums.localSumdi, 3.842206E+02, tolerance, 'checkSum with reference for localSumdi failed')
+ mytester:assertalmosteq(checkSums.localSumdc, 9.323785E+03, tolerance, 'checkSum with reference for localSumdc failed')
+ mytester:assertalmosteq(checkSums.localSumdh, 1.182566E+01, tolerance, 'checkSum with reference for localSumdh failed')
+ mytester:assertalmosteq(checkSums.localSumdw, 4.313461E+08, tolerance, 'checkSum with reference for localSumdw failed')
+end
+
+function cudnntest.testRNNGRU()
+ local miniBatch = 64
+ local seqLength = 20
+ local hiddenSize = 512
+ local numberOfLayers = 2
+ local numberOfLinearLayers = 6
+ local rnn = cudnn.GRU(hiddenSize, hiddenSize, numberOfLayers)
+ local checkSums = getRNNCheckSums(miniBatch, seqLength, hiddenSize, numberOfLayers, numberOfLinearLayers, rnn)
+ -- Checksums to check against are retrieved from cudnn RNN sample.
+ mytester:assertalmosteq(checkSums.localSumi, 6.358978E+05, tolerance, 'checkSum with reference for localsumi failed')
+ mytester:assertalmosteq(checkSums.localSumh, 6.281680E+04, tolerance, 'checkSum with reference for localSumh failed')
+ mytester:assertalmosteq(checkSums.localSumdi, 6.296622E+00, tolerance, 'checkSum with reference for localSumdi failed')
+ mytester:assertalmosteq(checkSums.localSumdh, 2.289960E+05, tolerance, 'checkSum with reference for localSumdh failed')
+ mytester:assertalmosteq(checkSums.localSumdw, 5.397419E+07, tolerance, 'checkSum with reference for localSumdw failed')
+end
+
+function cudnntest.testBiDirectionalRELURNN()
+ local miniBatch = 64
+ local seqLength = 20
+ local hiddenSize = 512
+ local numberOfLayers = 2
+ local numberOfLinearLayers = 2
+ local nbDirections = 2
+ local batchFirst = false
+ local rnn = cudnn.RNN(hiddenSize, hiddenSize, numberOfLayers)
+ rnn.bidirectional = 'CUDNN_BIDIRECTIONAL'
+ rnn.mode = 'CUDNN_RNN_RELU'
+ rnn.numDirections = 2
+
+ local checkSums = getRNNCheckSums(miniBatch, seqLength, hiddenSize, numberOfLayers, numberOfLinearLayers, rnn, batchFirst, nbDirections)
+ -- Checksums to check against are retrieved from cudnn RNN sample.
+ mytester:assertalmosteq(checkSums.localSumi, 1.388634E+01, tolerance, 'checkSum with reference for localsumi failed')
+ mytester:assertalmosteq(checkSums.localSumh, 1.288997E+01, tolerance, 'checkSum with reference for localSumh failed')
+ mytester:assertalmosteq(checkSums.localSumdi, 1.288729E+01, tolerance, 'checkSum with reference for localSumdi failed')
+ mytester:assertalmosteq(checkSums.localSumdh, 1.279004E+01, tolerance, 'checkSum with reference for localSumdh failed')
+ mytester:assertalmosteq(checkSums.localSumdw, 7.061081E+07, tolerance, 'checkSum with reference for localSumdw failed')
+end
+
+function cudnntest.testBiDirectionalTANHRNN()
+ local miniBatch = 64
+ local seqLength = 20
+ local hiddenSize = 512
+ local numberOfLayers = 2
+ local numberOfLinearLayers = 2
+ local nbDirections = 2
+ local batchFirst = false
+ local rnn = cudnn.RNN(hiddenSize, hiddenSize, numberOfLayers)
+ rnn.bidirectional = 'CUDNN_BIDIRECTIONAL'
+ rnn.mode = 'CUDNN_RNN_TANH'
+ rnn.numDirections = 2
+
+ local checkSums = getRNNCheckSums(miniBatch, seqLength, hiddenSize, numberOfLayers, numberOfLinearLayers, rnn, batchFirst, nbDirections)
+ -- Checksums to check against are retrieved from cudnn RNN sample.
+ mytester:assertalmosteq(checkSums.localSumi, 1.388634E+01, tolerance, 'checkSum with reference for localsumi failed')
+ mytester:assertalmosteq(checkSums.localSumh, 1.288997E+01, tolerance, 'checkSum with reference for localSumh failed')
+ mytester:assertalmosteq(checkSums.localSumdi, 1.288729E+01, tolerance, 'checkSum with reference for localSumdi failed')
+ mytester:assertalmosteq(checkSums.localSumdh, 1.279004E+01, tolerance, 'checkSum with reference for localSumdh failed')
+ mytester:assertalmosteq(checkSums.localSumdw, 7.061081E+07, tolerance, 'checkSum with reference for localSumdw failed')
+end
+
+function cudnntest.testBiDirectionalLSTMRNN()
+ local miniBatch = 64
+ local seqLength = 20
+ local hiddenSize = 512
+ local numberOfLayers = 2
+ local numberOfLinearLayers = 8
+ local nbDirections = 2
+ local batchFirst = false
+ local rnn = cudnn.BLSTM(hiddenSize, hiddenSize, numberOfLayers)
+
+ local checkSums = getRNNCheckSums(miniBatch, seqLength, hiddenSize, numberOfLayers, numberOfLinearLayers, rnn, batchFirst, nbDirections)
+ -- Checksums to check against are retrieved from cudnn RNN sample.
+ mytester:assertalmosteq(checkSums.localSumi, 3.134097E+04, tolerance, 'checkSum with reference for localsumi failed')
+ mytester:assertalmosteq(checkSums.localSumc, 3.845626E+00, tolerance, 'checkSum with reference for localSumc failed')
+ mytester:assertalmosteq(checkSums.localSumh, 1.922855E+00, tolerance, 'checkSum with reference for localSumh failed')
+ mytester:assertalmosteq(checkSums.localSumdi, 4.794993E+00, tolerance, 'checkSum with reference for localSumdi failed')
+ mytester:assertalmosteq(checkSums.localSumdc, 2.870925E+04, tolerance, 'checkSum with reference for localSumdc failed')
+ mytester:assertalmosteq(checkSums.localSumdh, 2.468645E+00, tolerance, 'checkSum with reference for localSumdh failed')
+ mytester:assertalmosteq(checkSums.localSumdw, 1.121568E+08, tolerance, 'checkSum with reference for localSumdw failed')
+end
+
+function cudnntest.testBiDirectionalGRURNN()
+ local miniBatch = 64
+ local seqLength = 20
+ local hiddenSize = 512
+ local numberOfLayers = 2
+ local numberOfLinearLayers = 6
+ local nbDirections = 2
+ local batchFirst = false
+ local rnn = cudnn.RNN(hiddenSize, hiddenSize, numberOfLayers)
+ rnn.bidirectional = 'CUDNN_BIDIRECTIONAL'
+ rnn.mode = 'CUDNN_GRU'
+ rnn.numDirections = 2
+
+ local checkSums = getRNNCheckSums(miniBatch, seqLength, hiddenSize, numberOfLayers, numberOfLinearLayers, rnn, batchFirst, nbDirections)
+ -- Checksums to check against are retrieved from cudnn RNN sample.
+ mytester:assertalmosteq(checkSums.localSumi, 6.555183E+04, tolerance, 'checkSum with reference for localsumi failed')
+ mytester:assertalmosteq(checkSums.localSumh, 5.830924E+00, tolerance, 'checkSum with reference for localSumh failed')
+ mytester:assertalmosteq(checkSums.localSumdi, 4.271801E+00, tolerance, 'checkSum with reference for localSumdi failed')
+ mytester:assertalmosteq(checkSums.localSumdh, 6.555744E+04, tolerance, 'checkSum with reference for localSumdh failed')
+ mytester:assertalmosteq(checkSums.localSumdw, 1.701796E+08, tolerance, 'checkSum with reference for localSumdw failed')
+end
+
+--[[
+-- Method gets Checksums of RNN to compare with ref Checksums in cudnn RNN C sample.
+-- ]]
+function getRNNCheckSums(miniBatch, seqLength, hiddenSize, numberOfLayers, numberOfLinearLayers, rnn, batchFirst, nbDirections)
+ local biDirectionalScale = nbDirections or 1
+ -- Reset the rnn and weight descriptor (since we are manually setting values for matrix/bias.
+ rnn:reset()
+ rnn:resetWeightDescriptor()
+ local input
+ if (batchFirst) then
+ input = torch.CudaTensor(miniBatch, seqLength, hiddenSize):fill(1)
+ else
+ input = torch.CudaTensor(seqLength, miniBatch, hiddenSize):fill(1) -- Input initialised to 1s.
+ end
+ if (biDirectionalScale == 2) then
+ rnn.weight:fill(1 / rnn.weight:size(1))
+ else
+ -- Matrices are initialised to 1 / matrixSize, biases to 1.
+ for layer = 0, numberOfLayers - 1 do
+ for layerId = 0, numberOfLinearLayers - 1 do
+ local linLayerMatDesc = rnn:createFilterDescriptors(1)
+ local matrixPointer = ffi.new("float*[1]")
+ errcheck('cudnnGetRNNLinLayerMatrixParams',
+ cudnn.getHandle(),
+ rnn.rnnDesc[0],
+ layer,
+ rnn.xDescs,
+ rnn.wDesc[0],
+ rnn.weight:data(),
+ layerId,
+ linLayerMatDesc[0],
+ ffi.cast("void**", matrixPointer))
+
+ local dataType = 'CUDNN_DATA_FLOAT'
+ local format = 'CUDNN_TENSOR_NCHW'
+ local nbDims = torch.IntTensor(1)
+
+ local minDim = 3
+ local filterDimA = torch.ones(minDim):int()
+ errcheck('cudnnGetFilterNdDescriptor',
+ linLayerMatDesc[0],
+ minDim,
+ ffi.cast("cudnnDataType_t*", dataType),
+ ffi.cast("cudnnDataType_t*", format),
+ nbDims:data(),
+ filterDimA:data())
+
+ local offset = matrixPointer[0] - rnn.weight:data()
+ local weightTensor = torch.CudaTensor(rnn.weight:storage(), offset + 1, filterDimA:prod())
+ weightTensor:fill(1.0 / filterDimA:prod())
+
+ local linLayerBiasDesc = rnn:createFilterDescriptors(1)
+ local biasPointer = ffi.new("float*[1]")
+ errcheck('cudnnGetRNNLinLayerBiasParams',
+ cudnn.getHandle(),
+ rnn.rnnDesc[0],
+ layer,
+ rnn.xDescs,
+ rnn.wDesc[0],
+ rnn.weight:data(),
+ layerId,
+ linLayerBiasDesc[0],
+ ffi.cast("void**", biasPointer))
+
+ local dataType = 'CUDNN_DATA_FLOAT'
+ local format = 'CUDNN_TENSOR_NCHW'
+ local nbDims = torch.IntTensor(1)
+ local filterDimA = torch.ones(minDim):int()
+
+ errcheck('cudnnGetFilterNdDescriptor',
+ linLayerBiasDesc[0],
+ minDim,
+ ffi.cast("cudnnDataType_t*", dataType),
+ ffi.cast("cudnnDataType_t*", format),
+ nbDims:data(),
+ filterDimA:data())
+
+ local offset = biasPointer[0] - rnn.weight:data()
+ local biasTensor = torch.CudaTensor(rnn.weight:storage(), offset + 1, filterDimA:prod())
+ biasTensor:fill(1)
+ end
+ end
+ end
+ -- Set hx/cx/dhy/dcy data to 1s.
+ rnn.hiddenInput = torch.CudaTensor(numberOfLayers * biDirectionalScale, miniBatch, hiddenSize):fill(1)
+ rnn.cellInput = torch.CudaTensor(numberOfLayers * biDirectionalScale, miniBatch, hiddenSize):fill(1)
+ rnn.gradHiddenOutput = torch.CudaTensor(numberOfLayers * biDirectionalScale, miniBatch, hiddenSize):fill(1)
+ rnn.gradCellOutput = torch.CudaTensor(numberOfLayers * biDirectionalScale, miniBatch, hiddenSize):fill(1)
+ local testOutputi = rnn:forward(input)
+ -- gradInput set to 1s.
+ local gradInput
+ if(batchFirst) then
+ gradInput = torch.CudaTensor(miniBatch, seqLength, hiddenSize * biDirectionalScale):fill(1)
+ else
+ gradInput = torch.CudaTensor(seqLength, miniBatch, hiddenSize * biDirectionalScale):fill(1)
+ end
+ rnn:backward(input, gradInput)
+
+ -- Sum up all values for each.
+ local localSumi = torch.sum(testOutputi)
+ local localSumh = torch.sum(rnn.hiddenOutput)
+ local localSumc = torch.sum(rnn.cellOutput)
+
+ local localSumdi = torch.sum(rnn.gradInput)
+ local localSumdh = torch.sum(rnn.gradHiddenInput)
+ local localSumdc = torch.sum(rnn.gradCellInput)
+
+ local localSumdw = torch.sum(rnn.gradWeight)
+
+ local checkSums = {
+ localSumi = localSumi,
+ localSumh = localSumh,
+ localSumc = localSumc,
+ localSumdi = localSumdi,
+ localSumdh = localSumdh,
+ localSumdc = localSumdc,
+ localSumdw = localSumdw
+ }
+ return checkSums
+end
+
+mytester = torch.Tester()
+mytester:add(cudnntest)
+mytester:run()
\ No newline at end of file
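For reference, running the new suite end to end requires a GPU machine with cuDNN v5 and the cunn package installed; assuming that setup, the file can simply be executed from the repository root, for example with the Torch interpreter. The sketch below is the equivalent from an interactive session.

-- Equivalent to running `th test/test_rnn.lua` from the repository root
-- (assumes cuDNN v5 and cunn are available).
-- The file itself registers the suite and calls mytester:run().
dofile('test/test_rnn.lua')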