-rw-r--r-- | RNN.lua           | 75
-rw-r--r-- | test/test_rnn.lua | 77
2 files changed, 87 insertions, 65 deletions
diff --git a/RNN.lua b/RNN.lua
--- a/RNN.lua
+++ b/RNN.lua
@@ -4,6 +4,13 @@ local errcheck = cudnn.errcheck
 
 local DESCS = {'rnnDesc', 'dropoutDesc', 'wDesc', 'xDescs', 'yDescs', 'hxDesc', 'hyDesc', 'cxDesc', 'cyDesc'}
 
+RNN.linearLayers = {
+   CUDNN_LSTM = 8,
+   CUDNN_GRU = 6,
+   CUDNN_RNN_RELU = 2,
+   CUDNN_RNN_TANH = 2
+}
+
 function RNN:__init(inputSize, hiddenSize, numLayers, batchFirst, dropout)
    parent.__init(self)
 
@@ -516,6 +523,74 @@ function RNN:accGradParameters(input, gradOutput, scale)
    end
 end
 
+local function numberOfLinearLayers(self)
+   return self.linearLayers[self.mode]
+end
+
+local function numberOfLayers(self)
+   if self.bidirectional == 'CUDNN_BIDIRECTIONAL' then
+      assert(self.numDirections == 2)
+      return 2 * self.numLayers
+   else
+      return self.numLayers
+   end
+end
+
+-- Gets either the matrix or the bias parameters (depending on the cuDNN method given) at each layer and linear layerId.
+local function retrieveLinearParams(self, cuDNNMethod)
+   if not self.wDesc then
+      self:resetWeightDescriptor()
+   end
+   local linearParams = {}
+   local numberOfLinearLayers = numberOfLinearLayers(self)
+   local numLayers = numberOfLayers(self)
+   for layer = 0, numLayers - 1 do
+      local layerInfo = {}
+      for layerId = 0, numberOfLinearLayers - 1 do
+         local linLayerMatDesc = self:createFilterDescriptors(1)
+         local matrixPointer = ffi.new("float*[1]")
+         errcheck(cuDNNMethod,
+                  cudnn.getHandle(),
+                  self.rnnDesc[0],
+                  layer,
+                  self.xDescs[0],
+                  self.wDesc[0],
+                  self.weight:data(),
+                  layerId,
+                  linLayerMatDesc[0],
+                  ffi.cast("void**", matrixPointer))
+
+         local dataType = 'CUDNN_DATA_FLOAT'
+         local format = 'CUDNN_TENSOR_NCHW'
+         local nbDims = torch.IntTensor(1)
+
+         local minDim = 3
+         local filterDimA = torch.ones(minDim):int()
+         errcheck('cudnnGetFilterNdDescriptor',
+                  linLayerMatDesc[0],
+                  minDim,
+                  ffi.cast("cudnnDataType_t*", dataType),
+                  ffi.cast("cudnnDataType_t*", format),
+                  nbDims:data(),
+                  filterDimA:data())
+
+         local offset = matrixPointer[0] - self.weight:data()
+         local params = torch.CudaTensor(self.weight:storage(), offset + 1, filterDimA:prod())
+         table.insert(layerInfo, params)
+      end
+      table.insert(linearParams, layerInfo)
+   end
+   return linearParams
+end
+
+function RNN:weights()
+   return retrieveLinearParams(self, 'cudnnGetRNNLinLayerMatrixParams')
+end
+
+function RNN:biases()
+   return retrieveLinearParams(self, 'cudnnGetRNNLinLayerBiasParams')
+end
+
 function RNN:clearDesc()
    for _, desc in pairs(DESCS) do
       self[desc] = nil
diff --git a/test/test_rnn.lua b/test/test_rnn.lua
index 2476ce4..bbc679b 100644
--- a/test/test_rnn.lua
+++ b/test/test_rnn.lua
@@ -204,71 +204,18 @@ function getRNNCheckSums(miniBatch, seqLength, hiddenSize, numberOfLayers, numbe
    else
       input = torch.CudaTensor(seqLength, miniBatch, hiddenSize):fill(1) -- Input initialised to 1s.
    end
-   if (biDirectionalScale == 2) then
-      rnn.weight:fill(1 / rnn.weight:size(1))
-   else
-      -- Matrices are initialised to 1 / matrixSize, biases to 1.
-      for layer = 0, numberOfLayers - 1 do
-         for layerId = 0, numberOfLinearLayers - 1 do
-            local linLayerMatDesc = rnn:createFilterDescriptors(1)
-            local matrixPointer = ffi.new("float*[1]")
-            errcheck('cudnnGetRNNLinLayerMatrixParams',
-                     cudnn.getHandle(),
-                     rnn.rnnDesc[0],
-                     layer,
-                     rnn.xDescs[0],
-                     rnn.wDesc[0],
-                     rnn.weight:data(),
-                     layerId,
-                     linLayerMatDesc[0],
-                     ffi.cast("void**", matrixPointer))
-
-            local dataType = 'CUDNN_DATA_FLOAT'
-            local format = 'CUDNN_TENSOR_NCHW'
-            local nbDims = torch.IntTensor(1)
-
-            local minDim = 3
-            local filterDimA = torch.ones(minDim):int()
-            errcheck('cudnnGetFilterNdDescriptor',
-                     linLayerMatDesc[0],
-                     minDim,
-                     ffi.cast("cudnnDataType_t*", dataType),
-                     ffi.cast("cudnnDataType_t*", format),
-                     nbDims:data(),
-                     filterDimA:data())
-
-            local offset = matrixPointer[0] - rnn.weight:data()
-            local weightTensor = torch.CudaTensor(rnn.weight:storage(), offset + 1, filterDimA:prod())
-            weightTensor:fill(1.0 / filterDimA:prod())
-
-            local linLayerBiasDesc = rnn:createFilterDescriptors(1)
-            local biasPointer = ffi.new("float*[1]")
-            errcheck('cudnnGetRNNLinLayerBiasParams',
-                     cudnn.getHandle(),
-                     rnn.rnnDesc[0],
-                     layer,
-                     rnn.xDescs[0],
-                     rnn.wDesc[0],
-                     rnn.weight:data(),
-                     layerId,
-                     linLayerBiasDesc[0],
-                     ffi.cast("void**", biasPointer))
-
-            local dataType = 'CUDNN_DATA_FLOAT'
-            local format = 'CUDNN_TENSOR_NCHW'
-            local nbDims = torch.IntTensor(1)
-            local filterDimA = torch.ones(minDim):int()
-
-            errcheck('cudnnGetFilterNdDescriptor',
-                     linLayerBiasDesc[0],
-                     minDim,
-                     ffi.cast("cudnnDataType_t*", dataType),
-                     ffi.cast("cudnnDataType_t*", format),
-                     nbDims:data(),
-                     filterDimA:data())
-
-            local offset = biasPointer[0] - rnn.weight:data()
-            local biasTensor = torch.CudaTensor(rnn.weight:storage(), offset + 1, filterDimA:prod())
+   local weights = rnn:weights()
+   local biases = rnn:biases()
+   -- Matrices are initialised to 1 / matrixSize, biases to 1, unless bi-directional.
+   for layer = 1, numberOfLayers do
+      for layerId = 1, numberOfLinearLayers do
+         if (biDirectionalScale == 2) then
+            rnn.weight:fill(1 / rnn.weight:size(1))
+         else
+            local weightTensor = weights[layer][layerId]
+            weightTensor:fill(1.0 / weightTensor:size(1))
+
+            local biasTensor = biases[layer][layerId]
             biasTensor:fill(1)
          end
       end
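
The patch replaces the test's hand-rolled cuDNN pointer arithmetic with the new RNN:weights() and RNN:biases() accessors, which return, per layer, a table of CudaTensor views into the flat rnn.weight storage. Below is a minimal usage sketch, not part of the patch: it assumes `rnn` is an already-constructed cudnn.RNN whose flat weight tensor has been allocated (the kind of setup the test harness performs before calling the accessors), with the number of linear layers per RNN layer given by RNN.linearLayers (8 for CUDNN_LSTM, 6 for CUDNN_GRU, 2 for the plain RNNs).

-- Re-initialise every linear-layer matrix and bias via the new accessors,
-- mirroring what the updated test now does.
local weights = rnn:weights()   -- weights[layer][layerId] -> CudaTensor view into rnn.weight
local biases  = rnn:biases()    -- same layout, views onto the bias parameters

for layer = 1, #weights do
   for layerId = 1, #weights[layer] do
      local w = weights[layer][layerId]
      local b = biases[layer][layerId]
      w:fill(1.0 / w:size(1))    -- matrices to 1 / matrixSize
      b:fill(1)                  -- biases to 1
   end
end

Because the returned tensors are views on rnn.weight's storage, filling them writes directly into the flat parameter tensor; no copy back is needed.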