diff options
author | Natalia Gimelshein <ngimelshein@nvidia.com> | 2016-04-23 03:54:34 +0300 |
---|---|---|
committer | Boris Fomitchev <bfomitchev@nvidia.com> | 2016-05-17 01:12:28 +0300 |
commit | 17be1b758a2dc8986c3f76f5092467f191fcfc64 (patch) | |
tree | 8e45d3b61a50cc028897c7cbaeb012b2a13c1777 | |
parent | 8869d166a00769669d5bb1c981c7c02f03388804 (diff) |
fix for V5 GA RNN APIs
-rw-r--r-- | RNN.lua | 33 | ||||
-rw-r--r-- | ffi.lua | 206 | ||||
-rw-r--r-- | test/test_rnn.lua | 6 |
3 files changed, 128 insertions, 117 deletions
@@ -44,8 +44,9 @@ function RNN:reset(stdv) errcheck('cudnnGetRNNParamsSize', cudnn.getHandle(), self.rnnDesc[0], - self.xDescs, - weightSize:data()) + self.xDescs[0], + weightSize:data(), + self.datatype) weightSize[1] = (weightSize[1] + 3) / 4 -- sizeof(float) self.weight:resize(weightSize[1]) self.weight:uniform(-stdv, stdv) @@ -116,12 +117,10 @@ end function RNN:resetRNNDescriptor() if not self.rnnDesc then self.rnnDesc = self:createRNNDescriptors(1) - end - + end errcheck('cudnnSetRNNDescriptor', self.rnnDesc[0], self.hiddenSize, - self.seqLength, self.numLayers, self.dropoutDesc[0], self.inputMode, @@ -150,8 +149,8 @@ function RNN:resetIODescriptors() self.yDescs = self:createTensorDescriptors(self.seqLength) for i = 0, self.seqLength - 1 do - local dim = torch.IntTensor({self.inputSize, self.miniBatch, self.seqLength}) - local stride = torch.IntTensor({1, dim[1], dim[1] * dim[2]}) + local dim = torch.IntTensor({ self.miniBatch,self.inputSize, 1}) + local stride = torch.IntTensor({dim[3] * dim[2], dim[3],1}) errcheck('cudnnSetTensorNdDescriptor', self.xDescs[i], self.datatype, @@ -159,8 +158,8 @@ function RNN:resetIODescriptors() dim:data(), stride:data()) - local dim = torch.IntTensor({self.hiddenSize * self.numDirections, self.miniBatch, self.seqLength}) - local stride = torch.IntTensor({1, dim[1], dim[1] * dim[2]}) + local dim = torch.IntTensor({self.miniBatch, self.hiddenSize * self.numDirections, 1}) + local stride = torch.IntTensor({dim[3] * dim[2], dim[3],1}) errcheck('cudnnSetTensorNdDescriptor', self.yDescs[i], self.datatype, @@ -173,9 +172,8 @@ end function RNN:resetHiddenDescriptors() self.hxDesc = self:createTensorDescriptors(1) self.hyDesc = self:createTensorDescriptors(1) - - local dim = torch.IntTensor({self.hiddenSize, self.miniBatch, self.numLayers}) - local stride = torch.IntTensor({1, dim[1], dim[1] * dim[2]}) + local dim = torch.IntTensor({self.numLayers*self.numDirections, self.miniBatch, self.hiddenSize }) + local stride = torch.IntTensor({dim[3] * dim[2], dim[3],1}) errcheck('cudnnSetTensorNdDescriptor', self.hxDesc[0], @@ -194,9 +192,8 @@ end function RNN:resetCellDescriptors() self.cxDesc = self:createTensorDescriptors(1) self.cyDesc = self:createTensorDescriptors(1) - - local dim = torch.IntTensor({self.hiddenSize, self.miniBatch, self.numLayers}) - local stride = torch.IntTensor({1, dim[1], dim[1] * dim[2]}) + local dim = torch.IntTensor({self.numLayers*self.numDirections, self.miniBatch, self.hiddenSize }) + local stride = torch.IntTensor({dim[3] * dim[2], dim[3],1}) errcheck('cudnnSetTensorNdDescriptor', self.cxDesc[0], @@ -305,6 +302,7 @@ function RNN:updateOutput(input) errcheck('cudnnGetRNNWorkspaceSize', cudnn.getHandle(), self.rnnDesc[0], + self.seqLength, self.xDescs, workspaceSize:data()) workspaceSize[1] = (workspaceSize[1] + 3) / 4 -- sizeof(float) @@ -317,6 +315,7 @@ function RNN:updateOutput(input) errcheck('cudnnGetRNNTrainingReserveSize', cudnn.getHandle(), self.rnnDesc[0], + self.seqLength, self.xDescs, reserveSize:data()) reserveSize[1] = (reserveSize[1] + 3) / 4 -- sizeof(float) @@ -328,6 +327,7 @@ function RNN:updateOutput(input) errcheck('cudnnRNNForwardTraining', cudnn.getHandle(), self.rnnDesc[0], + self.seqLength, self.xDescs, x:data(), self.hxDesc[0], hx and hx:data() or nil, self.cxDesc[0], cx and cx:data() or nil, @@ -341,6 +341,7 @@ function RNN:updateOutput(input) errcheck('cudnnRNNForwardInference', cudnn.getHandle(), self.rnnDesc[0], + self.seqLength, self.xDescs, x:data(), self.hxDesc[0], hx and hx:data() or nil, self.cxDesc[0], cx and cx:data() or nil, @@ -419,6 +420,7 @@ function RNN:updateGradInput(input, gradOutput) errcheck('cudnnRNNBackwardData', cudnn.getHandle(), self.rnnDesc[0], + self.seqLength, self.yDescs, y:data(), self.yDescs, dy:data(), self.hyDesc[0], dhy and dhy:data() or nil, @@ -482,6 +484,7 @@ function RNN:accGradParameters(input, gradOutput, scale) errcheck('cudnnRNNBackwardWeights', cudnn.getHandle(), self.rnnDesc[0], + self.seqLength, self.xDescs, x:data(), self.hxDesc[0], hx and hx:data() or nil, self.yDescs, y:data(), @@ -1241,146 +1241,154 @@ cudnnStatus_t cudnnCreateRNNDescriptor(cudnnRNNDescriptor_t * rnnDes cudnnStatus_t cudnnDestroyRNNDescriptor(cudnnRNNDescriptor_t rnnDesc); cudnnStatus_t cudnnSetRNNDescriptor(cudnnRNNDescriptor_t rnnDesc, - int hiddenSize, - int seqLength, - int numLayers, - cudnnDropoutDescriptor_t dropoutDesc, /* Between layers, not between recurrent steps.*/ - cudnnRNNInputMode_t inputMode, - cudnnDirectionMode_t direction, - cudnnRNNMode_t mode, + int hiddenSize, + int numLayers, + cudnnDropoutDescriptor_t dropoutDesc, + cudnnRNNInputMode_t inputMode, + cudnnDirectionMode_t direction, + cudnnRNNMode_t mode, cudnnDataType_t dataType); -/* dataType in the RNN descriptor is used to determine math precision*/ -/* dataType in weight descriptors and input descriptors is used to describe storage*/ +// dataType in the RNN descriptor is used to determine math precision +// dataType in weight descriptors and input descriptors is used to describe storage cudnnStatus_t cudnnGetRNNWorkspaceSize( cudnnHandle_t handle, - const cudnnRNNDescriptor_t rnnDesc, + const cudnnRNNDescriptor_t rnnDesc, + const int seqLength, const cudnnTensorDescriptor_t *xDesc, size_t *sizeInBytes ); - + cudnnStatus_t cudnnGetRNNTrainingReserveSize( cudnnHandle_t handle, - const cudnnRNNDescriptor_t rnnDesc, + const cudnnRNNDescriptor_t rnnDesc, + const int seqLength, const cudnnTensorDescriptor_t *xDesc, size_t *sizeInBytes ); - + cudnnStatus_t cudnnGetRNNParamsSize( cudnnHandle_t handle, - const cudnnRNNDescriptor_t rnnDesc, - const cudnnTensorDescriptor_t *xDesc, - size_t *sizeInBytes + const cudnnRNNDescriptor_t rnnDesc, + const cudnnTensorDescriptor_t xDesc, + size_t *sizeInBytes, + cudnnDataType_t dataType ); cudnnStatus_t cudnnGetRNNLinLayerMatrixParams( cudnnHandle_t handle, - const cudnnRNNDescriptor_t rnnDesc, + const cudnnRNNDescriptor_t rnnDesc, const int layer, - const cudnnTensorDescriptor_t * xDesc, - const cudnnFilterDescriptor_t wDesc, - const void * w, - const int linLayerID, - cudnnFilterDescriptor_t linLayerMatDesc, + const cudnnTensorDescriptor_t xDesc, + const cudnnFilterDescriptor_t wDesc, + const void * w, + const int linLayerID, + cudnnFilterDescriptor_t linLayerMatDesc, void ** linLayerMat ); cudnnStatus_t cudnnGetRNNLinLayerBiasParams( cudnnHandle_t handle, - const cudnnRNNDescriptor_t rnnDesc, + const cudnnRNNDescriptor_t rnnDesc, const int layer, - const cudnnTensorDescriptor_t * xDesc, - const cudnnFilterDescriptor_t wDesc, - const void * w, - const int linLayerID, - cudnnFilterDescriptor_t linLayerBiasDesc, - void ** linLayerBias + const cudnnTensorDescriptor_t xDesc, + const cudnnFilterDescriptor_t wDesc, + const void * w, + const int linLayerID, + cudnnFilterDescriptor_t linLayerBiasDesc, + void ** linLayerBias ); -cudnnStatus_t cudnnRNNForwardInference( cudnnHandle_t handle, - const cudnnRNNDescriptor_t rnnDesc, - const cudnnTensorDescriptor_t * xDesc, - const void * x, - const cudnnTensorDescriptor_t hxDesc, - const void * hx, - const cudnnTensorDescriptor_t cxDesc, - const void * cx, - const cudnnFilterDescriptor_t wDesc, - const void * w, - const cudnnTensorDescriptor_t *yDesc, - void * y, - const cudnnTensorDescriptor_t hyDesc, - void * hy, - const cudnnTensorDescriptor_t cyDesc, - void * cy, - void * workspace, - size_t workSpaceSizeInBytes); - - - -cudnnStatus_t cudnnRNNForwardTraining( cudnnHandle_t handle, - const cudnnRNNDescriptor_t rnnDesc, - const cudnnTensorDescriptor_t *xDesc, - const void * x, - const cudnnTensorDescriptor_t hxDesc, - const void * hx, - const cudnnTensorDescriptor_t cxDesc, - const void * cx, - const cudnnFilterDescriptor_t wDesc, - const void * w, - const cudnnTensorDescriptor_t *yDesc, - void * y, - const cudnnTensorDescriptor_t hyDesc, - void * hy, - const cudnnTensorDescriptor_t cyDesc, - void * cy, - void * workspace, +cudnnStatus_t cudnnRNNForwardInference( cudnnHandle_t handle, + const cudnnRNNDescriptor_t rnnDesc, + const int seqLength, + const cudnnTensorDescriptor_t * xDesc, + const void * x, + const cudnnTensorDescriptor_t hxDesc, + const void * hx, + const cudnnTensorDescriptor_t cxDesc, + const void * cx, + const cudnnFilterDescriptor_t wDesc, + const void * w, + const cudnnTensorDescriptor_t *yDesc, + void * y, + const cudnnTensorDescriptor_t hyDesc, + void * hy, + const cudnnTensorDescriptor_t cyDesc, + void * cy, + void * workspace, + size_t workSpaceSizeInBytes); + + + +cudnnStatus_t cudnnRNNForwardTraining( cudnnHandle_t handle, + const cudnnRNNDescriptor_t rnnDesc, + const int seqLength, + const cudnnTensorDescriptor_t *xDesc, + const void * x, + const cudnnTensorDescriptor_t hxDesc, + const void * hx, + const cudnnTensorDescriptor_t cxDesc, + const void * cx, + const cudnnFilterDescriptor_t wDesc, + const void * w, + const cudnnTensorDescriptor_t *yDesc, + void * y, + const cudnnTensorDescriptor_t hyDesc, + void * hy, + const cudnnTensorDescriptor_t cyDesc, + void * cy, + void * workspace, size_t workSpaceSizeInBytes, - void * reserveSpace, + void * reserveSpace, size_t reserveSpaceSizeInBytes); -cudnnStatus_t cudnnRNNBackwardData( cudnnHandle_t handle, - const cudnnRNNDescriptor_t rnnDesc, - const cudnnTensorDescriptor_t * yDesc, - const void * y, - const cudnnTensorDescriptor_t * dyDesc, - const void * dy, - const cudnnTensorDescriptor_t dhyDesc, - const void * dhy, - const cudnnTensorDescriptor_t dcyDesc, - const void * dcy, - const cudnnFilterDescriptor_t wDesc, - const void * w, - const cudnnTensorDescriptor_t hxDesc, - const void * hx, - const cudnnTensorDescriptor_t cxDesc, - const void * cx, - const cudnnTensorDescriptor_t * dxDesc, - void * dx, +cudnnStatus_t cudnnRNNBackwardData( cudnnHandle_t handle, + const cudnnRNNDescriptor_t rnnDesc, + const int seqLength, + const cudnnTensorDescriptor_t * yDesc, + const void * y, + const cudnnTensorDescriptor_t * dyDesc, + const void * dy, + const cudnnTensorDescriptor_t dhyDesc, + const void * dhy, + const cudnnTensorDescriptor_t dcyDesc, + const void * dcy, + const cudnnFilterDescriptor_t wDesc, + const void * w, + const cudnnTensorDescriptor_t hxDesc, + const void * hx, + const cudnnTensorDescriptor_t cxDesc, + const void * cx, + const cudnnTensorDescriptor_t * dxDesc, + void * dx, const cudnnTensorDescriptor_t dhxDesc, void * dhx, const cudnnTensorDescriptor_t dcxDesc, void * dcx, void * workspace, size_t workSpaceSizeInBytes, - const void * reserveSpace, + const void * reserveSpace, size_t reserveSpaceSizeInBytes ); -cudnnStatus_t cudnnRNNBackwardWeights( cudnnHandle_t handle, - const cudnnRNNDescriptor_t rnnDesc, - const cudnnTensorDescriptor_t * xDesc, - const void * x, - const cudnnTensorDescriptor_t hxDesc, - const void * hx, - const cudnnTensorDescriptor_t * yDesc, +cudnnStatus_t cudnnRNNBackwardWeights( cudnnHandle_t handle, + const cudnnRNNDescriptor_t rnnDesc, + const int seqLength, + const cudnnTensorDescriptor_t * xDesc, + const void * x, + const cudnnTensorDescriptor_t hxDesc, + const void * hx, + const cudnnTensorDescriptor_t * yDesc, const void * y, - const void * workspace, - size_t workSpaceSizeInBytes, - const cudnnFilterDescriptor_t dwDesc, + const void * workspace, + size_t workSpaceSizeInBytes, + const cudnnFilterDescriptor_t dwDesc, void * dw, - const void * reserveSpace, + const void * reserveSpace, size_t reserveSpaceSizeInBytes ); - + + + /* DEPRECATED routines to be removed next release : diff --git a/test/test_rnn.lua b/test/test_rnn.lua index e7ee3de..2476ce4 100644 --- a/test/test_rnn.lua +++ b/test/test_rnn.lua @@ -216,7 +216,7 @@ function getRNNCheckSums(miniBatch, seqLength, hiddenSize, numberOfLayers, numbe cudnn.getHandle(), rnn.rnnDesc[0], layer, - rnn.xDescs, + rnn.xDescs[0], rnn.wDesc[0], rnn.weight:data(), layerId, @@ -247,7 +247,7 @@ function getRNNCheckSums(miniBatch, seqLength, hiddenSize, numberOfLayers, numbe cudnn.getHandle(), rnn.rnnDesc[0], layer, - rnn.xDescs, + rnn.xDescs[0], rnn.wDesc[0], rnn.weight:data(), layerId, @@ -313,4 +313,4 @@ end mytester = torch.Tester() mytester:add(cudnntest) -mytester:run()
\ No newline at end of file +mytester:run() |