diff options
-rw-r--r-- | RNN.lua | 772 | ||||
-rw-r--r-- | rnn_exp.lua | 293 | ||||
-rw-r--r-- | rnn_exp2.lua | 198 | ||||
-rw-r--r-- | test/test.lua | 6 |
4 files changed, 444 insertions, 825 deletions
@@ -2,169 +2,215 @@ local RNN, parent = torch.class('cudnn.RNN', 'nn.Module') local ffi = require 'ffi' local errcheck = cudnn.errcheck -function RNN:__init(hiddenSize, numLayers) - parent.__init(self) - - self.datatype = 0 -- TODO CUDNN_FLOAT, should get the constant from ffi - self.hiddenSize = hiddenSize - self.inputSize = 0 - self.seqLength = 0 - self.numLayers = numLayers - self.miniBatch = 0 - self.bidirectional = 0 - self.inputMode = 0 -- TODO CUDNN_LINEAR_INPUT, should get the constant from ffi - self.mode = 0 -- TODO CUDNN_RNN_RELU, should get the constant from ffi - self.dropout = 0 - self.seed = 0x01234567 - - self.gradInput = torch.CudaTensor() - self.output = torch.CudaTensor() - self.weight = torch.CudaTensor() - self.gradParameters = torch.CudaTensor() - self.hx = torch.CudaTensor() - self.cx = torch.CudaTensor() - self.hy = torch.CudaTensor() - self.cy = torch.CudaTensor() - self.reserve = torch.CudaTensor(1) +function RNN:__init(inputSize, hiddenSize, numLayers) + parent.__init(self) + + self.datatype = 'CUDNN_DATA_FLOAT' + self.inputSize = inputSize + self.hiddenSize = hiddenSize + self.seqLength = 1 + self.miniBatch = 1 + self.numLayers = numLayers + self.bidirectional = 'CUDNN_UNIDIRECTIONAL' + self.inputMode = 'CUDNN_LINEAR_INPUT' + self.mode = 'CUDNN_RNN_RELU' + self.dropout = 0 + self.seed = 0x01234567 + + self.gradInput = torch.CudaTensor() + self.output = torch.CudaTensor() + self.weight = torch.CudaTensor() + self.gradWeight = torch.CudaTensor() + self.reserve = torch.CudaTensor() + self.hiddenOutput = torch.CudaTensor() + self.cellOutput = torch.CudaTensor() + self.gradHiddenInput = torch.CudaTensor() + self.gradCellInput = torch.CudaTensor() + + self:training() + self:reset() +end + +function RNN:reset(stdv) + stdv = stdv or 1.0 / math.sqrt(self.hiddenSize) + + self:resetDropoutDescriptor() + self:resetRNNDescriptor() + self:resetIODescriptors() + + local weightSize = torch.LongTensor(1) + errcheck('cudnnGetRNNParamsSize', + cudnn.getHandle(), + self.rnnDesc[0], + self.xDescs, + weightSize:data()) + weightSize[1] = (weightSize[1] + 3) / 4 -- sizeof(float) + self.weight:resize(weightSize[1]) + self.weight:uniform(-stdv, stdv) + self.gradWeight:resizeAs(self.weight):zero() end local function createDescriptors(count, descs_type, create_func, destroy_func) - local ds = ffi.new(descs_type, count) - for i = 0, count - 1 do - errcheck(create_func, ds + i) - end - local function destroyDescriptors(ds) - for i = 0, count - 1 do - errcheck(destroy_func, ds[i]) - end - end - ffi.gc(ds, destroyDescriptors) - return ds + local ds = ffi.new(descs_type, count) + for i = 0, count - 1 do + errcheck(create_func, ds + i) + end + local function destroyDescriptors(ds) + for i = 0, count - 1 do + errcheck(destroy_func, ds[i]) + end + end + ffi.gc(ds, destroyDescriptors) + return ds end local function createDropoutDescriptors(count) - return createDescriptors(count, - 'cudnnDropoutDescriptor_t[?]', - 'cudnnCreateDropoutDescriptor', - 'cudnnDestroyDropoutDescriptor') + return createDescriptors(count, + 'cudnnDropoutDescriptor_t[?]', + 'cudnnCreateDropoutDescriptor', + 'cudnnDestroyDropoutDescriptor') end local function createFilterDescriptors(count) - return createDescriptors(count, - 'cudnnFilterDescriptor_t[?]', - 'cudnnCreateFilterDescriptor', - 'cudnnDestroyFilterDescriptor') + return createDescriptors(count, + 'cudnnFilterDescriptor_t[?]', + 'cudnnCreateFilterDescriptor', + 'cudnnDestroyFilterDescriptor') end local function createRNNDescriptors(count) - return createDescriptors(count, - 'cudnnRNNDescriptor_t[?]', - 'cudnnCreateRNNDescriptor', - 'cudnnDestroyRNNDescriptor') + return createDescriptors(count, + 'cudnnRNNDescriptor_t[?]', + 'cudnnCreateRNNDescriptor', + 'cudnnDestroyRNNDescriptor') end -local function createTensorDescriptors(count) return createDescriptors(count, - 'cudnnTensorDescriptor_t[?]', - 'cudnnCreateTensorDescriptor', - 'cudnnDestroyTensorDescriptor') +local function createTensorDescriptors(count) + return createDescriptors(count, + 'cudnnTensorDescriptor_t[?]', + 'cudnnCreateTensorDescriptor', + 'cudnnDestroyTensorDescriptor') end function RNN:resetDropoutDescriptor() - if not self.dropoutDesc then - self.dropoutDesc = createDropoutDescriptors(1) - end - - self.dropoutStatesSize = torch.LongTensor(1) - errcheck('cudnnDropoutGetStatesSize', - cudnn.getHandle(), - self.dropoutStatesSize:data()) - self.dropoutStates = torch.CudaTensor(self.dropoutStatesSize[1]) - - errcheck('cudnnSetDropoutDescriptor', - self.dropoutDesc[0], - cudnn.getHandle(), - self.dropout, - self.dropoutStates:data(), self.dropoutStatesSize[1], - self.seed) + if not self.dropoutDesc then + self.dropoutDesc = createDropoutDescriptors(1) + end + + self.dropoutStatesSize = torch.LongTensor(1) + errcheck('cudnnDropoutGetStatesSize', + cudnn.getHandle(), + self.dropoutStatesSize:data()) + self.dropoutStates = torch.CudaTensor(self.dropoutStatesSize[1]) + + errcheck('cudnnSetDropoutDescriptor', + self.dropoutDesc[0], + cudnn.getHandle(), + self.dropout, + self.dropoutStates:data(), self.dropoutStatesSize[1], + self.seed) end function RNN:resetRNNDescriptor() - if not self.rnnDesc then - self.rnnDesc = createRNNDescriptors(1) - end - - errcheck('cudnnSetRNNDescriptor', - self.rnnDesc[0], - self.hiddenSize, - self.seqLength, - self.numLayers, - self.dropoutDesc[0], - self.inputMode, - self.bidirectional, - self.mode, - self.datatype) + if not self.rnnDesc then + self.rnnDesc = createRNNDescriptors(1) + end + + errcheck('cudnnSetRNNDescriptor', + self.rnnDesc[0], + self.hiddenSize, + self.seqLength, + self.numLayers, + self.dropoutDesc[0], + self.inputMode, + self.bidirectional, + self.mode, + self.datatype) end -function RNN:resetWeightDescriptors() - if not self.wDesc then - self.wDesc = createFilterDescriptors(1) - end - - local weightSize = torch.LongTensor(1) - errcheck('cudnnGetRNNParamsSize', - cudnn.getHandle(), - self.rnnDesc[0], - self.xDescs, - weightSize:data()) - local dim = torch.IntTensor({weightSize[1] / 4, 1, 1}) -- sizeof(float) - - errcheck('cudnnSetFilterNdDescriptor', - self.wDesc[0], - self.datatype, - 0, -- TODO ffi CUDNN_TENSOR_NCHW - 3, - dim:data()) +function RNN:resetWeightDescriptor() + if not self.wDesc then + self.wDesc = createFilterDescriptors(1) + end + + local dim = torch.IntTensor({self.weight:size(1), 1, 1}) + + errcheck('cudnnSetFilterNdDescriptor', + self.wDesc[0], + self.datatype, + 'CUDNN_TENSOR_NCHW', + 3, + dim:data()) end function RNN:resetIODescriptors() - self.xDescs = createTensorDescriptors(self.seqLength) - self.yDescs = createTensorDescriptors(self.seqLength) - - for i = 0, self.seqLength - 1 do - local dim = torch.IntTensor({self.inputSize, self.miniBatch, 1}) - local stride = torch.IntTensor({1, dim[1], dim[1] * dim[2]}) - - errcheck('cudnnSetTensorNdDescriptor', - self.xDescs[i], - self.datatype, - 3, - dim:data(), - stride:data()) - - dim[1] = self.hiddenSize * (self.bidirectional > 0 and 2 or 1) - stride[2] = dim[1] - stride[3] = dim[1] * dim[2] - - errcheck('cudnnSetTensorNdDescriptor', - self.yDescs[i], - self.datatype, - 3, - dim:data(), - stride:data()) - end + self.xDescs = createTensorDescriptors(self.seqLength) + self.yDescs = createTensorDescriptors(self.seqLength) + + for i = 0, self.seqLength - 1 do + local dim = torch.IntTensor({self.inputSize, self.miniBatch, self.seqLength}) + local stride = torch.IntTensor({1, dim[1], dim[1] * dim[2]}) + errcheck('cudnnSetTensorNdDescriptor', + self.xDescs[i], + self.datatype, + 3, + dim:data(), + stride:data()) + + local dim = torch.IntTensor({self.hiddenSize, self.miniBatch, self.seqLength}) + local stride = torch.IntTensor({1, dim[1], dim[1] * dim[2]}) + errcheck('cudnnSetTensorNdDescriptor', + self.yDescs[i], + self.datatype, + 3, + dim:data(), + stride:data()) + end end function RNN:resetHiddenDescriptors() - self.hxDesc = cudnn.toDescriptor(self.hx) - self.hyDesc = cudnn.toDescriptor(self.hy) + self.hxDesc = createTensorDescriptors(1) + self.hyDesc = createTensorDescriptors(1) + + local dim = torch.IntTensor({self.hiddenSize, self.miniBatch, self.numLayers}) + local stride = torch.IntTensor({1, dim[1], dim[1] * dim[2]}) + + errcheck('cudnnSetTensorNdDescriptor', + self.hxDesc[0], + self.datatype, + 3, + dim:data(), + stride:data()) + errcheck('cudnnSetTensorNdDescriptor', + self.hyDesc[0], + self.datatype, + 3, + dim:data(), + stride:data()) end function RNN:resetCellDescriptors() - self.cxDesc = cudnn.toDescriptor(self.cx) - self.cyDesc = cudnn.toDescriptor(self.cy) + self.cxDesc = createTensorDescriptors(1) + self.cyDesc = createTensorDescriptors(1) + + local dim = torch.IntTensor({self.hiddenSize, self.miniBatch, self.numLayers}) + local stride = torch.IntTensor({1, dim[1], dim[1] * dim[2]}) + + errcheck('cudnnSetTensorNdDescriptor', + self.cxDesc[0], + self.datatype, + 3, + dim:data(), + stride:data()) + errcheck('cudnnSetTensorNdDescriptor', + self.cyDesc[0], + self.datatype, + 3, + dim:data(), + stride:data()) end -function RNN:makeContiguous(input, gradOutput) +local function makeContiguous(self, input, gradOutput) if not input:isContiguous() then self._input = self._input or input.new() self._input:typeAs(input):resizeAs(input):copy(input) @@ -179,215 +225,279 @@ function RNN:makeContiguous(input, gradOutput) end function RNN:updateOutput(input) + assert(input:dim() == 3, 'input must have 3 dimensions: seqLength, miniBatch, inputSize') + + -- Decide which descriptors/tensors need to be updated. + local resetRNN = not self.dropoutDesc or not self.rnnDesc + local resetIO = not self.xDescs or not self.yDescs + local resetHC = not self.hxDesc or not self.hyDesc or not self.cxDesc or not self.cyDesc + local resetWeight = not self.wDesc + + if input:size(1) ~= self.seqLength then + self.seqLength = input:size(1) + resetRNN = true + resetIO = true + end + + if input:size(2) ~= self.miniBatch then + self.miniBatch = input:size(2) + resetIO = true + resetHC = true + end + + assert(input:size(3) == self.inputSize, 'Incorrect input size!') + + -- Update descriptors/tensors + if resetRNN then + self:resetDropoutDescriptor() + self:resetRNNDescriptor() + end + if resetIO then + self:resetIODescriptors(input) + end + if resetHC then + self:resetHiddenDescriptors() + self:resetCellDescriptors() + end + if resetWeight then + self:resetWeightDescriptor() + end + + local x = makeContiguous(self, input) + local y = self.output:resize(self.seqLength, self.miniBatch, self.hiddenSize) + local w = self.weight + local hy = self.hiddenOutput:resize(self.numLayers, self.miniBatch, self.hiddenSize):zero() + local cy = self.cellOutput:resize(self.numLayers, self.miniBatch, self.hiddenSize):zero() + + -- Optionally use hiddenInput/cellInput parameters + local hx = self.hiddenInput + local cx = self.cellInput + + if hx then + assert(hx:dim() == 3, 'hiddenInput must have 3 dimensions: numLayers, miniBatch, hiddenSize') + assert(hx:size(1) == self.numLayers, 'hiddenInput has incorrect number of layers!') + assert(hx:size(2) == self.miniBatch, 'hiddenInput has incorrect number of minibathes!') + assert(hx:size(3) == self.hiddenSize, 'hiddenIinput has incorrect size!') + assert(hx:isContiguous(), 'hiddenInput must be contiguous!') end + + if cx then + assert(cx:dim() == 3, 'cellInput must have 3 dimensions: numLayers, miniBatch, hiddenSize') + assert(cx:size(1) == self.numLayers, 'cellInput has incorrect number of layers!') + assert(cx:size(2) == self.miniBatch, 'cellInput has incorrect number of minibathes!') + assert(cx:size(3) == self.hiddenSize, 'cellInput has incorrect size!') + assert(cx:isContiguous(), 'cellInput must be contiguous!') + end + + self.workspace = cudnn.getSharedWorkspace() + local workspaceSize = torch.LongTensor(1) + errcheck('cudnnGetRNNWorkspaceSize', + cudnn.getHandle(), + self.rnnDesc[0], + self.xDescs, + workspaceSize:data()) + workspaceSize[1] = (workspaceSize[1] + 3) / 4 -- sizeof(float) + if self.workspace:size(1) < workspaceSize[1] then + self.workspace:resize(workspaceSize[1]) + end - assert(input:dim() == 3) - - -- Decide which descriptors/tensors need to be updated. - local resetRNN = not DropoutDesc or not RNNDesc - local resetIO = not xDescs or not yDescs - local resetHC = not self.hxDesc or not self.hyDesc or - not self.cxDesc or not self.cyDesc - local resetWeight = not wDesc - - if input:size(1) ~= self.inputSize then - self.inputSize = input:size(1) - resetRNN = true - resetIO = true - resetWeight = true - end - - if input:size(2) ~= self.miniBatch then - self.miniBatch = input:size(1) - resetRNN = true - resetIO = true - resetHC = true - resetWeight = true - end - - if input:size(3) ~= self.seqLength then - self.seqLength = input:size(1) - resetRNN = true - resetIO = true - end - - -- Update descriptors/tensors - if resetRNN then - self:resetDropoutDescriptor() - self:resetRNNDescriptor() - end - - local x = self:makeContiguous(input) - local y = self.output - if resetIO then - self.output:resize(self.hiddenSize, self.miniBatch, self.seqLength) - self:resetIODescriptors() - end - - -- Hidden/cell output becomes the new hidden/cell input. - local hx = self.hy - local cx = self.cy - local hy = self.hx - local cy = self.cx - if resetHC then - self.hx:resize(self.hiddenSize, self.miniBatch, self.numLayers) - self.cx:resize(self.hiddenSize, self.miniBatch, self.numLayers) - self.hy:resize(self.hiddenSize, self.miniBatch, self.numLayers) - self.cy:resize(self.hiddenSize, self.miniBatch, self.numLayers) - self:resetHiddenDescriptors() - self:resetCellDescriptors() - end - - local w = self.weight - if resetWeight then - local weightSize = torch.LongTensor(1) - errcheck('cudnnGetRNNParamsSize', - cudnn.getHandle(), - self.rnnDesc[0], - self.xDescs, - weightSize:data()) - weightSize[1] = (weightSize[1] + 3) / 4 -- sizeof(float) - self.weight:resize(weightSize[1] / 4) - self:resetWeightDescriptors() - end - - self.workspace = cudnn.getSharedWorkspace() - local workspaceSize = torch.LongTensor(1) - errcheck('cudnnGetRNNWorkspaceSize', - cudnn.getHandle(), - self.rnnDesc[0], - self.xDescs, - workspaceSize:data()) - workspaceSize[1] = (workspaceSize[1] + 3) / 4 -- sizeof(float) - if self.workspace:size(1) < workspaceSize[1] then - self.workspace:resize(workspaceSize[1]) - end - - local reserveSize = torch.LongTensor(1) - errcheck('cudnnGetRNNTrainingReserveSize', - cudnn.getHandle(), - self.rnnDesc[0], - self.xDescs, - reserveSize:data()) - reserveSize[1] = (reserveSize[1] + 3) / 4 -- sizeof(float) - if self.reserve:size(1) < reserveSize[1] then - self.reserve:resize(reserveSize[1]) - end - - errcheck('cudnnRNNForwardTraining', - cudnn.getHandle(), - self.rnnDesc[0], - self.xDescs, x:data(), - self.hxDesc[0], hx:data(), - self.cxDesc[0], cx:data(), - self.wDesc[0], w:data(), - self.yDescs, y:data(), - self.hyDesc[0], hy:data(), - self.cyDesc[0], cy:data(), - self.workspace:data(), self.workspace:size(1) * 4, -- sizeof(float) - self.reserve:data(), self.reserve:size(1) * 4) -- sizeof(float) + if self.train then + local reserveSize = torch.LongTensor(1) + errcheck('cudnnGetRNNTrainingReserveSize', + cudnn.getHandle(), + self.rnnDesc[0], + self.xDescs, + reserveSize:data()) + reserveSize[1] = (reserveSize[1] + 3) / 4 -- sizeof(float) + if self.reserve:dim() == 0 or + self.reserve:size(1) < reserveSize[1] then + self.reserve:resize(reserveSize[1]) + end + + errcheck('cudnnRNNForwardTraining', + cudnn.getHandle(), + self.rnnDesc[0], + self.xDescs, x:data(), + self.hxDesc[0], hx and hx:data() or nil, + self.cxDesc[0], cx and cx:data() or nil, + self.wDesc[0], w:data(), + self.yDescs, y:data(), + self.hyDesc[0], hy:data(), + self.cyDesc[0], cy:data(), + self.workspace:data(), self.workspace:size(1) * 4, -- sizeof(float) + self.reserve:data(), self.reserve:size(1) * 4) -- sizeof(float) + else + errcheck('cudnnRNNForwardInference', + cudnn.getHandle(), + self.rnnDesc[0], + self.xDescs, x:data(), + self.hxDesc[0], hx and hx:data() or nil, + self.cxDesc[0], cx and cx:data() or nil, + self.wDesc[0], w:data(), + self.yDescs, y:data(), + self.hyDesc[0], hy:data(), + self.cyDesc[0], cy:data(), + self.workspace:data(), self.workspace:size(1) * 4) -- sizeof(float) + end + + return self.output end function RNN:updateGradInput(input, gradOutput) + assert(input:dim() == 3, 'input should have 3 dimensions: seqLength, miniBatch, inputSize') + assert(input:size(1) == self.seqLength, 'input has incorrect sequence length!') + assert(input:size(2) == self.miniBatch, 'input has incorrect minibatch size!') + assert(input:size(3) == self.inputSize, 'input has incorrect size!') + + assert(gradOutput:isSameSizeAs(self.output), 'gradOutput has incorrect size!') + assert(self.train, 'updateGradInput can only be called when training!') + + local x, dy = makeContiguous(self, input, gradOutput) + local y = self.output + local w = self.weight + local dx = self.gradInput:resizeAs(input) + local hx = self.hiddenInput + local cx = self.cellInput + local dhy = self.gradHiddenOutput + local dcy = self.gradCellOutput + local dhx = self.gradHiddenInput:resize(self.numLayers, self.miniBatch, self.hiddenSize):zero() + local dcx = self.gradCellInput:resize(self.numLayers, self.miniBatch, self.hiddenSize):zero() + + + if hx then + assert(hx:dim() == 3, 'hiddenInput must have 3 dimensions: numLayers, miniBatch, hiddenSize') + assert(hx:size(1) == self.numLayers, 'hiddenInput has incorrect number of layers!') + assert(hx:size(2) == self.miniBatch, 'hiddenInput has incorrect minibatch size!') + assert(hx:size(3) == self.hiddenSize, 'hiddenInput has incorrect size!') + assert(hx:isContiguous(), 'hiddenInput must be contiguous!') + end + + if cx then + assert(cx:dim() == 3, 'cellInput must have 3 dimensions: numLayers, miniBatch, hiddenSize') + assert(cx:size(1) == self.numLayers, 'cellInput has incorrect number of layers!') + assert(cx:size(2) == self.miniBatch, 'cellInput has incorrect minibatch size!') + assert(cx:size(3) == self.hiddenSize, 'cellInput has incorrect size!') + assert(cx:isContiguous(), 'cellInput must be contiguous!') + end + + if dhy then + assert(dhy:dim() == 3, 'gradHiddenOutput must have 3 dimensions: ' .. + 'numLayers, miniBatch, hiddenSize') + assert(dhy:size(1) == self.numLayers, 'gradHiddenOutput has incorrect number of layers!') + assert(dhy:size(2) == self.miniBatch, 'gradHiddenOutput has incorrect minibatch size!') + assert(dhy:size(3) == self.hiddenSize, 'gradHiddenOutput has incorrect size!') + assert(dhy:isContiguous(), 'gradHiddenOutput must be contiguous!') + end + + if dcy then + assert(dcy:dim() == 3, 'gradCellOutput must have 3 dimensions: ' .. + 'numLayers, miniBatch, hiddenSize') + assert(dcy:size(1) == self.numLayers, 'gradCellOutput has incorrect number of layers!') + assert(dcy:size(2) == self.miniBatch, 'gradCellOutput has incorrect minibatch size!') + assert(dcy:size(3) == self.hiddenSize, 'gradCellOutput has incorrect size!') + assert(dcy:isContiguous(), 'gradCellOutput must be contiguous!') + end - assert(input:dim() == 3) - assert(input:size(1) == self.inputSize) - assert(input:size(2) == self.miniBatch) - assert(input:size(3) == self.seqLength) - - assert(gradOutput:dim() == self.output:dim()) - for i = 1, gradOutput:dim() do - assert(gradOutput:size(i) == self.output:size(i)) - end - - local y = self.output - local dy = gradOutput - local w = self.weight - local hx = self.hx - local cx = self.cx - local dx = self.gradInput - - if dx:dim() ~= 3 or - dx:size(1) ~= input:size(1) or - dx:size(2) ~= input:size(2) or - dx:size(3) ~= input:size(3) then - dx:resizeAs(input) - end - - errcheck('cudnnRNNBackwardData', - cudnn.getHandle(), - self.rnnDesc[0], - self.yDescs, y:data(), - self.yDescs, dy:data(), - self.hyDesc[0], nil, -- TODO should dhy be ignored? - self.cyDesc[0], nil, -- TODO should dhy be ignored? - self.wDesc[0], w:data(), - self.hxDesc[0], hx:data(), - self.cxDesc[0], cx:data(), - self.xDescs, dx:data(), - self.hxDesc[0], nil, -- TODO should dhx be ignored? - self.cxDesc[0], nil, -- TODO should dcx be ignored? - self.workspace:data(), self.workspace:size(1) * 4, -- sizeof(float) - self.reserve:data(), self.reserve:size(1) * 4) -- sizeof(float) + errcheck('cudnnRNNBackwardData', + cudnn.getHandle(), + self.rnnDesc[0], + self.yDescs, y:data(), + self.yDescs, dy:data(), + self.hyDesc[0], dhy and dhy:data() or nil, + self.cyDesc[0], dcy and dcy:data() or nil, + self.wDesc[0], w:data(), + self.hxDesc[0], hx and hx:data() or nil, + self.cxDesc[0], cx and cx:data() or nil, + self.xDescs, dx:data(), + self.hxDesc[0], dhx:data(), + self.cxDesc[0], dcx:data(), + self.workspace:data(), self.workspace:size(1) * 4, -- sizeof(float) + self.reserve:data(), self.reserve:size(1) * 4) -- sizeof(float) + + return self.gradInput end function RNN:accGradParameters(input, gradOutput, scale) + scale = scale or 1 + if scale == 0 then return end + + assert(input:dim() == 3, 'input should have 3 dimensions: seqLength, miniBatch, inputSize') + assert(input:size(1) == self.seqLength, 'input has incorrect sequence length!') + assert(input:size(2) == self.miniBatch, 'input has incorrect minibatch size!') + assert(input:size(3) == self.inputSize, 'input has incorrect size!') + + assert(gradOutput:isSameSizeAs(self.output), 'gradOutput has incorrect size!') + assert(self.train, 'accGradParameters can only be called when training!') + + local x, dy = makeContiguous(self, input, gradOutput) + local hx = self.hiddenInput + local y = self.output + local dw = self.gradWeight + + if hx then + assert(hx:dim() == 3, 'hiddenInput must have 3 dimensions: numLayers, miniBatch, hiddenSize') + assert(hx:size(1) == self.numLayers, 'hiddenInput has incorrect number of layers!') + assert(hx:size(2) == self.miniBatch, 'hiddenInput has incorrect minibatch size!') + assert(hx:size(3) == self.hiddenSize, 'hiddenIinput has incorrect size!') + assert(hx:isContiguous(), 'hiddenInput must be contiguous!') + end + + -- cudnnRNNBackwardWeights doesn't accept a scale parameter so instead + -- scale before and after. + -- TODO: How much does this impact accuracy? + -- Use a secondary buffer instead? + if scale ~= 1 then + local scaleTensor = torch.Tensor({1 / scale}) + errcheck('cudnnScaleTensor', + cudnn.getHandle(), + self.wDesc[0], + self.dw:data(), + scaleTensor:data()) + end - assert(input:dim() == 3) - assert(input:size(1) == self.inputSize) - assert(input:size(2) == self.miniBatch) - assert(input:size(3) == self.seqLength) - - assert(gradOutput:dim() == self.output:dim()) - for i = 1, gradOutput:dim() do - assert(gradOutput:size(i) == self.output:size(i)) - end - - local x = input - local hx = self.hx - local y = self.output - local dw = self.gradParameters - - if dw:dim() ~= 3 or - dw:size(1) ~= self.weight:size(1) or - dw:size(2) ~= self.weight:size(2) or - dw:size(3) ~= self.weight:size(3) then - dw:resizeAs(self.weight) - end - - if scale == 0 then - return - end - - -- cudnnRNNBackwardWeights doesn't accept a scale parameter so instead - -- scale before and after. - -- TODO: How much does this impact accuracy? - if scale ~= 1 then - local scaleTensor = torch.Tensor({1 / scale}) - errcheck('cudnnScaleTensor', - cudnn.getHandle(), - self.wDesc[0], - self.dw:data(), - scaleTensor:data()) - end - - errcheck('cudnnRNNBackwardWeights', - cudnn.getHandle(), - self.rnnDesc[0], - self.xDescs, x:data(), - self.hxDesc[0], hx:data(), - self.yDescs, y:data(), - self.workspace:data(), self.workspace:size(1) * 4, -- sizeof(float) - self.wDesc[0], dw:data(), - self.reserve:data(), self.reserve:size(1) * 4) -- sizeof(float) - - - if scale ~= 1 then - local scaleTensor = torch.Tensor({scale}) - errcheck('cudnnScaleTensor', - cudnn.getHandle(), - self.wDesc[0], - self.dw:data(), - scaleTensor:data()) - end + errcheck('cudnnRNNBackwardWeights', + cudnn.getHandle(), + self.rnnDesc[0], + self.xDescs, x:data(), + self.hxDesc[0], hx and hx:data() or nil, + self.yDescs, y:data(), + self.workspace:data(), self.workspace:size(1) * 4, -- sizeof(float) + self.wDesc[0], dw:data(), + self.reserve:data(), self.reserve:size(1) * 4) -- sizeof(float) + + if scale ~= 1 then + local scaleTensor = torch.Tensor({scale}) + errcheck('cudnnScaleTensor', + cudnn.getHandle(), + self.wDesc[0], + self.dw:data(), + scaleTensor:data()) + end end +function RNN:clearDesc() + self.dropoutDesc = nil + self.rnnDesc = nil + self.dropoutDesc = nil + self.wDesc = nil + self.xDescs = nil + self.yDescs = nil + self.hxDesc = nil + self.hyDesc = nil + self.cxDesc = nil + self.cyDesc = nil +end + +function RNN:write(f) + self:clearDesc() + local var = {} + for k,v in pairs(self) do + var[k] = v + end + f:writeObject(var) +end + +function RNN:clearState() + self:clearDesc() + nn.utils.clear(self, '_input', '_gradOutput', 'reserve', 'dropoutStates') + return parent.clearState(self) +end diff --git a/rnn_exp.lua b/rnn_exp.lua deleted file mode 100644 index 3af7118..0000000 --- a/rnn_exp.lua +++ /dev/null @@ -1,293 +0,0 @@ -import 'cudnn' -local ffi = require 'ffi' -local errcheck = cudnn.errcheck - -local datatype = 0 -- TODO CUDNN_FLOAT, should get the constant from ffi -local hiddenSize = 1 -- TODO This is a layer parameter, correct? -local inputSize = 1 -- TODO Is this a layer parameter or determined by input? -local seqLength = 1 -- TODO Is this a layer parameter or determined by input? -local numLayers = 1 -- TODO -local miniBatch = 1 -- TODO -local bidirectional = 0 -- TODO CUDNN_UNIDIRECTIONAL, should get the constant from ffi -local inputMode = 0 -- TODO CUDNN_LINEAR_INPUT, should get the constant from ffi -local mode = 0 -- TODO CUDNN_RNN_RELU, should get the constant from ffi -local dropout = 0 -- TODO -local seed = 0x01234567 -- TODO - --- Dropout Descriptor - -local dropoutStatesSize = torch.LongTensor(1) -errcheck('cudnnDropoutGetStatesSize', - cudnn.getHandle(), - dropoutStatesSize:data()) -local dropoutStates = torch.CudaTensor(dropoutStatesSize[1]) - -local dropoutDesc = ffi.new('cudnnDropoutDescriptor_t[?]', 1) -errcheck('cudnnCreateDropoutDescriptor', dropoutDesc) --- TODO GC was being called early. Ignore cleanup for now. --- ffi.gc(dropoutDesc, function(d) errcheck('cudnnDestroyDropoutDescriptor', d[0]) end) -errcheck('cudnnSetDropoutDescriptor', - dropoutDesc[0], - cudnn.getHandle(), - dropout, - -- TODO Using dropoutStates causes an invalid memory access error. - dropoutStates:data(), dropoutStatesSize[1], - seed) - --- RNN Descriptor -local rnnDesc = ffi.new('cudnnRNNDescriptor_t[?]', 1) -errcheck('cudnnCreateRNNDescriptor', rnnDesc) --- ffi.gc(rnnDesc, function(d) errcheck('cudnnDestroyRNNDescriptor', d[0]) end) -errcheck('cudnnSetRNNDescriptor', - rnnDesc[0], - hiddenSize, - seqLength, - numLayers, - dropoutDesc[0], - inputMode, - bidirectional, - mode, - datatype) - --- Input -local inputDescs = ffi.new('cudnnTensorDescriptor_t[?]', seqLength) -for i = 0, seqLength - 1 do - errcheck('cudnnCreateTensorDescriptor', inputDescs + i) -end --- ffi.gc(inputDescs, function() --- for i = 0, seqLength - 1 do --- errcheck('cudnnDestroyTensorDescriptor', inputDescs[i]) --- end --- end) - -local dims = torch.IntTensor({inputSize, miniBatch, seqLength}) -local stride = torch.IntTensor({1, dims[1], 1}) - -for i = 0, seqLength - 1 do - errcheck('cudnnSetTensorNdDescriptor', - inputDescs[i], - datatype, - 3, - dims:data(), - stride:data()) -end - -local input = torch.CudaTensor(dims[1], dims[2], dims[3]) - --- Ouptut -local outputDescs = ffi.new('cudnnTensorDescriptor_t[?]', seqLength) -for i = 0, seqLength - 1 do - errcheck('cudnnCreateTensorDescriptor', outputDescs + i) -end --- ffi.gc(outputDescs, function() --- for i = 0, seqLength - 1 do --- errcheck('cudnnDestroyTensorDescriptor', outputDescs[i]) --- end --- end) - -local dims = torch.IntTensor({hiddenSize, miniBatch, seqLength}) -local stride = torch.IntTensor({1, dims[1], 1}) - -for i = 0, seqLength - 1 do - errcheck('cudnnSetTensorNdDescriptor', - outputDescs[i], - datatype, - 3, - dims:data(), - stride:data()) -end - -local output = torch.CudaTensor(dims[1], dims[2], dims[3]) - --- Hidden -local hiddenInputDesc = ffi.new('cudnnTensorDescriptor_t[?]', 1) -local hiddenOutputDesc = ffi.new('cudnnTensorDescriptor_t[?]', 1) -errcheck('cudnnCreateTensorDescriptor', hiddenInputDesc) -errcheck('cudnnCreateTensorDescriptor', hiddenOutputDesc) --- ffi.gc(hiddenInputDesc, function(d) errcheck('cudnnDestroyTensorDescriptor', d[0]) end) --- ffi.gc(hiddenOutputDesc, function(d) errcheck('cudnnDestroyTensorDescriptor', d[0]) end) - -local dims = torch.IntTensor({hiddenSize, miniBatch, numLayers}) -local stride = torch.IntTensor({1, dims[1], 1}) - -errcheck('cudnnSetTensorNdDescriptor', - hiddenInputDesc[0], - datatype, - 3, - dims:data(), - stride:data()) -errcheck('cudnnSetTensorNdDescriptor', - hiddenOutputDesc[0], - datatype, - 3, - dims:data(), - stride:data()) - -local hiddenInput = torch.CudaTensor(dims[1], dims[2], dims[3]) -local hiddenOutput = torch.CudaTensor(dims[1], dims[2], dims[3]) - --- Cell -local cellInputDesc = ffi.new('cudnnTensorDescriptor_t[?]', 1) -local cellOutputDesc = ffi.new('cudnnTensorDescriptor_t[?]', 1) -errcheck('cudnnCreateTensorDescriptor', cellInputDesc) -errcheck('cudnnCreateTensorDescriptor', cellOutputDesc) --- ffi.gc(cellInputDesc, function(d) errcheck('cudnnDestroyTensorDescriptor', d[0]) end) --- ffi.gc(cellOutputDesc, function(d) errcheck('cudnnDestroyTensorDescriptor', d[0]) end) - -local dims = torch.IntTensor({hiddenSize, miniBatch, numLayers}) -local stride = torch.IntTensor({1, dims[1], 1}) - -errcheck('cudnnSetTensorNdDescriptor', - cellInputDesc[0], - datatype, - 3, - dims:data(), - stride:data()) -errcheck('cudnnSetTensorNdDescriptor', - cellOutputDesc[0], - datatype, - 3, - dims:data(), - stride:data()) - -local cellInput = torch.CudaTensor(dims[1], dims[2], dims[3]) -local cellOutput = torch.CudaTensor(dims[1], dims[2], dims[3]) - --- Weight -local weightDesc = ffi.new('cudnnFilterDescriptor_t[?]', 1) -errcheck('cudnnCreateFilterDescriptor', weightDesc) --- ffi.gc(weightDesc, function(d) errcheck('cudnnDestroyFilterDescriptor', d[0]) end) - -local weightSize = torch.LongTensor(1) -errcheck('cudnnGetRNNParamsSize', - cudnn.getHandle(), - rnnDesc[0], - inputDescs, - weightSize:data()) -local dims = torch.IntTensor({weightSize[1] / 4, 1, 1}) -- sizeof(float) - -errcheck('cudnnSetFilterNdDescriptor', - weightDesc[0], - datatype, - 0, -- TODO ffi CUDNN_TENSOR_NCHW - 3, - dims:data()) -local weight = torch.CudaTensor(dims[1], dims[2], dims[3]) - --- Workspace -local workspace = cudnn.getSharedWorkspace() -local workspaceSize = torch.LongTensor(1) -errcheck('cudnnGetRNNWorkspaceSize', - cudnn.getHandle(), - rnnDesc[0], - inputDescs, - workspaceSize:data()) -workspace:resize(workspaceSize[1] / 4) -- sizeof(float) - --- Print Descriptor data -print("hiddenSize = " .. hiddenSize) -print("inputSize = " .. inputSize) -print("seqLength = " .. seqLength) -print("numLayers = " .. numLayers) -print("miniBatch = " .. miniBatch) -print("bidirectional = " .. bidirectional) -print("inputMode = " .. inputMode) -print("mode = " .. mode) -print("dropout = " .. dropout) - -local datatype = torch.IntTensor(1) -local nbDims = torch.IntTensor(1) -local dims = torch.IntTensor(3) -local stride = torch.IntTensor(3) - -errcheck('cudnnGetTensorNdDescriptor', - inputDescs[0], - 3, - datatype:data(), - nbDims:data(), - dims:data(), - stride:data()) -print("Input " .. - "dim=(" .. dims[1] .. ", " .. dims[2] .. ", " .. dims[3] .. ") " .. - "stride=(" .. stride[1] .. ", " .. stride[2] .. ", " .. stride[3] .. ")") - -errcheck('cudnnGetTensorNdDescriptor', - outputDescs[0], - 3, - datatype:data(), - nbDims:data(), - dims:data(), - stride:data()) -print("Output " .. - "dim=(" .. dims[1] .. ", " .. dims[2] .. ", " .. dims[3] .. ") " .. - "stride=(" .. stride[1] .. ", " .. stride[2] .. ", " .. stride[3] .. ")") - -errcheck('cudnnGetTensorNdDescriptor', - hiddenInputDesc[0], - 3, - datatype:data(), - nbDims:data(), - dims:data(), - stride:data()) -print("Hidden Input " .. - "dim=(" .. dims[1] .. ", " .. dims[2] .. ", " .. dims[3] .. ") " .. - "stride=(" .. stride[1] .. ", " .. stride[2] .. ", " .. stride[3] .. ")") - -errcheck('cudnnGetTensorNdDescriptor', - hiddenOutputDesc[0], - 3, - datatype:data(), - nbDims:data(), - dims:data(), - stride:data()) -print("Hidden Output " .. - "dim=(" .. dims[1] .. ", " .. dims[2] .. ", " .. dims[3] .. ") " .. - "stride=(" .. stride[1] .. ", " .. stride[2] .. ", " .. stride[3] .. ")") - -errcheck('cudnnGetTensorNdDescriptor', - cellInputDesc[0], - 3, - datatype:data(), - nbDims:data(), - dims:data(), - stride:data()) -print("Cell Input " .. - "dim=(" .. dims[1] .. ", " .. dims[2] .. ", " .. dims[3] .. ") " .. - "stride=(" .. stride[1] .. ", " .. stride[2] .. ", " .. stride[3] .. ")") - -errcheck('cudnnGetTensorNdDescriptor', - cellOutputDesc[0], - 3, - datatype:data(), - nbDims:data(), - dims:data(), - stride:data()) -print("Cell Output " .. - "dim=(" .. dims[1] .. ", " .. dims[2] .. ", " .. dims[3] .. ") " .. - "stride=(" .. stride[1] .. ", " .. stride[2] .. ", " .. stride[3] .. ")") - -local format = ffi.new('cudnnTensorFormat_t[?]', 1) -errcheck('cudnnGetFilterNdDescriptor', - weightDesc[0], - 3, - datatype:data(), - format, - nbDims:data(), - dims:data()) - -print("Weight " .. - "dim=(" .. dims[1] .. ", " .. dims[2] .. ", " .. dims[3] .. ") ") - --- ForwardInference -errcheck('cudnnRNNForwardInference', - cudnn.getHandle(), - rnnDesc[0], - inputDescs, input:data(), - hiddenInputDesc[0], hiddenInput:data(), - cellInputDesc[0], cellInput:data(), - weightDesc[0], weight:data(), - outputDescs, output:data(), - hiddenOutputDesc[0], hiddenOutput:data(), - cellOutputDesc[0], cellOutput:data(), - workspace:data(), workspace:size(1) * 4) -- sizeof(float) - diff --git a/rnn_exp2.lua b/rnn_exp2.lua deleted file mode 100644 index e2ad093..0000000 --- a/rnn_exp2.lua +++ /dev/null @@ -1,198 +0,0 @@ -import 'cudnn' -local ffi = require 'ffi' -local errcheck = cudnn.errcheck - -local datatype = 0 -- TODO CUDNN_DATA_FLOAT=0, should get the constant from ffi -local hiddenSize = 1 -- TODO This is a layer parameter, correct? -local inputSize = 1 -- TODO Is this a layer parameter or determined by input? -local seqLength = 1 -- TODO Is this a layer parameter or determined by input? -local numLayers = 1 -- TODO -local miniBatch = 1 -- TODO -local bidirectional = 0 -- TODO CUDNN_UNIDIRECTIONAL=0, should get the constant from ffi -local inputMode = 0 -- TODO CUDNN_LINEAR_INPUT=0, should get the constant from ffi -local mode = 0 -- TODO CUDNN_RNN_RELU=0, CUDNN_LSTM=1, CUDNN_GRU=2 should get the constant from ffi -local dropout = 0 -- TODO -local seed = 0x01234567 -- TODO - --- Dropout Descriptor - -print() -print("---------------------------------------------------------------------------------------") -print() -local dropoutStatesSize = torch.LongTensor(1) -errcheck('cudnnDropoutGetStatesSize', cudnn.getHandle(), dropoutStatesSize:data()) -local dropoutStates = torch.CudaTensor(dropoutStatesSize[1]) - -local dropoutDesc = ffi.new('cudnnDropoutDescriptor_t[?]', 1) -errcheck('cudnnCreateDropoutDescriptor', dropoutDesc) - --- TODO GC was being called early. Ignore cleanup for now. --- ffi.gc(dropoutDesc, function(d) errcheck('cudnnDestroyDropoutDescriptor', d[0]) end) - -errcheck('cudnnSetDropoutDescriptor', dropoutDesc[0], cudnn.getHandle(), dropout, dropoutStates:data(), dropoutStatesSize[1], seed) - --- RNN Descriptor -local rnnDesc = ffi.new('cudnnRNNDescriptor_t[?]', 1) -errcheck('cudnnCreateRNNDescriptor', rnnDesc) --- ffi.gc(rnnDesc, function(d) errcheck('cudnnDestroyRNNDescriptor', d[0]) end) -errcheck('cudnnSetRNNDescriptor', rnnDesc[0], hiddenSize, seqLength, numLayers, dropoutDesc[0], inputMode, bidirectional, mode, datatype) - --- Input -local inputDescs = ffi.new('cudnnTensorDescriptor_t[?]', seqLength) -for i = 0, seqLength - 1 do - errcheck('cudnnCreateTensorDescriptor', inputDescs + i) -end --- ffi.gc(inputDescs, function() --- for i = 0, seqLength - 1 do --- errcheck('cudnnDestroyTensorDescriptor', inputDescs[i]) --- end --- end) - -local dims_1 = torch.IntTensor({inputSize, miniBatch, seqLength}) -local stride_1 = torch.IntTensor({1, dims_1[1], 1}) - -for i = 0, seqLength - 1 do - errcheck('cudnnSetTensorNdDescriptor', inputDescs[i], datatype, 3, dims_1:data(), stride_1:data()) -end - -local input = torch.CudaTensor(dims_1[1], dims_1[2], dims_1[3]) - --- Ouptut -local outputDescs = ffi.new('cudnnTensorDescriptor_t[?]', seqLength) -for i = 0, seqLength - 1 do - errcheck('cudnnCreateTensorDescriptor', outputDescs + i) -end --- ffi.gc(outputDescs, function() --- for i = 0, seqLength - 1 do --- errcheck('cudnnDestroyTensorDescriptor', outputDescs[i]) --- end --- end) - -local dims_2 = torch.IntTensor({hiddenSize, miniBatch, seqLength}) -local stride_2 = torch.IntTensor({1, dims_2[1], 1}) - -for i = 0, seqLength - 1 do - errcheck('cudnnSetTensorNdDescriptor', outputDescs[i], datatype, 3, dims_2:data(), stride_2:data()) -end - -local output = torch.CudaTensor(dims_2[1], dims_2[2], dims_2[3]) - --- Hidden -local hiddenInputDesc = ffi.new('cudnnTensorDescriptor_t[?]', 1) -local hiddenOutputDesc = ffi.new('cudnnTensorDescriptor_t[?]', 1) -errcheck('cudnnCreateTensorDescriptor', hiddenInputDesc) -errcheck('cudnnCreateTensorDescriptor', hiddenOutputDesc) --- ffi.gc(hiddenInputDesc, function(d) errcheck('cudnnDestroyTensorDescriptor', d[0]) end) --- ffi.gc(hiddenOutputDesc, function(d) errcheck('cudnnDestroyTensorDescriptor', d[0]) end) - -local dims_3 = torch.IntTensor({hiddenSize, miniBatch, numLayers}) -local stride_3 = torch.IntTensor({1, dims_3[1], 1}) - -errcheck('cudnnSetTensorNdDescriptor', hiddenInputDesc[0], datatype, 3, dims_3:data(), stride_3:data()) -errcheck('cudnnSetTensorNdDescriptor', hiddenOutputDesc[0], datatype, 3, dims_3:data(), stride_3:data()) - -local hiddenInput = torch.CudaTensor(dims_3[1], dims_3[2], dims_3[3]) -local hiddenOutput = torch.CudaTensor(dims_3[1], dims_3[2], dims_3[3]) - --- Cell -local cellInputDesc = ffi.new('cudnnTensorDescriptor_t[?]', 1) -local cellOutputDesc = ffi.new('cudnnTensorDescriptor_t[?]', 1) -errcheck('cudnnCreateTensorDescriptor', cellInputDesc) -errcheck('cudnnCreateTensorDescriptor', cellOutputDesc) --- ffi.gc(cellInputDesc, function(d) errcheck('cudnnDestroyTensorDescriptor', d[0]) end) --- ffi.gc(cellOutputDesc, function(d) errcheck('cudnnDestroyTensorDescriptor', d[0]) end) - -local dims_4 = torch.IntTensor({hiddenSize, miniBatch, numLayers}) -local stride_4 = torch.IntTensor({1, dims_4[1], 1}) - -errcheck('cudnnSetTensorNdDescriptor', cellInputDesc[0], datatype, 3, dims_4:data(), stride_4:data()) -errcheck('cudnnSetTensorNdDescriptor', cellOutputDesc[0], datatype, 3, dims_4:data(), stride_4:data()) - -local cellInput = torch.CudaTensor(dims_4[1], dims_4[2], dims_4[3]) -local cellOutput = torch.CudaTensor(dims_4[1], dims_4[2], dims_4[3]) - --- Weight -local weightDesc = ffi.new('cudnnFilterDescriptor_t[?]', 1) -errcheck('cudnnCreateFilterDescriptor', weightDesc) --- ffi.gc(weightDesc, function(d) errcheck('cudnnDestroyFilterDescriptor', d[0]) end) - -local weightSize = torch.LongTensor(1) -errcheck('cudnnGetRNNParamsSize', cudnn.getHandle(), rnnDesc[0], inputDescs, weightSize:data()) -local dims_5 = torch.IntTensor({weightSize[1] / 4, 1, 1}) -- sizeof(float) - --- TODO ffi CUDNN_TENSOR_NCHW -errcheck('cudnnSetFilterNdDescriptor', weightDesc[0], datatype, 0, 3, dims_5:data()) - -local weight = torch.CudaTensor(dims_5[1], dims_5[2], dims_5[3]) - --- Workspace -local workspace = cudnn.getSharedWorkspace() -local workspaceSize = torch.LongTensor(1) -errcheck('cudnnGetRNNWorkspaceSize', cudnn.getHandle(), rnnDesc[0], inputDescs, workspaceSize:data()) -workspace:resize(workspaceSize[1] * 40000) -- sizeof(float) - --- Print Descriptor data -print("hiddenSize = " .. hiddenSize) -print("inputSize = " .. inputSize) -print("seqLength = " .. seqLength) -print("numLayers = " .. numLayers) -print("miniBatch = " .. miniBatch) -print("bidirectional = " .. bidirectional) -print("inputMode = " .. inputMode) -print("mode = " .. mode) -print("dropout = " .. dropout) - -local datatype = torch.IntTensor(1) -local nbDims = torch.IntTensor(1) -local dims = torch.IntTensor(3) -local stride = torch.IntTensor(3) - -errcheck('cudnnGetTensorNdDescriptor', inputDescs[0], 3, datatype:data(), nbDims:data(), dims:data(), stride:data()) -print("Input dim=(" .. dims[1] .. ", " .. dims[2] .. ", " .. dims[3] .. ") " .. "stride=(" .. stride[1] .. ", " .. stride[2] .. ", " .. stride[3] .. ")") - -errcheck('cudnnGetTensorNdDescriptor', outputDescs[0], 3, datatype:data(), nbDims:data(), dims:data(), stride:data()) -print("Output dim=(" .. dims[1] .. ", " .. dims[2] .. ", " .. dims[3] .. ") " .. "stride=(" .. stride[1] .. ", " .. stride[2] .. ", " .. stride[3] .. ")") - -errcheck('cudnnGetTensorNdDescriptor', hiddenInputDesc[0], 3, datatype:data(), nbDims:data(), dims:data(), stride:data()) -print("Hidden Input dim=(" .. dims[1] .. ", " .. dims[2] .. ", " .. dims[3] .. ") " .. "stride=(" .. stride[1] .. ", " .. stride[2] .. ", " .. stride[3] .. ")") - -errcheck('cudnnGetTensorNdDescriptor', hiddenOutputDesc[0], 3, datatype:data(), nbDims:data(), dims:data(), stride:data()) -print("Hidden Output dim=(" .. dims[1] .. ", " .. dims[2] .. ", " .. dims[3] .. ") " .. "stride=(" .. stride[1] .. ", " .. stride[2] .. ", " .. stride[3] .. ")") - -errcheck('cudnnGetTensorNdDescriptor', cellInputDesc[0], 3, datatype:data(), nbDims:data(), dims:data(), stride:data()) -print("Cell Input dim=(" .. dims[1] .. ", " .. dims[2] .. ", " .. dims[3] .. ") " .. "stride=(" .. stride[1] .. ", " .. stride[2] .. ", " .. stride[3] .. ")") - -errcheck('cudnnGetTensorNdDescriptor', cellOutputDesc[0], 3, datatype:data(), nbDims:data(), dims:data(), stride:data()) -print("Cell Output dim=(" .. dims[1] .. ", " .. dims[2] .. ", " .. dims[3] .. ") " .. "stride=(" .. stride[1] .. ", " .. stride[2] .. ", " .. stride[3] .. ")") - -local format = ffi.new('cudnnTensorFormat_t[?]', 1) -errcheck('cudnnGetFilterNdDescriptor', weightDesc[0], 3, datatype:data(), format, nbDims:data(), dims:data()) - -print("Weight dim=(" .. dims[1] .. ", " .. dims[2] .. ", " .. dims[3] .. ") ") - ------- ForwardInference ---errcheck('cudnnRNNForwardInference', --- cudnn.getHandle(), --- rnnDesc[0], --- inputDescs, input:data(), --- hiddenInputDesc[0], nil, -- hiddenInput:data(), --- cellInputDesc[0], nil, -- cellInput:data(), --- weightDesc[0], weight:data(), --- outputDescs, output:data(), --- hiddenOutputDesc[0], nil, -- hiddenOutput:data(), --- cellOutputDesc[0], nil, -- cellOutput:data(), --- workspace:data(), workspace:size(1) * 40000) -- sizeof(float) - ----- ForwardInference -errcheck('cudnnRNNForwardInference', - cudnn.getHandle(), - rnnDesc[0], - inputDescs, input:data(), - hiddenInputDesc[0], hiddenInput:data(), - cellInputDesc[0], cellInput:data(), - weightDesc[0], weight:data(), - outputDescs, output:data(), - hiddenOutputDesc[0], hiddenOutput:data(), - cellOutputDesc[0], cellOutput:data(), - workspace:data(), workspace:size(1) * 40000) -- sizeof(float) - diff --git a/test/test.lua b/test/test.lua index 8fcd1b9..ba1b5a1 100644 --- a/test/test.lua +++ b/test/test.lua @@ -1444,10 +1444,10 @@ math.randomseed(os.time()) mytester = torch.Tester() mytester:add(cudnntest) --- if torch.random(1,2) == 1 then +if torch.random(1,2) == 1 then cudnn.benchmark = true -- run manual auto-tuner - cudnn.verbose = true --- end +-- cudnn.verbose = true +end for i=1,cutorch.getDeviceCount() do |