| author    | Boris Fomitchev <bfomitchev@nvidia.com>          | 2016-10-19 09:31:04 +0300 |
| --------- | ------------------------------------------------ | ------------------------- |
| committer | Boris Fomitchev <bfomitchev@nvidia.com>          | 2016-10-19 09:31:04 +0300 |
| commit    | 127fabc32a2c28262d786d83aaeaaf94c3059dd4 (patch) |                           |
| tree      | 67cbb8cd3f19794d0469ea816d3a361924b35240         |                           |
| parent    | e08c0e4444a2a3f2177565f765414ec2389e9870 (diff)  |                           |
Added new refactoring for convolution and filter descriptors
| mode       | file                          | lines changed |
| ---------- | ----------------------------- | ------------- |
| -rw-r--r-- | RNN.lua                       | 24            |
| -rw-r--r-- | SpatialConvolution.lua        | 50            |
| -rw-r--r-- | SpatialFullConvolution.lua    | 23            |
| -rw-r--r-- | VolumetricConvolution.lua     | 21            |
| -rw-r--r-- | VolumetricFullConvolution.lua | 25            |
| -rw-r--r-- | find.lua                      | 33            |
| -rw-r--r-- | functional.lua                | 123           |
| -rw-r--r-- | init.lua                      | 49            |
| -rw-r--r-- | test/test.lua                 | 5             |
9 files changed, 153 insertions, 200 deletions
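The hunks below replace the per-module FFI boilerplate (`cudnnCreateFilterDescriptor`/`cudnnSetFilterNdDescriptor` and their convolution counterparts) with three shared helpers added to init.lua: `cudnn.setFilterDescriptor`, `cudnn.setConvolutionDescriptor`, and `cudnn.getConvolutionDescriptor`. A minimal sketch of the new call pattern, assuming a built cudnn.torch checkout that contains these helpers; the sizes shown are illustrative and not taken from the diff:

```lua
local cudnn = require 'cudnn'

-- filter (weight) descriptor: a table with dataType/filterDimA replaces the
-- raw cudnnSetFilterNdDescriptor call; format defaults to CUDNN_TENSOR_NCHW
local wDesc = cudnn.setFilterDescriptor{
   dataType   = 'CUDNN_DATA_FLOAT',
   filterDimA = {64, 3, 3, 3}   -- nOutputPlane, nInputPlane, kH, kW
}

-- convolution descriptor: padA/filterStrideA/dataType replace the raw
-- cudnnSetConvolutionNdDescriptor call; upscaleA defaults to all ones and
-- mode defaults to CUDNN_CROSS_CORRELATION when omitted
local convDesc = cudnn.setConvolutionDescriptor{
   padA          = {1, 1},
   filterStrideA = {1, 1},
   dataType      = 'CUDNN_DATA_FLOAT'
}
```

Both helpers create the underlying descriptor when one is not passed in, and return it so callers can keep a handle for reuse.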
diff --git a/RNN.lua b/RNN.lua
--- a/RNN.lua
+++ b/RNN.lua
@@ -75,12 +75,6 @@ function RNN:createDropoutDescriptors(count)
                             'cudnnDestroyDropoutDescriptor')
 end
 
-function RNN:createFilterDescriptors(count)
-   return cudnn.createDescriptors(count,
-                            'cudnnFilterDescriptor_t[?]',
-                            'cudnnCreateFilterDescriptor',
-                            'cudnnDestroyFilterDescriptor')
-end
 
 function RNN:createRNNDescriptors(count)
    return cudnn.createDescriptors(count,
@@ -131,18 +125,12 @@ function RNN:resetRNNDescriptor()
 end
 
 function RNN:resetWeightDescriptor()
-   if not self.wDesc then
-      self.wDesc = self:createFilterDescriptors(1)
-   end
-
-   local dim = torch.IntTensor({self.weight:size(1), 1, 1})
-
-   errcheck('cudnnSetFilterNdDescriptor',
-            self.wDesc[0],
-            self.datatype,
-            'CUDNN_TENSOR_NCHW',
-            3,
-            dim:data())
+   self.wDesc = cudnn.setFilterDescriptor(
+      { dataType = self.datatype,
+        filterDimA = {self.weight:size(1), 1, 1}
+      },
+      self.wDesc
+   )
 end
 
 function RNN:resetIODescriptors()
diff --git a/SpatialConvolution.lua b/SpatialConvolution.lua
index 4a56980..3a09370 100644
--- a/SpatialConvolution.lua
+++ b/SpatialConvolution.lua
@@ -25,30 +25,27 @@ function SpatialConvolution:__init(nInputPlane, nOutputPlane,
    self.reset = nil
 end
 
-function SpatialConvolution:createWeightDescriptors()
+-- if you change the configuration of the module manually, call this
+function SpatialConvolution:resetWeightDescriptors(desc)
+   -- for compatibility
+   self.groups = self.groups or 1
    assert(cudnn.typemap[torch.typename(self.weight)], 'Only Cuda supported duh!')
    assert(cudnn.typemap[torch.typename(self.bias)] or not self.bias, 'Only Cuda supported duh!')
+   -- create descriptor for bias
    if self.bias then
       self.biasDesc = cudnn.toDescriptor(self.bias:view(1, self.nOutputPlane,1,1))
    end
-   -- create filterDescriptor for weight
-   return cudnn.createDescriptors(1, 'struct cudnnFilterStruct*[?]',
-                                  'cudnnCreateFilterDescriptor', 'cudnnDestroyFilterDescriptor')
-end
-
--- if you change the configuration of the module manually, call this
-function SpatialConvolution:resetWeightDescriptors(desc)
-   -- for compatibility
-   self.groups = self.groups or 1
-   self.weightDesc = SpatialConvolution.createWeightDescriptors(self)
-   desc = desc or torch.IntTensor({self.nOutputPlane/self.groups,
-                                   self.nInputPlane/self.groups,
-                                   self.kH, self.kW})
-
-   errcheck('cudnnSetFilterNdDescriptor', self.weightDesc[0],
-            cudnn.typemap[torch.typename(self.weight)], 'CUDNN_TENSOR_NCHW', desc:nElement(),
-            desc:data());
+   self.weightDesc = cudnn.setFilterDescriptor(
+      { dataType = cudnn.typemap[torch.typename(self.weight)],
+        filterDimA = desc or
+           {self.nOutputPlane/self.groups,
+            self.nInputPlane/self.groups,
+            self.kH, self.kW}
+      }
+   )
+
    return self
 end
 
@@ -97,6 +94,7 @@ function SpatialConvolution:checkInputChanged(input)
    end
    self.groups = self.groups or 1
    if not self.weightDesc then self:resetWeightDescriptors() end
+   if not self.weightDesc then error "Weights not assigned!" end
    if not self.iDesc or not self.oDesc or input:size(1) ~= self.iSize[1] or input:size(2) ~= self.iSize[2]
    or input:size(3) ~= self.iSize[3] or input:size(4) ~= self.iSize[4]
    or (input:dim()==5 and input:size(5) ~= self.iSize[5]) then
@@ -124,17 +122,17 @@ function SpatialConvolution:createIODescriptors(input)
         local input_slice = input:narrow(2,1,self.nInputPlane/self.groups)
         self.iDesc = cudnn.toDescriptor(input_slice)
         -- create conv descriptor
-        self.convDesc = cudnn.createDescriptors(1, 'struct cudnnConvolutionStruct*[?]',
-                                                'cudnnCreateConvolutionDescriptor', 'cudnnDestroyConvolutionDescriptor')
         self.padH, self.padW = self.padH or 0, self.padW or 0
-        self.pad = torch.IntTensor({self.padH, self.padW})
-        self.stride = torch.IntTensor({self.dH, self.dW})
-        local upscale = torch.IntTensor({1,1})
-        errcheck('cudnnSetConvolutionNdDescriptor', self.convDesc[0],
-                 2, self.pad:data(),
-                 self.stride:data(), upscale:data(), 'CUDNN_CROSS_CORRELATION',
-                 cudnn.configmap(torch.type(self.weight)));
+        -- those needed to calculate hash
+        self.pad = {self.padH, self.padW}
+        self.stride = {self.dH, self.dW}
+        self.convDesc = cudnn.setConvolutionDescriptor(
+           { padA = self.pad,
+             filterStrideA = self.stride,
+             upscaleA = {1,1},
+             dataType = cudnn.configmap(torch.type(self.weight))
+           })
 
         -- get output shape, resize output
         local oSize = torch.IntTensor(4)
diff --git a/SpatialFullConvolution.lua b/SpatialFullConvolution.lua
index 528dae9..f598435 100644
--- a/SpatialFullConvolution.lua
+++ b/SpatialFullConvolution.lua
@@ -8,9 +8,9 @@ local checkedCall = find.checkedCall
 local Convolution = cudnn.SpatialConvolution
 
 function SpatialFullConvolution:resetWeightDescriptors()
-    return Convolution.resetWeightDescriptors(self, torch.IntTensor({self.nInputPlane,
-                                                                     self.nOutputPlane,
-                                                                     self.kH, self.kW}))
+    return Convolution.resetWeightDescriptors(self, {self.nInputPlane,
+                                                     self.nOutputPlane,
+                                                     self.kH, self.kW})
 end
 
 function SpatialFullConvolution:fastest(mode)
@@ -44,15 +44,14 @@ function SpatialFullConvolution:createIODescriptors(input)
         self.iDesc = cudnn.toDescriptor(input_slice)
 
         -- create conv descriptor
-        self.convDesc = cudnn.createDescriptors(1, 'struct cudnnConvolutionStruct*[?]',
-                                                'cudnnCreateConvolutionDescriptor', 'cudnnDestroyConvolutionDescriptor')
-        self.pad = torch.IntTensor({self.padH, self.padW})
-        self.stride = torch.IntTensor({self.dH, self.dW})
-        local upscale = torch.IntTensor({1,1})
-        errcheck('cudnnSetConvolutionNdDescriptor', self.convDesc[0],
-                 2, self.pad:data(),
-                 self.stride:data(), upscale:data(), 'CUDNN_CROSS_CORRELATION',
-                 cudnn.configmap(torch.type(self.weight)));
+        self.pad = {self.padH, self.padW}
+        self.stride = {self.dH, self.dW}
+
+        self.convDesc = cudnn.setConvolutionDescriptor(
+           { padA = self.pad,
+             filterStrideA = self.stride,
+             dataType = cudnn.configmap(torch.type(self.weight))
+           })
 
         -- get output shape, resize output
         local iwidth = input:size(4)
diff --git a/VolumetricConvolution.lua b/VolumetricConvolution.lua
index 03d893e..7a7c1f0 100644
--- a/VolumetricConvolution.lua
+++ b/VolumetricConvolution.lua
@@ -8,8 +8,8 @@ local Convolution = cudnn.SpatialConvolution
 
 -- if you change the configuration of the module manually, call this
 function VolumetricConvolution:resetWeightDescriptors()
-   local desc = torch.IntTensor({self.nOutputPlane, self.nInputPlane,
-                                 self.kT, self.kH, self.kW})
+   local desc = {self.nOutputPlane, self.nInputPlane,
+                 self.kT, self.kH, self.kW}
    return Convolution.resetWeightDescriptors(self,desc)
 end
 
@@ -35,21 +35,18 @@ function VolumetricConvolution:createIODescriptors(input)
       -- create input descriptor
       self.iDesc = cudnn.toDescriptor(input)
       -- create conv descriptor
-      self.convDesc = cudnn.createDescriptors(1, 'struct cudnnConvolutionStruct*[?]',
-                                              'cudnnCreateConvolutionDescriptor', 'cudnnDestroyConvolutionDescriptor')
-      self.pad = torch.IntTensor({self.padT, self.padH, self.padW})
-      self.stride = torch.IntTensor({self.dT, self.dH, self.dW})
-      local upscale = torch.IntTensor({1,1,1})
+      self.pad = {self.padT, self.padH, self.padW}
+      self.stride = {self.dT, self.dH, self.dW}
+
       local mathtype=cudnn.configmap(torch.type(self.weight))
       -- 3D convolutions do not work in 16 bits
       if mathtype == 'CUDNN_DATA_HALF' then
          mathtype = 'CUDNN_DATA_FLOAT'
       end
-      errcheck('cudnnSetConvolutionNdDescriptor', self.convDesc[0],
-               3, self.pad:data(),
-               self.stride:data(), upscale:data(), 'CUDNN_CROSS_CORRELATION',
-               mathtype);
-      -- create output descriptor and resize output
+      self.convDesc = cudnn.setConvolutionDescriptor(
+         { padA = self.pad, filterStrideA = self.stride,
+           dataType = mathtype
+         })
       local oSize = torch.IntTensor(5)
       errcheck('cudnnGetConvolutionNdForwardOutputDim',
diff --git a/VolumetricFullConvolution.lua b/VolumetricFullConvolution.lua
index ad6110d..8f8bac6 100644
--- a/VolumetricFullConvolution.lua
+++ b/VolumetricFullConvolution.lua
@@ -9,9 +9,10 @@ local Convolution = cudnn.SpatialConvolution
 
 -- if you change the configuration of the module manually, call this
 function VolumetricFullConvolution:resetWeightDescriptors()
-   local desc = torch.IntTensor({self.nInputPlane, self.nOutputPlane,
-                                 self.kT, self.kH, self.kW})
-   return Convolution.resetWeightDescriptors(self,desc)
+   return Convolution.resetWeightDescriptors(
+      self,
+      {self.nInputPlane, self.nOutputPlane, self.kT, self.kH, self.kW}
+   )
 end
 
 function VolumetricFullConvolution:fastest(mode)
@@ -38,20 +39,16 @@ function VolumetricFullConvolution:createIODescriptors(input)
    assert(input:dim() == 5 and input:isContiguous());
    self.iSize = self.iSize or torch.LongStorage(5):fill(0)
    if Convolution.checkInputChanged(self, input) then
+         -- create input descriptor
         local input_slice = input[{{},{1,self.nInputPlane},{},{}}]
         self.iDesc = cudnn.toDescriptor(input_slice)
-         -- create input descriptor
---         self.iDesc = cudnn.toDescriptor(input)
         -- create conv descriptor
-        self.convDesc = cudnn.createDescriptors(1, 'struct cudnnConvolutionStruct*[?]',
-                                                'cudnnCreateConvolutionDescriptor', 'cudnnDestroyConvolutionDescriptor')
-        self.pad = torch.IntTensor({self.padT, self.padH, self.padW})
-        self.stride = torch.IntTensor({self.dT, self.dH, self.dW})
-        local upscale = torch.IntTensor({1,1,1})
-        errcheck('cudnnSetConvolutionNdDescriptor', self.convDesc[0],
-                 3, self.pad:data(),
-                 self.stride:data(), upscale:data(), 'CUDNN_CROSS_CORRELATION',
-                 cudnn.configmap(torch.type(self.weight)));
+        self.pad = {self.padT, self.padH, self.padW}
+        self.stride = {self.dT, self.dH, self.dW}
+        self.convDesc = cudnn.setConvolutionDescriptor(
+           { padA = self.pad, filterStrideA = self.stride,
+             dataType = cudnn.configmap(torch.type(self.weight))
+           })
 
         -- get output shape, resize output
         local iwidth = input:size(5)
diff --git a/find.lua b/find.lua
--- a/find.lua
+++ b/find.lua
@@ -70,15 +70,9 @@ local function verboseCall(layer, f, ...)
    end
    local status = cudnn.call(f, ...)
    if status ~= ffi.C.CUDNN_STATUS_SUCCESS and (find.verbose or find.verboseError) then
-      local stride = ffi.new('int[8]')
-      local upscale = ffi.new('int[8]')
-      local dim = ffi.new('int[8]')
-      local mode = ffi.new('cudnnConvolutionMode_t[8]')
-      local datatype = ffi.new('cudnnDataType_t[8]')
-      cudnn.call('cudnnGetConvolutionNdDescriptor', layer.convDesc[0],
-                 4, dim, pad, stride,
-                 upscale, mode, datatype)
-      print("find:verboseCall:" .. f .. " failed: ", tonumber(status) , ' mode : ', tonumber(mode[0]), ' datatype : ', tonumber(datatype[0]))
+      local desc = cudnn.getConvolutionDescriptor(layer.convDesc)
+      print("find:verboseCall:" .. f .. " failed: ", tonumber(status) , ' mode : ',
+            desc.mode, ' datatype : ', desc.dataType)
    end
    if find.verbose then
       print("find:verboseCall: success, " .. f )
@@ -105,18 +99,8 @@ end
 
 local function defaultFallback(layer, replay)
    -- read conv descriptor
-   local pad = ffi.new('int[8]')
-   local stride = ffi.new('int[8]')
-   local upscale = ffi.new('int[8]')
-   local dim = ffi.new('int[8]')
-   local mode = ffi.new('cudnnConvolutionMode_t[8]')
-   local datatype = ffi.new('cudnnDataType_t[8]')
-
-   checkedCall(layer,'cudnnGetConvolutionNdDescriptor', layer.convDesc[0],
-               5, dim, pad, stride,
-               upscale, mode, datatype)
-
-   if datatype[0] == ffi.C.CUDNN_DATA_HALF then
+   local convDescData = cudnn.getConvolutionDescriptor(layer.convDesc)
+   if convDescData.dataType == ffi.C.CUDNN_DATA_HALF then
       if find.verbose then
          if replay then
             print("find.defaultFallback: replay for ", layer.autotunerHash)
@@ -124,9 +108,8 @@ local function defaultFallback(layer, replay)
            print("find.defaultFallback: no 16-bit float algo found, will try 32 bits for ", layer.autotunerHash)
         end
      end
-      checkedCall(layer,'cudnnSetConvolutionNdDescriptor', layer.convDesc[0],
-                  dim[0], pad, stride,
-                  upscale, mode[0], ffi.C.CUDNN_DATA_FLOAT)
+      convDescData.dataType = ffi.C.CUDNN_DATA_FLOAT
+      cudnn.setConvolutionDescriptor(convDescData, layer.convDesc)
      return true
   else
      return false
@@ -461,7 +444,7 @@ function find:prepare(layer, input_slice, output_slice)
       return table.concat(x:size():totable(),',')
    end
    local function vals(x)
-      return table.concat(x:totable(),',')
+      return table.concat(x,',')
    end
    layer.autotunerHash = '-dimA' .. shape(input_slice)
diff --git a/functional.lua b/functional.lua
index a03d5a3..deaf839 100644
--- a/functional.lua
+++ b/functional.lua
@@ -7,11 +7,15 @@ local ffi = require 'ffi'
 local errcheck = cudnn.errcheck
 
 cudnn.functional = {}
-
-
-
-
-
+local function getMathType(weight)
+   local mathType = cudnn.configmap(torch.type(weight))
+   if mathType == 'CUDNN_DATA_HALF' then
+      -- explicitly set math type to fp32 to avoid possible failures with fp16 and exotic sizes
+      -- this can be changed back when ported to find() as it has built-in fallback mechanism
+      mathType = 'CUDNN_DATA_FLOAT'
+   end
+   return mathType
+end
 
 local function Batch2D(t)
    return t:view(1, t:size(1), t:size(2), t:size(3))
@@ -68,43 +72,21 @@ cudnn.functional.Convolution2D_updateOutput = function(handle, input, weight, ou
     output = output:dim() == 3 and Batch2D(output) or output
 
     -- create a weight descriptor
-    local weightDesc = ffi.new('struct cudnnFilterStruct*[1]')
-    errcheck('cudnnCreateFilterDescriptor', weightDesc)
     local nOutputPlane, nInputPlane, kH, kW = weight:size(1), weight:size(2), weight:size(3), weight:size(4)
-    local desc = torch.IntTensor({nOutputPlane, nInputPlane, kH, kW})
-    errcheck('cudnnSetFilterNdDescriptor', weightDesc[0], cudnn.typemap[torch.type(input)], 'CUDNN_TENSOR_NCHW', 4,
-             desc:data());
-    local function destroyWDesc(d)
-        errcheck('cudnnDestroyFilterDescriptor', d[0]);
-    end
-    ffi.gc(weightDesc, destroyWDesc)
+    local weightDesc = cudnn.setFilterDescriptor(
+       { dataType = cudnn.typemap[torch.type(input)],
+         filterDimA = {nOutputPlane, nInputPlane, kH, kW}})
 
     -- create a convolution descriptor
-    local convDesc = ffi.new('struct cudnnConvolutionStruct*[1]')
-    errcheck('cudnnCreateConvolutionDescriptor', convDesc)
-    local pad = torch.IntTensor({padH, padW})
-    local stride = torch.IntTensor({strideH, strideW})
-    local upscale = torch.IntTensor({1,1})
-    local mathType = cudnn.configmap(torch.type(weight))
-    if mathType == 'CUDNN_DATA_HALF' then
-      -- explicitly set math type to fp32 to avoid possible failures with fp16 and exotic sizes
-      -- this can be changed back when ported to find() as it has built-in fallback mechanism
-      mathType = 'CUDNN_DATA_FLOAT'
-    end
-    errcheck('cudnnSetConvolutionNdDescriptor', convDesc[0],
-             2, pad:data(),
-             stride:data(), upscale:data(), 'CUDNN_CROSS_CORRELATION',
-             mathType
+    local convDesc = cudnn.setConvolutionDescriptor(
+       { padA = {padH, padW},
+         filterStrideA = {strideH, strideW},
+         dataType = getMathType(weight)
+       }
     );
-    local function destroyConvDesc(d)
-        errcheck('cudnnDestroyConvolutionDescriptor', d[0]);
-    end
-    ffi.gc(convDesc, destroyConvDesc)
 
     -- create input descriptor
     local iDesc = cudnn.toDescriptor(input)
-
     -- create output descriptor
     local oSize = torch.IntTensor(4)
     errcheck('cudnnGetConvolutionNdForwardOutputDim',
@@ -169,39 +151,19 @@ cudnn.functional.Convolution2D_updateGradInput = function(handle, input, weight,
     gradInput = gradInput:dim() == 3 and Batch2D(gradInput) or gradInput
 
     -- create a weight descriptor
-    local weightDesc = ffi.new('struct cudnnFilterStruct*[1]')
-    errcheck('cudnnCreateFilterDescriptor', weightDesc)
     local nOutputPlane, nInputPlane, kH, kW = weight:size(1), weight:size(2), weight:size(3), weight:size(4)
-    local desc = torch.IntTensor({nOutputPlane, nInputPlane, kH, kW})
-    errcheck('cudnnSetFilterNdDescriptor', weightDesc[0], cudnn.typemap[torch.type(input)], 'CUDNN_TENSOR_NCHW', 4,
-             desc:data());
-    local function destroyWDesc(d)
-        errcheck('cudnnDestroyFilterDescriptor', d[0]);
-    end
-    ffi.gc(weightDesc, destroyWDesc)
+    local weightDesc = cudnn.setFilterDescriptor(
+       { dataType = cudnn.typemap[torch.type(input)],
+         filterDimA = {nOutputPlane, nInputPlane, kH, kW} })
 
     -- create a convolution descriptor
-    local convDesc = ffi.new('struct cudnnConvolutionStruct*[1]')
-    errcheck('cudnnCreateConvolutionDescriptor', convDesc)
-    local pad = torch.IntTensor({padH, padW})
-    local stride = torch.IntTensor({strideH, strideW})
-    local upscale = torch.IntTensor({1,1})
-    local mathType = cudnn.configmap(torch.type(weight))
-    if mathType == 'CUDNN_DATA_HALF' then
-      -- explicitly set math type to fp32 to avoid possible failures with fp16 and exotic sizes
-      -- this can be changed back when ported to find() as it has built-in fallback mechanism
-      mathType = 'CUDNN_DATA_FLOAT'
-    end
-    errcheck('cudnnSetConvolutionNdDescriptor', convDesc[0],
-             2, pad:data(),
-             stride:data(), upscale:data(), 'CUDNN_CROSS_CORRELATION',
-             mathType)
-    local function destroyConvDesc(d)
-        errcheck('cudnnDestroyConvolutionDescriptor', d[0]);
-    end
-    ffi.gc(convDesc, destroyConvDesc)
-
+    local convDesc = cudnn.setConvolutionDescriptor(
+       { padA = {padH, padW},
+         filterStrideA = {strideH, strideW},
+         dataType = getMathType(weight)
+       }
+    );
     -- create input, output descriptor
     local iDesc = cudnn.toDescriptor(input)
     local oDesc = cudnn.toDescriptor(output)
@@ -256,38 +218,17 @@ cudnn.functional.Convolution2D_accGradParameters = function(handle, input, gradW
     local scaleT = torch.type(gradWeight) == 'torch.CudaDoubleTensor'
        and torch.DoubleTensor({scale}) or torch.FloatTensor({scale})
 
     -- create a weight descriptor
-    local weightDesc = ffi.new('struct cudnnFilterStruct*[1]')
-    errcheck('cudnnCreateFilterDescriptor', weightDesc)
     local nOutputPlane, nInputPlane, kH, kW = gradWeight:size(1), gradWeight:size(2), gradWeight:size(3), gradWeight:size(4)
-    local desc = torch.IntTensor({nOutputPlane, nInputPlane, kH, kW})
-    errcheck('cudnnSetFilterNdDescriptor', weightDesc[0], cudnn.typemap[torch.type(input)], 'CUDNN_TENSOR_NCHW', 4,
-             desc:data());
-    local function destroyWDesc(d)
-        errcheck('cudnnDestroyFilterDescriptor', d[0]);
-    end
-    ffi.gc(weightDesc, destroyWDesc)
+    local weightDesc = cudnn.setFilterDescriptor({ dataType = cudnn.typemap[torch.type(input)],
+                                                   filterDimA = {nOutputPlane, nInputPlane, kH, kW}})
 
     -- create a convolution descriptor
-    local convDesc = ffi.new('struct cudnnConvolutionStruct*[1]')
-    errcheck('cudnnCreateConvolutionDescriptor', convDesc)
-    local pad = torch.IntTensor({padH, padW})
-    local stride = torch.IntTensor({strideH, strideW})
-    local upscale = torch.IntTensor({1,1})
-    local mathType = cudnn.configmap(torch.type(gradWeight))
-    if mathType == 'CUDNN_DATA_HALF' then
-      -- explicitly set math type to fp32 to avoid possible failures with fp16 and exotic sizes
-      -- this can be changed back when ported to find() as it has built-in fallback mechanism
-      mathType = 'CUDNN_DATA_FLOAT'
-    end
-    errcheck('cudnnSetConvolutionNdDescriptor', convDesc[0],
-             2, pad:data(),
-             stride:data(), upscale:data(), 'CUDNN_CROSS_CORRELATION',
-             mathType)
-    local function destroyConvDesc(d)
-        errcheck('cudnnDestroyConvolutionDescriptor', d[0]);
-    end
-    ffi.gc(convDesc, destroyConvDesc)
+    local convDesc = cudnn.setConvolutionDescriptor(
+       { padA = {padH, padW},
+         filterStrideA = {strideH, strideW},
+         dataType = getMathType(gradWeight) }
+    );
 
     -- create input, output descriptor
     local iDesc = cudnn.toDescriptor(input)
diff --git a/init.lua b/init.lua
--- a/init.lua
+++ b/init.lua
@@ -196,6 +196,55 @@ function cudnn.createDescriptors(count, descs_type, create_func, destroy_func)
     return ds
 end
 
+
+function cudnn.getConvolutionDescriptor(desc)
+   local CUDNN_DIM_MAX = 4
+   local data = {
+      dim_p = ffi.new('int[1]'),
+      padA = ffi.new('int[?]', CUDNN_DIM_MAX),
+      filterStrideA = ffi.new('int[?]', CUDNN_DIM_MAX),
+      upscaleA = ffi.new('int[?]', CUDNN_DIM_MAX),
+      mode_p = ffi.new('cudnnConvolutionMode_t[1]'),
+      math_p = ffi.new('cudnnDataType_t[1]')
+   }
+
+   errcheck('cudnnGetConvolutionNdDescriptor', desc[0], CUDNN_DIM_MAX,
+            data.dim_p, data.padA, data.filterStrideA,
+            data.upscaleA, data.mode_p, data.math_p)
+
+   data.arrayLength = data.dim_p[0]
+   data.mode = data.mode_p[0]
+   data.dataType = data.math_p[0]
+   return data
+end
+
+function cudnn.setConvolutionDescriptor(data, desc)
+   local dim = data.arrayLength or #data.padA
+   local upscale = data.upscaleA or torch.IntStorage(dim):fill(1)
+   local myDesc = desc or cudnn.createDescriptors(
+      1, 'struct cudnnConvolutionStruct*[?]',
+      'cudnnCreateConvolutionDescriptor', 'cudnnDestroyConvolutionDescriptor')
+   errcheck('cudnnSetConvolutionNdDescriptor', myDesc[0],
+            dim,
+            torch.IntTensor(data.padA):data(),
+            torch.IntTensor(data.filterStrideA):data(),
+            torch.IntTensor(upscale):data(),
+            data.mode or 'CUDNN_CROSS_CORRELATION',
+            data.dataType)
+   return myDesc
+end
+
+function cudnn.setFilterDescriptor(data, filterDesc)
+   local myDesc = filterDesc or cudnn.createDescriptors(
+      1, 'struct cudnnFilterStruct*[?]',
+      'cudnnCreateFilterDescriptor', 'cudnnDestroyFilterDescriptor')
+   local dims = data.nbDims or #data.filterDimA
+   errcheck('cudnnSetFilterNdDescriptor', myDesc[0],
+            data.dataType, data.format or 'CUDNN_TENSOR_NCHW',
+            dims, torch.IntTensor(data.filterDimA):data());
+   return myDesc
+end
+
 local sharedBuffer = {}
 local nextBufferSize = {}
diff --git a/test/test.lua b/test/test.lua
index bd7437a..ac3e721 100644
--- a/test/test.lua
+++ b/test/test.lua
@@ -934,9 +934,10 @@ math.randomseed(os.time())
 mytester = torch.Tester()
 mytester:add(cudnntest)
 
--- cudnn.verbose=true
--- cudnn.find.verbose=true
+cudnn.verbose=false
+cudnn.find.verbose=false
 cudnn.useFindEx=false
+cudnn.useFloatMathForHalf = true
 
 for i = 1, cutorch.getDeviceCount() do
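For diagnostics, the new `cudnn.getConvolutionDescriptor` reads an existing descriptor back into a plain table, which is what the reworked `find.verboseCall` relies on above. A hedged sketch of that usage; `conv` stands for any cudnn convolution module whose `convDesc` has already been created, for example after a forward pass:

```lua
local ffi = require 'ffi'

-- read back the Nd convolution descriptor that the module built
local desc = cudnn.getConvolutionDescriptor(conv.convDesc)

print('nbDims   :', desc.arrayLength)
print('mode     :', tonumber(desc.mode))       -- cudnnConvolutionMode_t as a number
print('dataType :', tonumber(desc.dataType))   -- cudnnDataType_t as a number
print('padA[0]  :', desc.padA[0], 'strideA[0]:', desc.filterStrideA[0])
```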