
github.com/soumith/cudnn.torch.git
author     Boris Fomitchev <bfomitchev@nvidia.com>  2016-10-19 09:31:04 +0300
committer  Boris Fomitchev <bfomitchev@nvidia.com>  2016-10-19 09:31:04 +0300
commit     127fabc32a2c28262d786d83aaeaaf94c3059dd4 (patch)
tree       67cbb8cd3f19794d0469ea816d3a361924b35240
parent     e08c0e4444a2a3f2177565f765414ec2389e9870 (diff)
Refactored convolution and filter descriptor setup into shared helpers
-rw-r--r--  RNN.lua                          24
-rw-r--r--  SpatialConvolution.lua           50
-rw-r--r--  SpatialFullConvolution.lua       23
-rw-r--r--  VolumetricConvolution.lua        21
-rw-r--r--  VolumetricFullConvolution.lua    25
-rw-r--r--  find.lua                         33
-rw-r--r--  functional.lua                  123
-rw-r--r--  init.lua                         49
-rw-r--r--  test/test.lua                     5
9 files changed, 153 insertions(+), 200 deletions(-)
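
In short, this commit replaces the repeated raw errcheck('cudnnSetFilterNdDescriptor', ...) and errcheck('cudnnSetConvolutionNdDescriptor', ...) FFI boilerplate with table-driven helpers added to init.lua: cudnn.setFilterDescriptor, cudnn.setConvolutionDescriptor, and a matching cudnn.getConvolutionDescriptor. A minimal sketch of the new filter-descriptor call; the sizes below are illustrative, not taken from the diff:

    -- sketch only: sizes are illustrative, not from this commit
    local wDesc = cudnn.setFilterDescriptor{
       dataType   = 'CUDNN_DATA_FLOAT',  -- or cudnn.typemap[torch.type(weight)]
       filterDimA = {64, 3, 3, 3},       -- nOutputPlane, nInputPlane, kH, kW
       -- format defaults to 'CUDNN_TENSOR_NCHW', nbDims to #filterDimA
    }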
diff --git a/RNN.lua b/RNN.lua
index e3e278f..5a5be4a 100644
--- a/RNN.lua
+++ b/RNN.lua
@@ -75,12 +75,6 @@ function RNN:createDropoutDescriptors(count)
'cudnnDestroyDropoutDescriptor')
end
-function RNN:createFilterDescriptors(count)
- return cudnn.createDescriptors(count,
- 'cudnnFilterDescriptor_t[?]',
- 'cudnnCreateFilterDescriptor',
- 'cudnnDestroyFilterDescriptor')
-end
function RNN:createRNNDescriptors(count)
return cudnn.createDescriptors(count,
@@ -131,18 +125,12 @@ function RNN:resetRNNDescriptor()
end
function RNN:resetWeightDescriptor()
- if not self.wDesc then
- self.wDesc = self:createFilterDescriptors(1)
- end
-
- local dim = torch.IntTensor({self.weight:size(1), 1, 1})
-
- errcheck('cudnnSetFilterNdDescriptor',
- self.wDesc[0],
- self.datatype,
- 'CUDNN_TENSOR_NCHW',
- 3,
- dim:data())
+ self.wDesc = cudnn.setFilterDescriptor(
+ { dataType = self.datatype,
+ filterDimA = {self.weight:size(1), 1, 1}
+ },
+ self.wDesc
+ )
end
function RNN:resetIODescriptors()
diff --git a/SpatialConvolution.lua b/SpatialConvolution.lua
index 4a56980..3a09370 100644
--- a/SpatialConvolution.lua
+++ b/SpatialConvolution.lua
@@ -25,30 +25,27 @@ function SpatialConvolution:__init(nInputPlane, nOutputPlane,
self.reset = nil
end
-function SpatialConvolution:createWeightDescriptors()
+-- if you change the configuration of the module manually, call this
+function SpatialConvolution:resetWeightDescriptors(desc)
+ -- for compatibility
+ self.groups = self.groups or 1
assert(cudnn.typemap[torch.typename(self.weight)], 'Only Cuda supported duh!')
assert(cudnn.typemap[torch.typename(self.bias)] or not self.bias, 'Only Cuda supported duh!')
+
-- create descriptor for bias
if self.bias then
self.biasDesc = cudnn.toDescriptor(self.bias:view(1, self.nOutputPlane,1,1))
end
- -- create filterDescriptor for weight
- return cudnn.createDescriptors(1, 'struct cudnnFilterStruct*[?]',
- 'cudnnCreateFilterDescriptor', 'cudnnDestroyFilterDescriptor')
-end
--- if you change the configuration of the module manually, call this
-function SpatialConvolution:resetWeightDescriptors(desc)
- -- for compatibility
- self.groups = self.groups or 1
- self.weightDesc = SpatialConvolution.createWeightDescriptors(self)
- desc = desc or torch.IntTensor({self.nOutputPlane/self.groups,
- self.nInputPlane/self.groups,
- self.kH, self.kW})
-
- errcheck('cudnnSetFilterNdDescriptor', self.weightDesc[0],
- cudnn.typemap[torch.typename(self.weight)], 'CUDNN_TENSOR_NCHW', desc:nElement(),
- desc:data());
+ self.weightDesc = cudnn.setFilterDescriptor(
+ { dataType = cudnn.typemap[torch.typename(self.weight)],
+ filterDimA = desc or
+ {self.nOutputPlane/self.groups,
+ self.nInputPlane/self.groups,
+ self.kH, self.kW}
+ }
+ )
+
return self
end
@@ -97,6 +94,7 @@ function SpatialConvolution:checkInputChanged(input)
end
self.groups = self.groups or 1
if not self.weightDesc then self:resetWeightDescriptors() end
+ if not self.weightDesc then error "Weights not assigned!" end
if not self.iDesc or not self.oDesc or input:size(1) ~= self.iSize[1] or input:size(2) ~= self.iSize[2]
or input:size(3) ~= self.iSize[3] or input:size(4) ~= self.iSize[4] or (input:dim()==5 and input:size(5) ~= self.iSize[5]) then
@@ -124,17 +122,17 @@ function SpatialConvolution:createIODescriptors(input)
local input_slice = input:narrow(2,1,self.nInputPlane/self.groups)
self.iDesc = cudnn.toDescriptor(input_slice)
-- create conv descriptor
- self.convDesc = cudnn.createDescriptors(1, 'struct cudnnConvolutionStruct*[?]',
- 'cudnnCreateConvolutionDescriptor', 'cudnnDestroyConvolutionDescriptor')
self.padH, self.padW = self.padH or 0, self.padW or 0
- self.pad = torch.IntTensor({self.padH, self.padW})
- self.stride = torch.IntTensor({self.dH, self.dW})
- local upscale = torch.IntTensor({1,1})
- errcheck('cudnnSetConvolutionNdDescriptor', self.convDesc[0],
- 2, self.pad:data(),
- self.stride:data(), upscale:data(), 'CUDNN_CROSS_CORRELATION',
- cudnn.configmap(torch.type(self.weight)));
+ -- these are needed to calculate the autotuner hash
+ self.pad = {self.padH, self.padW}
+ self.stride = {self.dH, self.dW}
+ self.convDesc = cudnn.setConvolutionDescriptor(
+ { padA = self.pad,
+ filterStrideA = self.stride,
+ upscaleA = {1,1},
+ dataType = cudnn.configmap(torch.type(self.weight))
+ })
-- get output shape, resize output
local oSize = torch.IntTensor(4)
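
For comparison, the convolution-descriptor setup above collapses from the old multi-argument cudnnSetConvolutionNdDescriptor call into one helper call. A hedged sketch with illustrative padding and stride values; upscaleA and mode may be omitted, since the helper defaults them to all ones and 'CUDNN_CROSS_CORRELATION':

    -- sketch only: values are illustrative, not from this commit
    local convDesc = cudnn.setConvolutionDescriptor{
       padA          = {0, 0},   -- padH, padW
       filterStrideA = {1, 1},   -- dH, dW
       dataType      = cudnn.configmap('torch.CudaTensor'),
    }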
diff --git a/SpatialFullConvolution.lua b/SpatialFullConvolution.lua
index 528dae9..f598435 100644
--- a/SpatialFullConvolution.lua
+++ b/SpatialFullConvolution.lua
@@ -8,9 +8,9 @@ local checkedCall = find.checkedCall
local Convolution = cudnn.SpatialConvolution
function SpatialFullConvolution:resetWeightDescriptors()
- return Convolution.resetWeightDescriptors(self, torch.IntTensor({self.nInputPlane,
- self.nOutputPlane,
- self.kH, self.kW}))
+ return Convolution.resetWeightDescriptors(self, {self.nInputPlane,
+ self.nOutputPlane,
+ self.kH, self.kW})
end
function SpatialFullConvolution:fastest(mode)
@@ -44,15 +44,14 @@ function SpatialFullConvolution:createIODescriptors(input)
self.iDesc = cudnn.toDescriptor(input_slice)
-- create conv descriptor
- self.convDesc = cudnn.createDescriptors(1, 'struct cudnnConvolutionStruct*[?]',
- 'cudnnCreateConvolutionDescriptor', 'cudnnDestroyConvolutionDescriptor')
- self.pad = torch.IntTensor({self.padH, self.padW})
- self.stride = torch.IntTensor({self.dH, self.dW})
- local upscale = torch.IntTensor({1,1})
- errcheck('cudnnSetConvolutionNdDescriptor', self.convDesc[0],
- 2, self.pad:data(),
- self.stride:data(), upscale:data(), 'CUDNN_CROSS_CORRELATION',
- cudnn.configmap(torch.type(self.weight)));
+ self.pad = {self.padH, self.padW}
+ self.stride = {self.dH, self.dW}
+
+ self.convDesc = cudnn.setConvolutionDescriptor(
+ { padA = self.pad,
+ filterStrideA = self.stride,
+ dataType = cudnn.configmap(torch.type(self.weight))
+ })
-- get output shape, resize output
local iwidth = input:size(4)
diff --git a/VolumetricConvolution.lua b/VolumetricConvolution.lua
index 03d893e..7a7c1f0 100644
--- a/VolumetricConvolution.lua
+++ b/VolumetricConvolution.lua
@@ -8,8 +8,8 @@ local Convolution = cudnn.SpatialConvolution
-- if you change the configuration of the module manually, call this
function VolumetricConvolution:resetWeightDescriptors()
- local desc = torch.IntTensor({self.nOutputPlane, self.nInputPlane,
- self.kT, self.kH, self.kW})
+ local desc = {self.nOutputPlane, self.nInputPlane,
+ self.kT, self.kH, self.kW}
return Convolution.resetWeightDescriptors(self,desc)
end
@@ -35,21 +35,18 @@ function VolumetricConvolution:createIODescriptors(input)
-- create input descriptor
self.iDesc = cudnn.toDescriptor(input)
-- create conv descriptor
- self.convDesc = cudnn.createDescriptors(1, 'struct cudnnConvolutionStruct*[?]',
- 'cudnnCreateConvolutionDescriptor', 'cudnnDestroyConvolutionDescriptor')
- self.pad = torch.IntTensor({self.padT, self.padH, self.padW})
- self.stride = torch.IntTensor({self.dT, self.dH, self.dW})
- local upscale = torch.IntTensor({1,1,1})
+ self.pad = {self.padT, self.padH, self.padW}
+ self.stride = {self.dT, self.dH, self.dW}
+
local mathtype=cudnn.configmap(torch.type(self.weight))
-- 3D convolutions do not work in 16 bits
if mathtype == 'CUDNN_DATA_HALF' then
mathtype = 'CUDNN_DATA_FLOAT'
end
- errcheck('cudnnSetConvolutionNdDescriptor', self.convDesc[0],
- 3, self.pad:data(),
- self.stride:data(), upscale:data(), 'CUDNN_CROSS_CORRELATION',
- mathtype);
- -- create output descriptor and resize output
+ self.convDesc = cudnn.setConvolutionDescriptor(
+ { padA = self.pad, filterStrideA = self.stride,
+ dataType = mathtype
+ })
local oSize = torch.IntTensor(5)
errcheck('cudnnGetConvolutionNdForwardOutputDim',
diff --git a/VolumetricFullConvolution.lua b/VolumetricFullConvolution.lua
index ad6110d..8f8bac6 100644
--- a/VolumetricFullConvolution.lua
+++ b/VolumetricFullConvolution.lua
@@ -9,9 +9,10 @@ local Convolution = cudnn.SpatialConvolution
-- if you change the configuration of the module manually, call this
function VolumetricFullConvolution:resetWeightDescriptors()
- local desc = torch.IntTensor({self.nInputPlane, self.nOutputPlane,
- self.kT, self.kH, self.kW})
- return Convolution.resetWeightDescriptors(self,desc)
+ return Convolution.resetWeightDescriptors(
+ self,
+ {self.nInputPlane, self.nOutputPlane, self.kT, self.kH, self.kW}
+ )
end
function VolumetricFullConvolution:fastest(mode)
@@ -38,20 +39,16 @@ function VolumetricFullConvolution:createIODescriptors(input)
assert(input:dim() == 5 and input:isContiguous());
self.iSize = self.iSize or torch.LongStorage(5):fill(0)
if Convolution.checkInputChanged(self, input) then
+ -- create input descriptor
local input_slice = input[{{},{1,self.nInputPlane},{},{}}]
self.iDesc = cudnn.toDescriptor(input_slice)
- -- create input descriptor
--- self.iDesc = cudnn.toDescriptor(input)
-- create conv descriptor
- self.convDesc = cudnn.createDescriptors(1, 'struct cudnnConvolutionStruct*[?]',
- 'cudnnCreateConvolutionDescriptor', 'cudnnDestroyConvolutionDescriptor')
- self.pad = torch.IntTensor({self.padT, self.padH, self.padW})
- self.stride = torch.IntTensor({self.dT, self.dH, self.dW})
- local upscale = torch.IntTensor({1,1,1})
- errcheck('cudnnSetConvolutionNdDescriptor', self.convDesc[0],
- 3, self.pad:data(),
- self.stride:data(), upscale:data(), 'CUDNN_CROSS_CORRELATION',
- cudnn.configmap(torch.type(self.weight)));
+ self.pad = {self.padT, self.padH, self.padW}
+ self.stride = {self.dT, self.dH, self.dW}
+ self.convDesc = cudnn.setConvolutionDescriptor(
+ { padA = self.pad, filterStrideA = self.stride,
+ dataType = cudnn.configmap(torch.type(self.weight))
+ })
-- get output shape, resize output
local iwidth = input:size(5)
diff --git a/find.lua b/find.lua
index 5e1dfd4..3ed3657 100644
--- a/find.lua
+++ b/find.lua
@@ -70,15 +70,9 @@ local function verboseCall(layer, f, ...)
end
local status = cudnn.call(f, ...)
if status ~= ffi.C.CUDNN_STATUS_SUCCESS and (find.verbose or find.verboseError) then
- local stride = ffi.new('int[8]')
- local upscale = ffi.new('int[8]')
- local dim = ffi.new('int[8]')
- local mode = ffi.new('cudnnConvolutionMode_t[8]')
- local datatype = ffi.new('cudnnDataType_t[8]')
- cudnn.call('cudnnGetConvolutionNdDescriptor', layer.convDesc[0],
- 4, dim, pad, stride,
- upscale, mode, datatype)
- print("find:verboseCall:" .. f .. " failed: ", tonumber(status) , ' mode : ', tonumber(mode[0]), ' datatype : ', tonumber(datatype[0]))
+ local desc = cudnn.getConvolutionDescriptor(layer.convDesc)
+ print("find:verboseCall:" .. f .. " failed: ", tonumber(status), ' mode : ',
+ desc.mode, ' datatype : ', desc.dataType)
end
if find.verbose then
print("find:verboseCall: success, " .. f )
@@ -105,18 +99,8 @@ end
local function defaultFallback(layer, replay)
-- read conv descriptor
- local pad = ffi.new('int[8]')
- local stride = ffi.new('int[8]')
- local upscale = ffi.new('int[8]')
- local dim = ffi.new('int[8]')
- local mode = ffi.new('cudnnConvolutionMode_t[8]')
- local datatype = ffi.new('cudnnDataType_t[8]')
-
- checkedCall(layer,'cudnnGetConvolutionNdDescriptor', layer.convDesc[0],
- 5, dim, pad, stride,
- upscale, mode, datatype)
-
- if datatype[0] == ffi.C.CUDNN_DATA_HALF then
+ local convDescData = cudnn.getConvolutionDescriptor(layer.convDesc)
+ if convDescData.dataType == ffi.C.CUDNN_DATA_HALF then
if find.verbose then
if replay then
print("find.defaultFallback: replay for ", layer.autotunerHash)
@@ -124,9 +108,8 @@ local function defaultFallback(layer, replay)
print("find.defaultFallback: no 16-bit float algo found, will try 32 bits for ", layer.autotunerHash)
end
end
- checkedCall(layer,'cudnnSetConvolutionNdDescriptor', layer.convDesc[0],
- dim[0], pad, stride,
- upscale, mode[0], ffi.C.CUDNN_DATA_FLOAT)
+ convDescData.dataType = ffi.C.CUDNN_DATA_FLOAT
+ cudnn.setConvolutionDescriptor(convDescData, layer.convDesc)
return true
else
return false
@@ -461,7 +444,7 @@ function find:prepare(layer, input_slice, output_slice)
return table.concat(x:size():totable(),',')
end
local function vals(x)
- return table.concat(x:totable(),',')
+ return table.concat(x,',')
end
layer.autotunerHash =
'-dimA' .. shape(input_slice)
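
The new defaultFallback above reduces to a read-modify-write round-trip through the descriptor table. A minimal sketch of that pattern, assuming layer is a cudnn module whose convDesc has already been set:

    -- sketch of the fp16-to-fp32 fallback round-trip, mirroring defaultFallback
    local ffi = require 'ffi'
    local convDescData = cudnn.getConvolutionDescriptor(layer.convDesc)
    if convDescData.dataType == ffi.C.CUDNN_DATA_HALF then
       convDescData.dataType = ffi.C.CUDNN_DATA_FLOAT
       cudnn.setConvolutionDescriptor(convDescData, layer.convDesc)  -- rewrite in place
    end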
diff --git a/functional.lua b/functional.lua
index a03d5a3..deaf839 100644
--- a/functional.lua
+++ b/functional.lua
@@ -7,11 +7,15 @@ local ffi = require 'ffi'
local errcheck = cudnn.errcheck
cudnn.functional = {}
-
-
-
-
-
+local function getMathType(weight)
+ local mathType = cudnn.configmap(torch.type(weight))
+ if mathType == 'CUDNN_DATA_HALF' then
+ -- explicitly set math type to fp32 to avoid possible failures with fp16 and exotic sizes
+ -- this can be changed back when ported to find() as it has built-in fallback mechanism
+ mathType = 'CUDNN_DATA_FLOAT'
+ end
+ return mathType
+end
local function Batch2D(t)
return t:view(1, t:size(1), t:size(2), t:size(3))
@@ -68,43 +72,21 @@ cudnn.functional.Convolution2D_updateOutput = function(handle, input, weight, ou
output = output:dim() == 3 and Batch2D(output) or output
-- create a weight descriptor
- local weightDesc = ffi.new('struct cudnnFilterStruct*[1]')
- errcheck('cudnnCreateFilterDescriptor', weightDesc)
local nOutputPlane, nInputPlane, kH, kW
= weight:size(1), weight:size(2), weight:size(3), weight:size(4)
- local desc = torch.IntTensor({nOutputPlane, nInputPlane, kH, kW})
- errcheck('cudnnSetFilterNdDescriptor', weightDesc[0], cudnn.typemap[torch.type(input)], 'CUDNN_TENSOR_NCHW', 4,
- desc:data());
- local function destroyWDesc(d)
- errcheck('cudnnDestroyFilterDescriptor', d[0]);
- end
- ffi.gc(weightDesc, destroyWDesc)
+ local weightDesc = cudnn.setFilterDescriptor(
+ { dataType = cudnn.typemap[torch.type(input)],
+ filterDimA = {nOutputPlane, nInputPlane, kH, kW}})
-- create a convolution descriptor
- local convDesc = ffi.new('struct cudnnConvolutionStruct*[1]')
- errcheck('cudnnCreateConvolutionDescriptor', convDesc)
- local pad = torch.IntTensor({padH, padW})
- local stride = torch.IntTensor({strideH, strideW})
- local upscale = torch.IntTensor({1,1})
- local mathType = cudnn.configmap(torch.type(weight))
- if mathType == 'CUDNN_DATA_HALF' then
- -- explicitly set math type to fp32 to avoid possible failures with fp16 and exotic sizes
- -- this can be changed back when ported to find() as it has built-in fallback mechanism
- mathType = 'CUDNN_DATA_FLOAT'
- end
- errcheck('cudnnSetConvolutionNdDescriptor', convDesc[0],
- 2, pad:data(),
- stride:data(), upscale:data(), 'CUDNN_CROSS_CORRELATION',
- mathType
+ local convDesc = cudnn.setConvolutionDescriptor(
+ { padA = {padH, padW},
+ filterStrideA = {strideH, strideW},
+ dataType = getMathType(weight) }
);
- local function destroyConvDesc(d)
- errcheck('cudnnDestroyConvolutionDescriptor', d[0]);
- end
- ffi.gc(convDesc, destroyConvDesc)
-- create input descriptor
local iDesc = cudnn.toDescriptor(input)
-
-- create output descriptor
local oSize = torch.IntTensor(4)
errcheck('cudnnGetConvolutionNdForwardOutputDim',
@@ -169,39 +151,19 @@ cudnn.functional.Convolution2D_updateGradInput = function(handle, input, weight,
gradInput = gradInput:dim() == 3 and Batch2D(gradInput) or gradInput
-- create a weight descriptor
- local weightDesc = ffi.new('struct cudnnFilterStruct*[1]')
- errcheck('cudnnCreateFilterDescriptor', weightDesc)
local nOutputPlane, nInputPlane, kH, kW
= weight:size(1), weight:size(2), weight:size(3), weight:size(4)
- local desc = torch.IntTensor({nOutputPlane, nInputPlane, kH, kW})
- errcheck('cudnnSetFilterNdDescriptor', weightDesc[0], cudnn.typemap[torch.type(input)], 'CUDNN_TENSOR_NCHW', 4,
- desc:data());
- local function destroyWDesc(d)
- errcheck('cudnnDestroyFilterDescriptor', d[0]);
- end
- ffi.gc(weightDesc, destroyWDesc)
+ local weightDesc = cudnn.setFilterDescriptor(
+ { dataType = cudnn.typemap[torch.type(input)],
+ filterDimA = {nOutputPlane, nInputPlane, kH, kW} })
-- create a convolution descriptor
- local convDesc = ffi.new('struct cudnnConvolutionStruct*[1]')
- errcheck('cudnnCreateConvolutionDescriptor', convDesc)
- local pad = torch.IntTensor({padH, padW})
- local stride = torch.IntTensor({strideH, strideW})
- local upscale = torch.IntTensor({1,1})
- local mathType = cudnn.configmap(torch.type(weight))
- if mathType == 'CUDNN_DATA_HALF' then
- -- explicitly set math type to fp32 to avoid possible failures with fp16 and exotic sizes
- -- this can be changed back when ported to find() as it has built-in fallback mechanism
- mathType = 'CUDNN_DATA_FLOAT'
- end
- errcheck('cudnnSetConvolutionNdDescriptor', convDesc[0],
- 2, pad:data(),
- stride:data(), upscale:data(), 'CUDNN_CROSS_CORRELATION',
- mathType)
- local function destroyConvDesc(d)
- errcheck('cudnnDestroyConvolutionDescriptor', d[0]);
- end
- ffi.gc(convDesc, destroyConvDesc)
-
+ local convDesc = cudnn.setConvolutionDescriptor(
+ { padA = {padH, padW},
+ filterStrideA = {strideH, strideW},
+ dataType = getMathType(weight)
+ }
+ );
-- create input, output descriptor
local iDesc = cudnn.toDescriptor(input)
local oDesc = cudnn.toDescriptor(output)
@@ -256,38 +218,17 @@ cudnn.functional.Convolution2D_accGradParameters = function(handle, input, gradW
local scaleT = torch.type(gradWeight) == 'torch.CudaDoubleTensor'
and torch.DoubleTensor({scale}) or torch.FloatTensor({scale})
-- create a weight descriptor
- local weightDesc = ffi.new('struct cudnnFilterStruct*[1]')
- errcheck('cudnnCreateFilterDescriptor', weightDesc)
local nOutputPlane, nInputPlane, kH, kW
= gradWeight:size(1), gradWeight:size(2), gradWeight:size(3), gradWeight:size(4)
- local desc = torch.IntTensor({nOutputPlane, nInputPlane, kH, kW})
- errcheck('cudnnSetFilterNdDescriptor', weightDesc[0], cudnn.typemap[torch.type(input)], 'CUDNN_TENSOR_NCHW', 4,
- desc:data());
- local function destroyWDesc(d)
- errcheck('cudnnDestroyFilterDescriptor', d[0]);
- end
- ffi.gc(weightDesc, destroyWDesc)
+ local weightDesc = cudnn.setFilterDescriptor({ dataType = cudnn.typemap[torch.type(input)],
+ filterDimA = {nOutputPlane, nInputPlane, kH, kW}})
-- create a convolution descriptor
- local convDesc = ffi.new('struct cudnnConvolutionStruct*[1]')
- errcheck('cudnnCreateConvolutionDescriptor', convDesc)
- local pad = torch.IntTensor({padH, padW})
- local stride = torch.IntTensor({strideH, strideW})
- local upscale = torch.IntTensor({1,1})
- local mathType = cudnn.configmap(torch.type(gradWeight))
- if mathType == 'CUDNN_DATA_HALF' then
- -- explicitly set math type to fp32 to avoid possible failures with fp16 and exotic sizes
- -- this can be changed back when ported to find() as it has built-in fallback mechanism
- mathType = 'CUDNN_DATA_FLOAT'
- end
- errcheck('cudnnSetConvolutionNdDescriptor', convDesc[0],
- 2, pad:data(),
- stride:data(), upscale:data(), 'CUDNN_CROSS_CORRELATION',
- mathType)
- local function destroyConvDesc(d)
- errcheck('cudnnDestroyConvolutionDescriptor', d[0]);
- end
- ffi.gc(convDesc, destroyConvDesc)
+ local convDesc = cudnn.setConvolutionDescriptor(
+ { padA = {padH, padW},
+ filterStrideA = {strideH, strideW},
+ dataType = getMathType(gradWeight) }
+ );
-- create input, output descriptor
local iDesc = cudnn.toDescriptor(input)
diff --git a/init.lua b/init.lua
index 357e367..594b93f 100644
--- a/init.lua
+++ b/init.lua
@@ -196,6 +196,55 @@ function cudnn.createDescriptors(count, descs_type, create_func, destroy_func)
return ds
end
+
+function cudnn.getConvolutionDescriptor(desc)
+ local CUDNN_DIM_MAX=4
+ local data = {
+ dim_p = ffi.new('int[1]'),
+ padA = ffi.new('int[?]', CUDNN_DIM_MAX),
+ filterStrideA = ffi.new('int[?]', CUDNN_DIM_MAX),
+ upscaleA = ffi.new('int[?]', CUDNN_DIM_MAX),
+ mode_p = ffi.new('cudnnConvolutionMode_t[1]'),
+ math_p = ffi.new('cudnnDataType_t[1]')
+ }
+
+ errcheck('cudnnGetConvolutionNdDescriptor', desc[0], CUDNN_DIM_MAX,
+ data.dim_p, data.padA, data.filterStrideA,
+ data.upscaleA, data.mode_p, data.math_p)
+
+ data.arrayLength = data.dim_p[0]
+ data.mode = data.mode_p[0]
+ data.dataType = data.math_p[0]
+ -- convert the ffi arrays to plain Lua tables so the result can be fed
+ -- back to cudnn.setConvolutionDescriptor (which expects table fields)
+ local function toTable(a, n)
+ local t = {}
+ for i = 1, n do t[i] = a[i-1] end
+ return t
+ end
+ data.padA = toTable(data.padA, data.arrayLength)
+ data.filterStrideA = toTable(data.filterStrideA, data.arrayLength)
+ data.upscaleA = toTable(data.upscaleA, data.arrayLength)
+ return data
+end
+
+function cudnn.setConvolutionDescriptor(data, desc)
+ local dim = data.arrayLength or #data.padA
+ local upscale = data.upscaleA or torch.IntStorage(dim):fill(1)
+ local myDesc = desc or cudnn.createDescriptors(
+ 1, 'struct cudnnConvolutionStruct*[?]',
+ 'cudnnCreateConvolutionDescriptor', 'cudnnDestroyConvolutionDescriptor')
+ errcheck('cudnnSetConvolutionNdDescriptor', myDesc[0],
+ dim,
+ torch.IntTensor(data.padA):data(),
+ torch.IntTensor(data.filterStrideA):data(),
+ torch.IntTensor(upscale):data(),
+ data.mode or 'CUDNN_CROSS_CORRELATION',
+ data.dataType)
+ return myDesc
+end
+
+function cudnn.setFilterDescriptor(data, filterDesc)
+ local myDesc = filterDesc or cudnn.createDescriptors(
+ 1, 'struct cudnnFilterStruct*[?]',
+ 'cudnnCreateFilterDescriptor', 'cudnnDestroyFilterDescriptor')
+ local dims = data.nbDims or #data.filterDimA
+ errcheck('cudnnSetFilterNdDescriptor', myDesc[0],
+ data.dataType, data.format or 'CUDNN_TENSOR_NCHW',
+ dims, torch.IntTensor(data.filterDimA):data());
+ return myDesc
+end
+
local sharedBuffer = {}
local nextBufferSize = {}
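
One design note on the helpers above: the descriptor argument is optional in both setters. When omitted, a fresh descriptor is allocated through cudnn.createDescriptors, which registers the matching destroy call for garbage collection, so callers need no explicit cleanup; when supplied, the existing descriptor is rewritten in place. A short hedged sketch of both forms, with illustrative field values:

    -- allocate a new, garbage-collected descriptor
    local d = cudnn.setConvolutionDescriptor{
       padA = {1, 1}, filterStrideA = {1, 1}, dataType = 'CUDNN_DATA_FLOAT' }

    -- rewrite the same descriptor in place, e.g. after changing the stride
    cudnn.setConvolutionDescriptor(
       { padA = {1, 1}, filterStrideA = {2, 2}, dataType = 'CUDNN_DATA_FLOAT' }, d)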
diff --git a/test/test.lua b/test/test.lua
index bd7437a..ac3e721 100644
--- a/test/test.lua
+++ b/test/test.lua
@@ -934,9 +934,10 @@ math.randomseed(os.time())
mytester = torch.Tester()
mytester:add(cudnntest)
--- cudnn.verbose=true
--- cudnn.find.verbose=true
+cudnn.verbose=false
+cudnn.find.verbose=false
cudnn.useFindEx=false
+cudnn.useFloatMathForHalf = true
for i = 1, cutorch.getDeviceCount() do