
github.com/soumith/cudnn.torch.git
author     Boris Fomitchev <bfomitchev@nvidia.com>  2016-10-19 09:31:04 +0300
committer  Boris Fomitchev <bfomitchev@nvidia.com>  2016-10-19 09:31:04 +0300
commit     127fabc32a2c28262d786d83aaeaaf94c3059dd4 (patch)
tree       67cbb8cd3f19794d0469ea816d3a361924b35240
parent     e08c0e4444a2a3f2177565f765414ec2389e9870 (diff)
Refactored convolution and filter descriptor setup into shared helpers
-rw-r--r--  RNN.lua                          24
-rw-r--r--  SpatialConvolution.lua           50
-rw-r--r--  SpatialFullConvolution.lua       23
-rw-r--r--  VolumetricConvolution.lua        21
-rw-r--r--  VolumetricFullConvolution.lua    25
-rw-r--r--  find.lua                         33
-rw-r--r--  functional.lua                  123
-rw-r--r--  init.lua                         49
-rw-r--r--  test/test.lua                     5
9 files changed, 153 insertions(+), 200 deletions(-)
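
In short, this commit replaces the repeated raw errcheck('cudnnSetFilterNdDescriptor', ...) and errcheck('cudnnSetConvolutionNdDescriptor', ...) FFI boilerplate with table-driven helpers added to init.lua: cudnn.setFilterDescriptor, cudnn.setConvolutionDescriptor, and a matching cudnn.getConvolutionDescriptor. A minimal sketch of the new filter-descriptor call; the sizes below are illustrative, not taken from the diff:

    -- sketch only: sizes are illustrative, not from this commit
    local wDesc = cudnn.setFilterDescriptor{
       dataType   = 'CUDNN_DATA_FLOAT',  -- or cudnn.typemap[torch.type(weight)]
       filterDimA = {64, 3, 3, 3},       -- nOutputPlane, nInputPlane, kH, kW
       -- format defaults to 'CUDNN_TENSOR_NCHW', nbDims to #filterDimA
    }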
diff --git a/RNN.lua b/RNN.lua
index e3e278f..5a5be4a 100644
--- a/RNN.lua
+++ b/RNN.lua
@@ -75,12 +75,6 @@ function RNN:createDropoutDescriptors(count)
'cudnnDestroyDropoutDescriptor')
end
-function RNN:createFilterDescriptors(count)
- return cudnn.createDescriptors(count,
- 'cudnnFilterDescriptor_t[?]',
- 'cudnnCreateFilterDescriptor',
- 'cudnnDestroyFilterDescriptor')
-end
function RNN:createRNNDescriptors(count)
return cudnn.createDescriptors(count,
@@ -131,18 +125,12 @@ function RNN:resetRNNDescriptor()
end
function RNN:resetWeightDescriptor()
- if not self.wDesc then
- self.wDesc = self:createFilterDescriptors(1)
- end
-
- local dim = torch.IntTensor({self.weight:size(1), 1, 1})
-
- errcheck('cudnnSetFilterNdDescriptor',
- self.wDesc[0],
- self.datatype,
- 'CUDNN_TENSOR_NCHW',
- 3,
- dim:data())
+ self.wDesc = cudnn.setFilterDescriptor(
+ { dataType = self.datatype,
+ filterDimA = {self.weight:size(1), 1, 1}
+ },
+ self.wDesc
+ )
end
function RNN:resetIODescriptors()
diff --git a/SpatialConvolution.lua b/SpatialConvolution.lua
index 4a56980..3a09370 100644
--- a/SpatialConvolution.lua
+++ b/SpatialConvolution.lua
@@ -25,30 +25,27 @@ function SpatialConvolution:__init(nInputPlane, nOutputPlane,
self.reset = nil
end
-function SpatialConvolution:createWeightDescriptors()
+-- if you change the configuration of the module manually, call this
+function SpatialConvolution:resetWeightDescriptors(desc)
+ -- for compatibility
+ self.groups = self.groups or 1
assert(cudnn.typemap[torch.typename(self.weight)], 'Only Cuda supported duh!')
assert(cudnn.typemap[torch.typename(self.bias)] or not self.bias, 'Only Cuda supported duh!')
+
-- create descriptor for bias
if self.bias then
self.biasDesc = cudnn.toDescriptor(self.bias:view(1, self.nOutputPlane,1,1))
end
- -- create filterDescriptor for weight
- return cudnn.createDescriptors(1, 'struct cudnnFilterStruct*[?]',
- 'cudnnCreateFilterDescriptor', 'cudnnDestroyFilterDescriptor')
-end
--- if you change the configuration of the module manually, call this
-function SpatialConvolution:resetWeightDescriptors(desc)
- -- for compatibility
- self.groups = self.groups or 1
- self.weightDesc = SpatialConvolution.createWeightDescriptors(self)
- desc = desc or torch.IntTensor({self.nOutputPlane/self.groups,
- self.nInputPlane/self.groups,
- self.kH, self.kW})
-
- errcheck('cudnnSetFilterNdDescriptor', self.weightDesc[0],
- cudnn.typemap[torch.typename(self.weight)], 'CUDNN_TENSOR_NCHW', desc:nElement(),
- desc:data());
+ self.weightDesc = cudnn.setFilterDescriptor(
+ { dataType = cudnn.typemap[torch.typename(self.weight)],
+ filterDimA = desc or
+ {self.nOutputPlane/self.groups,
+ self.nInputPlane/self.groups,
+ self.kH, self.kW}
+ }
+ )
+
return self
end
@@ -97,6 +94,7 @@ function SpatialConvolution:checkInputChanged(input)
end
self.groups = self.groups or 1
if not self.weightDesc then self:resetWeightDescriptors() end
+ if not self.weightDesc then error "Weights not assigned!" end
if not self.iDesc or not self.oDesc or input:size(1) ~= self.iSize[1] or input:size(2) ~= self.iSize[2]
or input:size(3) ~= self.iSize[3] or input:size(4) ~= self.iSize[4] or (input:dim()==5 and input:size(5) ~= self.iSize[5]) then
@@ -124,17 +122,17 @@ function SpatialConvolution:createIODescriptors(input)
local input_slice = input:narrow(2,1,self.nInputPlane/self.groups)
self.iDesc = cudnn.toDescriptor(input_slice)
-- create conv descriptor
- self.convDesc = cudnn.createDescriptors(1, 'struct cudnnConvolutionStruct*[?]',
- 'cudnnCreateConvolutionDescriptor', 'cudnnDestroyConvolutionDescriptor')
self.padH, self.padW = self.padH or 0, self.padW or 0
- self.pad = torch.IntTensor({self.padH, self.padW})
- self.stride = torch.IntTensor({self.dH, self.dW})
- local upscale = torch.IntTensor({1,1})
- errcheck('cudnnSetConvolutionNdDescriptor', self.convDesc[0],
- 2, self.pad:data(),
- self.stride:data(), upscale:data(), 'CUDNN_CROSS_CORRELATION',
- cudnn.configmap(torch.type(self.weight)));
+ -- these are needed to calculate the autotuner hash
+ self.pad = {self.padH, self.padW}
+ self.stride = {self.dH, self.dW}
+ self.convDesc = cudnn.setConvolutionDescriptor(
+ { padA = self.pad,
+ filterStrideA = self.stride,
+ upscaleA = {1,1},
+ dataType = cudnn.configmap(torch.type(self.weight))
+ })
-- get output shape, resize output
local oSize = torch.IntTensor(4)
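
For comparison, the convolution-descriptor setup above collapses from the old multi-argument cudnnSetConvolutionNdDescriptor call into one helper call. A hedged sketch with illustrative padding and stride values; upscaleA and mode may be omitted, since the helper defaults them to all ones and 'CUDNN_CROSS_CORRELATION':

    -- sketch only: values are illustrative, not from this commit
    local convDesc = cudnn.setConvolutionDescriptor{
       padA          = {0, 0},   -- padH, padW
       filterStrideA = {1, 1},   -- dH, dW
       dataType      = cudnn.configmap('torch.CudaTensor'),
    }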
diff --git a/SpatialFullConvolution.lua b/SpatialFullConvolution.lua
index 528dae9..f598435 100644
--- a/SpatialFullConvolution.lua
+++ b/SpatialFullConvolution.lua
@@ -8,9 +8,9 @@ local checkedCall = find.checkedCall
local Convolution = cudnn.SpatialConvolution
function SpatialFullConvolution:resetWeightDescriptors()
- return Convolution.resetWeightDescriptors(self, torch.IntTensor({self.nInputPlane,
- self.nOutputPlane,
- self.kH, self.kW}))
+ return Convolution.resetWeightDescriptors(self, {self.nInputPlane,
+ self.nOutputPlane,
+ self.kH, self.kW})
end
function SpatialFullConvolution:fastest(mode)
@@ -44,15 +44,14 @@ function SpatialFullConvolution:createIODescriptors(input)
self.iDesc = cudnn.toDescriptor(input_slice)
-- create conv descriptor
- self.convDesc = cudnn.createDescriptors(1, 'struct cudnnConvolutionStruct*[?]',
- 'cudnnCreateConvolutionDescriptor', 'cudnnDestroyConvolutionDescriptor')
- self.pad = torch.IntTensor({self.padH, self.padW})
- self.stride = torch.IntTensor({self.dH, self.dW})
- local upscale = torch.IntTensor({1,1})
- errcheck('cudnnSetConvolutionNdDescriptor', self.convDesc[0],
- 2, self.pad:data(),
- self.stride:data(), upscale:data(), 'CUDNN_CROSS_CORRELATION',
- cudnn.configmap(torch.type(self.weight)));
+ self.pad = {self.padH, self.padW}
+ self.stride = {self.dH, self.dW}
+
+ self.convDesc = cudnn.setConvolutionDescriptor(
+ { padA = self.pad,
+ filterStrideA = self.stride,
+ dataType = cudnn.configmap(torch.type(self.weight))
+ })
-- get output shape, resize output
local iwidth = input:size(4)
diff --git a/VolumetricConvolution.lua b/VolumetricConvolution.lua
index 03d893e..7a7c1f0 100644
--- a/VolumetricConvolution.lua
+++ b/VolumetricConvolution.lua
@@ -8,8 +8,8 @@ local Convolution = cudnn.SpatialConvolution
-- if you change the configuration of the module manually, call this
function VolumetricConvolution:resetWeightDescriptors()
- local desc = torch.IntTensor({self.nOutputPlane, self.nInputPlane,
- self.kT, self.kH, self.kW})
+ local desc = {self.nOutputPlane, self.nInputPlane,
+ self.kT, self.kH, self.kW}
return Convolution.resetWeightDescriptors(self,desc)
end
@@ -35,21 +35,18 @@ function VolumetricConvolution:createIODescriptors(input)
-- create input descriptor
self.iDesc = cudnn.toDescriptor(input)
-- create conv descriptor
- self.convDesc = cudnn.createDescriptors(1, 'struct cudnnConvolutionStruct*[?]',
- 'cudnnCreateConvolutionDescriptor', 'cudnnDestroyConvolutionDescriptor')
- self.pad = torch.IntTensor({self.padT, self.padH, self.padW})
- self.stride = torch.IntTensor({self.dT, self.dH, self.dW})
- local upscale = torch.IntTensor({1,1,1})
+ self.pad = {self.padT, self.padH, self.padW}
+ self.stride = {self.dT, self.dH, self.dW}
+
local mathtype=cudnn.configmap(torch.type(self.weight))
-- 3D convolutions do not work in 16 bits
if mathtype == 'CUDNN_DATA_HALF' then
mathtype = 'CUDNN_DATA_FLOAT'
end
- errcheck('cudnnSetConvolutionNdDescriptor', self.convDesc[0],
- 3, self.pad:data(),
- self.stride:data(), upscale:data(), 'CUDNN_CROSS_CORRELATION',
- mathtype);
- -- create output descriptor and resize output
+ self.convDesc = cudnn.setConvolutionDescriptor(
+ { padA = self.pad, filterStrideA = self.stride,
+ dataType = mathtype
+ })
local oSize = torch.IntTensor(5)
errcheck('cudnnGetConvolutionNdForwardOutputDim',
diff --git a/VolumetricFullConvolution.lua b/VolumetricFullConvolution.lua
index ad6110d..8f8bac6 100644
--- a/VolumetricFullConvolution.lua
+++ b/VolumetricFullConvolution.lua
@@ -9,9 +9,10 @@ local Convolution = cudnn.SpatialConvolution
-- if you change the configuration of the module manually, call this
function VolumetricFullConvolution:resetWeightDescriptors()
- local desc = torch.IntTensor({self.nInputPlane, self.nOutputPlane,
- self.kT, self.kH, self.kW})
- return Convolution.resetWeightDescriptors(self,desc)
+ return Convolution.resetWeightDescriptors(
+ self,
+ {self.nInputPlane, self.nOutputPlane, self.kT, self.kH, self.kW}
+ )
end
function VolumetricFullConvolution:fastest(mode)
@@ -38,20 +39,16 @@ function VolumetricFullConvolution:createIODescriptors(input)
assert(input:dim() == 5 and input:isContiguous());
self.iSize = self.iSize or torch.LongStorage(5):fill(0)
if Convolution.checkInputChanged(self, input) then
+ -- create input descriptor
local input_slice = input[{{},{1,self.nInputPlane},{},{}}]
self.iDesc = cudnn.toDescriptor(input_slice)
- -- create input descriptor
--- self.iDesc = cudnn.toDescriptor(input)
-- create conv descriptor
- self.convDesc = cudnn.createDescriptors(1, 'struct cudnnConvolutionStruct*[?]',
- 'cudnnCreateConvolutionDescriptor', 'cudnnDestroyConvolutionDescriptor')
- self.pad = torch.IntTensor({self.padT, self.padH, self.padW})
- self.stride = torch.IntTensor({self.dT, self.dH, self.dW})
- local upscale = torch.IntTensor({1,1,1})
- errcheck('cudnnSetConvolutionNdDescriptor', self.convDesc[0],
- 3, self.pad:data(),
- self.stride:data(), upscale:data(), 'CUDNN_CROSS_CORRELATION',
- cudnn.configmap(torch.type(self.weight)));
+ self.pad = {self.padT, self.padH, self.padW}
+ self.stride = {self.dT, self.dH, self.dW}
+ self.convDesc = cudnn.setConvolutionDescriptor(
+ { padA = self.pad, filterStrideA = self.stride,
+ dataType = cudnn.configmap(torch.type(self.weight))
+ })
-- get output shape, resize output
local iwidth = input:size(5)
diff --git a/find.lua b/find.lua
index 5e1dfd4..3ed3657 100644
--- a/find.lua
+++ b/find.lua
@@ -70,15 +70,9 @@ local function verboseCall(layer, f, ...)
end
local status = cudnn.call(f, ...)
if status ~= ffi.C.CUDNN_STATUS_SUCCESS and (find.verbose or find.verboseError) then
- local stride = ffi.new('int[8]')
- local upscale = ffi.new('int[8]')
- local dim = ffi.new('int[8]')
- local mode = ffi.new('cudnnConvolutionMode_t[8]')
- local datatype = ffi.new('cudnnDataType_t[8]')
- cudnn.call('cudnnGetConvolutionNdDescriptor', layer.convDesc[0],
- 4, dim, pad, stride,
- upscale, mode, datatype)
- print("find:verboseCall:" .. f .. " failed: ", tonumber(status) , ' mode : ', tonumber(mode[0]), ' datatype : ', tonumber(datatype[0]))
+ local desc = cudnn.getConvolutionDescriptor(layer.convDesc)
+ print("find:verboseCall:" .. f .. " failed: ", tonumber(status), ' mode : ',
+ desc.mode, ' datatype : ', desc.dataType)
end
if find.verbose then
print("find:verboseCall: success, " .. f )
@@ -105,18 +99,8 @@ end
local function defaultFallback(layer, replay)
-- read conv descriptor
- local pad = ffi.new('int[8]')
- local stride = ffi.new('int[8]')
- local upscale = ffi.new('int[8]')
- local dim = ffi.new('int[8]')
- local mode = ffi.new('cudnnConvolutionMode_t[8]')
- local datatype = ffi.new('cudnnDataType_t[8]')
-
- checkedCall(layer,'cudnnGetConvolutionNdDescriptor', layer.convDesc[0],
- 5, dim, pad, stride,
- upscale, mode, datatype)
-
- if datatype[0] == ffi.C.CUDNN_DATA_HALF then
+ local convDescData = cudnn.getConvolutionDescriptor(layer.convDesc)
+ if convDescData.dataType == ffi.C.CUDNN_DATA_HALF then
if find.verbose then
if replay then
print("find.defaultFallback: replay for ", layer.autotunerHash)
@@ -124,9 +108,8 @@ local function defaultFallback(layer, replay)
print("find.defaultFallback: no 16-bit float algo found, will try 32 bits for ", layer.autotunerHash)
end
end
- checkedCall(layer,'cudnnSetConvolutionNdDescriptor', layer.convDesc[0],
- dim[0], pad, stride,
- upscale, mode[0], ffi.C.CUDNN_DATA_FLOAT)
+ convDescData.dataType = ffi.C.CUDNN_DATA_FLOAT
+ cudnn.setConvolutionDescriptor(convDescData, layer.convDesc)
return true
else
return false
@@ -461,7 +444,7 @@ function find:prepare(layer, input_slice, output_slice)
return table.concat(x:size():totable(),',')
end
local function vals(x)
- return table.concat(x:totable(),',')
+ return table.concat(x,',')
end
layer.autotunerHash =
'-dimA' .. shape(input_slice)
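
The new defaultFallback above reduces to a read-modify-write round-trip through the descriptor table. A minimal sketch of that pattern, assuming layer is a cudnn module whose convDesc has already been set:

    -- sketch of the fp16-to-fp32 fallback round-trip, mirroring defaultFallback
    local ffi = require 'ffi'
    local convDescData = cudnn.getConvolutionDescriptor(layer.convDesc)
    if convDescData.dataType == ffi.C.CUDNN_DATA_HALF then
       convDescData.dataType = ffi.C.CUDNN_DATA_FLOAT
       cudnn.setConvolutionDescriptor(convDescData, layer.convDesc)  -- rewrite in place
    end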
diff --git a/functional.lua b/functional.lua
index a03d5a3..deaf839 100644
--- a/functional.lua
+++ b/functional.lua
@@ -7,11 +7,15 @@ local ffi = require 'ffi'
local errcheck = cudnn.errcheck
cudnn.functional = {}
-
-
-
-
-
+local function getMathType(weight)
+ local mathType = cudnn.configmap(torch.type(weight))
+ if mathType == 'CUDNN_DATA_HALF' then
+ -- explicitly set math type to fp32 to avoid possible failures with fp16 and exotic sizes
+ -- this can be changed back when ported to find() as it has built-in fallback mechanism
+ mathType = 'CUDNN_DATA_FLOAT'
+ end
+ return mathType
+end
local function Batch2D(t)
return t:view(1, t:size(1), t:size(2), t:size(3))
@@ -68,43 +72,21 @@ cudnn.functional.Convolution2D_updateOutput = function(handle, input, weight, ou
output = output:dim() == 3 and Batch2D(output) or output
-- create a weight descriptor
- local weightDesc = ffi.new('struct cudnnFilterStruct*[1]')
- errcheck('cudnnCreateFilterDescriptor', weightDesc)
local nOutputPlane, nInputPlane, kH, kW
= weight:size(1), weight:size(2), weight:size(3), weight:size(4)
- local desc = torch.IntTensor({nOutputPlane, nInputPlane, kH, kW})
- errcheck('cudnnSetFilterNdDescriptor', weightDesc[0], cudnn.typemap[torch.type(input)], 'CUDNN_TENSOR_NCHW', 4,
- desc:data());
- local function destroyWDesc(d)
- errcheck('cudnnDestroyFilterDescriptor', d[0]);
- end
- ffi.gc(weightDesc, destroyWDesc)
+ local weightDesc = cudnn.setFilterDescriptor(
+ { dataType = cudnn.typemap[torch.type(input)],
+ filterDimA = {nOutputPlane, nInputPlane, kH, kW}})
-- create a convolution descriptor
- local convDesc = ffi.new('struct cudnnConvolutionStruct*[1]')
- errcheck('cudnnCreateConvolutionDescriptor', convDesc)
- local pad = torch.IntTensor({padH, padW})
- local stride = torch.IntTensor({strideH, strideW})
- local upscale = torch.IntTensor({1,1})
- local mathType = cudnn.configmap(torch.type(weight))
- if mathType == 'CUDNN_DATA_HALF' then
- -- explicitly set math type to fp32 to avoid possible failures with fp16 and exotic sizes
- -- this can be changed back when ported to find() as it has built-in fallback mechanism
- mathType = 'CUDNN_DATA_FLOAT'
- end
- errcheck('cudnnSetConvolutionNdDescriptor', convDesc[0],
- 2, pad:data(),
- stride:data(), upscale:data(), 'CUDNN_CROSS_CORRELATION',
- mathType
+ local convDesc = cudnn.setConvolutionDescriptor(
+ { padA = {padH, padW},
+ filterStrideA = {strideH, strideW},
+ dataType = getMathType(weight) }
);
- local function destroyConvDesc(d)
- errcheck('cudnnDestroyConvolutionDescriptor', d[0]);
- end
- ffi.gc(convDesc, destroyConvDesc)
-- create input descriptor
local iDesc = cudnn.toDescriptor(input)
-
-- create output descriptor
local oSize = torch.IntTensor(4)
errcheck('cudnnGetConvolutionNdForwardOutputDim',
@@ -169,39 +151,19 @@ cudnn.functional.Convolution2D_updateGradInput = function(handle, input, weight,
gradInput = gradInput:dim() == 3 and Batch2D(gradInput) or gradInput
-- create a weight descriptor
- local weightDesc = ffi.new('struct cudnnFilterStruct*[1]')
- errcheck('cudnnCreateFilterDescriptor', weightDesc)
local nOutputPlane, nInputPlane, kH, kW
= weight:size(1), weight:size(2), weight:size(3), weight:size(4)
- local desc = torch.IntTensor({nOutputPlane, nInputPlane, kH, kW})
- errcheck('cudnnSetFilterNdDescriptor', weightDesc[0], cudnn.typemap[torch.type(input)], 'CUDNN_TENSOR_NCHW', 4,
- desc:data());
- local function destroyWDesc(d)
- errcheck('cudnnDestroyFilterDescriptor', d[0]);
- end
- ffi.gc(weightDesc, destroyWDesc)
+ local weightDesc = cudnn.setFilterDescriptor(
+ { dataType = cudnn.typemap[torch.type(input)],
+ filterDimA = {nOutputPlane, nInputPlane, kH, kW} })
-- create a convolution descriptor
- local convDesc = ffi.new('struct cudnnConvolutionStruct*[1]')
- errcheck('cudnnCreateConvolutionDescriptor', convDesc)
- local pad = torch.IntTensor({padH, padW})
- local stride = torch.IntTensor({strideH, strideW})
- local upscale = torch.IntTensor({1,1})
- local mathType = cudnn.configmap(torch.type(weight))
- if mathType == 'CUDNN_DATA_HALF' then
- -- explicitly set math type to fp32 to avoid possible failures with fp16 and exotic sizes
- -- this can be changed back when ported to find() as it has built-in fallback mechanism
- mathType = 'CUDNN_DATA_FLOAT'
- end
- errcheck('cudnnSetConvolutionNdDescriptor', convDesc[0],
- 2, pad:data(),
- stride:data(), upscale:data(), 'CUDNN_CROSS_CORRELATION',
- mathType)
- local function destroyConvDesc(d)
- errcheck('cudnnDestroyConvolutionDescriptor', d[0]);
- end
- ffi.gc(convDesc, destroyConvDesc)
-
+ local convDesc = cudnn.setConvolutionDescriptor(
+ { padA = {padH, padW},
+ filterStrideA = {strideH, strideW},
+ dataType = getMathType(weight)
+ }
+ );
-- create input, output descriptor
local iDesc = cudnn.toDescriptor(input)
local oDesc = cudnn.toDescriptor(output)
@@ -256,38 +218,17 @@ cudnn.functional.Convolution2D_accGradParameters = function(handle, input, gradW
local scaleT = torch.type(gradWeight) == 'torch.CudaDoubleTensor'
and torch.DoubleTensor({scale}) or torch.FloatTensor({scale})
-- create a weight descriptor
- local weightDesc = ffi.new('struct cudnnFilterStruct*[1]')
- errcheck('cudnnCreateFilterDescriptor', weightDesc)
local nOutputPlane, nInputPlane, kH, kW
= gradWeight:size(1), gradWeight:size(2), gradWeight:size(3), gradWeight:size(4)
- local desc = torch.IntTensor({nOutputPlane, nInputPlane, kH, kW})
- errcheck('cudnnSetFilterNdDescriptor', weightDesc[0], cudnn.typemap[torch.type(input)], 'CUDNN_TENSOR_NCHW', 4,
- desc:data());
- local function destroyWDesc(d)
- errcheck('cudnnDestroyFilterDescriptor', d[0]);
- end
- ffi.gc(weightDesc, destroyWDesc)
+ local weightDesc = cudnn.setFilterDescriptor({ dataType = cudnn.typemap[torch.type(input)],
+ filterDimA = {nOutputPlane, nInputPlane, kH, kW}})
-- create a convolution descriptor
- local convDesc = ffi.new('struct cudnnConvolutionStruct*[1]')
- errcheck('cudnnCreateConvolutionDescriptor', convDesc)
- local pad = torch.IntTensor({padH, padW})
- local stride = torch.IntTensor({strideH, strideW})
- local upscale = torch.IntTensor({1,1})
- local mathType = cudnn.configmap(torch.type(gradWeight))
- if mathType == 'CUDNN_DATA_HALF' then
- -- explicitly set math type to fp32 to avoid possible failures with fp16 and exotic sizes
- -- this can be changed back when ported to find() as it has built-in fallback mechanism
- mathType = 'CUDNN_DATA_FLOAT'
- end
- errcheck('cudnnSetConvolutionNdDescriptor', convDesc[0],
- 2, pad:data(),
- stride:data(), upscale:data(), 'CUDNN_CROSS_CORRELATION',
- mathType)
- local function destroyConvDesc(d)
- errcheck('cudnnDestroyConvolutionDescriptor', d[0]);
- end
- ffi.gc(convDesc, destroyConvDesc)
+ local convDesc = cudnn.setConvolutionDescriptor(
+ { padA = {padH, padW},
+ filterStrideA = {strideH, strideW},
+ dataType = getMathType(gradWeight) }
+ );
-- create input, output descriptor
local iDesc = cudnn.toDescriptor(input)
diff --git a/init.lua b/init.lua
index 357e367..594b93f 100644
--- a/init.lua
+++ b/init.lua
@@ -196,6 +196,55 @@ function cudnn.createDescriptors(count, descs_type, create_func, destroy_func)
return ds
end
+
+function cudnn.getConvolutionDescriptor(desc)
+ local CUDNN_DIM_MAX=4
+ local data = {
+ dim_p = ffi.new('int[1]'),
+ padA = ffi.new('int[?]', CUDNN_DIM_MAX),
+ filterStrideA = ffi.new('int[?]', CUDNN_DIM_MAX),
+ upscaleA = ffi.new('int[?]', CUDNN_DIM_MAX),
+ mode_p = ffi.new('cudnnConvolutionMode_t[1]'),
+ math_p = ffi.new('cudnnDataType_t[1]')
+ }
+
+ errcheck('cudnnGetConvolutionNdDescriptor', desc[0], CUDNN_DIM_MAX,
+ data.dim_p, data.padA, data.filterStrideA,
+ data.upscaleA, data.mode_p, data.math_p)
+
+ data.arrayLength = data.dim_p[0]
+ data.mode = data.mode_p[0]
+ data.dataType = data.math_p[0]
+ -- convert the ffi arrays to plain Lua tables so the result can be fed
+ -- back to cudnn.setConvolutionDescriptor (which expects table fields)
+ local function toTable(a, n)
+ local t = {}
+ for i = 1, n do t[i] = a[i-1] end
+ return t
+ end
+ data.padA = toTable(data.padA, data.arrayLength)
+ data.filterStrideA = toTable(data.filterStrideA, data.arrayLength)
+ data.upscaleA = toTable(data.upscaleA, data.arrayLength)
+ return data
+end
+
+function cudnn.setConvolutionDescriptor(data, desc)
+ local dim = data.arrayLength or #data.padA
+ local upscale = data.upscaleA or torch.IntStorage(dim):fill(1)
+ local myDesc = desc or cudnn.createDescriptors(
+ 1, 'struct cudnnConvolutionStruct*[?]',
+ 'cudnnCreateConvolutionDescriptor', 'cudnnDestroyConvolutionDescriptor')
+ errcheck('cudnnSetConvolutionNdDescriptor', myDesc[0],
+ dim,
+ torch.IntTensor(data.padA):data(),
+ torch.IntTensor(data.filterStrideA):data(),
+ torch.IntTensor(upscale):data(),
+ data.mode or 'CUDNN_CROSS_CORRELATION',
+ data.dataType)
+ return myDesc
+end
+
+function cudnn.setFilterDescriptor(data, filterDesc)
+ local myDesc = filterDesc or cudnn.createDescriptors(
+ 1, 'struct cudnnFilterStruct*[?]',
+ 'cudnnCreateFilterDescriptor', 'cudnnDestroyFilterDescriptor')
+ local dims = data.nbDims or #data.filterDimA
+ errcheck('cudnnSetFilterNdDescriptor', myDesc[0],
+ data.dataType, data.format or 'CUDNN_TENSOR_NCHW',
+ dims, torch.IntTensor(data.filterDimA):data());
+ return myDesc
+end
+
local sharedBuffer = {}
local nextBufferSize = {}
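
One design note on the helpers above: the descriptor argument is optional in both setters. When omitted, a fresh descriptor is allocated through cudnn.createDescriptors, which registers the matching destroy call for garbage collection, so callers need no explicit cleanup; when supplied, the existing descriptor is rewritten in place. A short hedged sketch of both forms, with illustrative field values:

    -- allocate a new, garbage-collected descriptor
    local d = cudnn.setConvolutionDescriptor{
       padA = {1, 1}, filterStrideA = {1, 1}, dataType = 'CUDNN_DATA_FLOAT' }

    -- rewrite the same descriptor in place, e.g. after changing the stride
    cudnn.setConvolutionDescriptor(
       { padA = {1, 1}, filterStrideA = {2, 2}, dataType = 'CUDNN_DATA_FLOAT' }, d)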
diff --git a/test/test.lua b/test/test.lua
index bd7437a..ac3e721 100644
--- a/test/test.lua
+++ b/test/test.lua
@@ -934,9 +934,10 @@ math.randomseed(os.time())
mytester = torch.Tester()
mytester:add(cudnntest)
--- cudnn.verbose=true
--- cudnn.find.verbose=true
+cudnn.verbose=false
+cudnn.find.verbose=false
cudnn.useFindEx=false
+cudnn.useFloatMathForHalf = true
for i = 1, cutorch.getDeviceCount() do