R5 rebase

author: Sergey Zagoruyko <zagoruyko2@gmail.com> 2016-04-13 16:01:55 +0300
committer: Sergey Zagoruyko <zagoruyko2@gmail.com> 2016-04-13 19:01:48 +0300
commit: 70adafcae259f67129c2de6e1048594aa0283e59 (patch)
tree: 432358dc7274ec86132e2c3bb40b655b0f4a4f61 /SpatialConvolution.lua
parent: 0a040cf2bb3d0f0cdf217c8e92efaddf29ed2efc (diff)
1 files changed, 12 insertions, 6 deletions
diff --git a/SpatialConvolution.lua b/SpatialConvolution.lua
index 20f31ef..b92dd57 100644
--- a/SpatialConvolution.lua
+++ b/SpatialConvolution.lua
@@ -43,7 +43,7 @@ function SpatialConvolution:resetWeightDescriptors()
                               self.nInputPlane/self.groups,
                               self.kH, self.kW})
     errcheck('cudnnSetFilterNdDescriptor', self.weightDesc[0],
-             'CUDNN_DATA_FLOAT', 4,
+             'CUDNN_DATA_FLOAT', 'CUDNN_TENSOR_NCHW', 4,
              desc:data());
     local function destroyWDesc(d)
         errcheck('cudnnDestroyFilterDescriptor', d[0]);
@@ -122,7 +122,7 @@ function SpatialConvolution:createIODescriptors(input)
         local pad = torch.IntTensor({self.padH, self.padW})
         local stride = torch.IntTensor({self.dH, self.dW})
         local upscale = torch.IntTensor({1,1})
-        errcheck('cudnnSetConvolutionNdDescriptor_v3', self.convDesc[0],
+        errcheck('cudnnSetConvolutionNdDescriptor', self.convDesc[0],
                  2, pad:data(),
                  stride:data(), upscale:data(), 'CUDNN_CROSS_CORRELATION',
                  'CUDNN_DATA_FLOAT');
@@ -177,7 +177,7 @@ function SpatialConvolution:createIODescriptors(input)
             if autotunerCache[1][autotunerHash] then
                 algType[0] = autotunerCache[1][autotunerHash]
                 if cudnn.verbose then
-                    print('Using cached benchmark for: ', autotunerHash)
+                   print('Autotuning SC FW: using cached algo = ', algType[0], ' for: ', autotunerHash)
                 end
             else
                 local perfResults = ffi.new("cudnnConvolutionFwdAlgoPerf_t[?]", 1)
@@ -191,7 +191,7 @@ function SpatialConvolution:createIODescriptors(input)
                 autotunerCache[1][autotunerHash] = perfResults[0].algo
                 if cudnn.verbose then
                     print(string.format(
-                              "Autotuning        Forward: Time: %3.5f Memory: %8d Algorithm: %d"
+                              "\nAutotuning SC     Forward: Time: %3.5f Memory: %8d Algorithm: %d"
                                   .. " Weight: %15s Input: %15s Output: %15s",
                               perfResults[0].time, tonumber(perfResults[0].memory),
                               tonumber(perfResults[0].algo),
@@ -228,6 +228,9 @@ function SpatialConvolution:createIODescriptors(input)
         if cudnn.benchmark then -- the manual auto-tuner is run
             if autotunerCache[2][autotunerHash] then
                 algType[0] = autotunerCache[2][autotunerHash]
+                if cudnn.verbose then
+                   print('Autotuning SC BW: using cached algo = ', algType[0], ' for: ', autotunerHash)
+                end
             else
                 local perfResults = ffi.new("cudnnConvolutionBwdFilterAlgoPerf_t[?]", 1)
                 local intt = torch.IntTensor(1);
@@ -276,6 +279,9 @@ function SpatialConvolution:createIODescriptors(input)
         if cudnn.benchmark then -- the manual auto-tuner is run
             if autotunerCache[3][autotunerHash] then
                 algType[0] = autotunerCache[3][autotunerHash]
+                if cudnn.verbose then
+                   print('Autotuning SC BWD: using cached algo = ', algType[0], ' for: ', autotunerHash)
+                end
             else
                 local perfResults = ffi.new("cudnnConvolutionBwdDataAlgoPerf_t[?]", 1)
                 local intt = torch.IntTensor(1);
@@ -390,7 +396,7 @@ function SpatialConvolution:updateGradInput(input, gradOutput)
     self:createIODescriptors(input)
 
     for g = 0,self.groups - 1 do
-        errcheck('cudnnConvolutionBackwardData_v3', cudnn.getHandle(),
+        errcheck('cudnnConvolutionBackwardData', cudnn.getHandle(),
                  one:data(),
                  self.weightDesc[0], self.weight:data() + g*self.weight_offset,
                  self.oDesc[0], gradOutput:data() + g*self.output_offset,
@@ -427,7 +433,7 @@ function SpatialConvolution:accGradParameters(input, gradOutput, scale)
 
     for g = 0, self.groups - 1 do
         -- gradWeight
-        errcheck('cudnnConvolutionBackwardFilter_v3', cudnn.getHandle(),
+        errcheck('cudnnConvolutionBackwardFilter', cudnn.getHandle(),
                  self.scaleT:data(),
                  self.iDesc[0], input:data() + g*self.input_offset,
                  self.oDesc[0], gradOutput:data() + g*self.output_offset,
author	Sergey Zagoruyko <zagoruyko2@gmail.com>	2016-04-13 16:01:55 +0300
committer	Sergey Zagoruyko <zagoruyko2@gmail.com>	2016-04-13 19:01:48 +0300
commit	70adafcae259f67129c2de6e1048594aa0283e59 (patch)
tree	432358dc7274ec86132e2c3bb40b655b0f4a4f61 /SpatialConvolution.lua
parent	0a040cf2bb3d0f0cdf217c8e92efaddf29ed2efc (diff)