Merge pull request #59 from soumith/autotunercaching

adding caching for autotuner results
author: Soumith Chintala <soumith@gmail.com> 2015-10-01 23:31:49 +0300
committer: Soumith Chintala <soumith@gmail.com> 2015-10-01 23:31:49 +0300
commit: c34a4a5b3f6430da45708598bc72e81e2f75b6dc (patch)
tree: b9bcbe643553a867a0ac3449a227376a16d38770
parent: fb31eba8a4252f07a6891e423d883b79371257a3 (diff)
parent: 880b94991c1726447da0940c4deb582d133d969b (diff)
1 files changed, 87 insertions, 33 deletions
diff --git a/SpatialConvolution.lua b/SpatialConvolution.lua
index ea8c2e7..2566ccf 100644
--- a/SpatialConvolution.lua
+++ b/SpatialConvolution.lua
@@ -3,6 +3,11 @@ local SpatialConvolution, parent =
 local ffi = require 'ffi'
 local errcheck = cudnn.errcheck
 
+local autotunerCache = {}
+autotunerCache[1] = {} -- forward
+autotunerCache[2] = {} -- backwardFilter
+autotunerCache[3] = {} -- backwardData
+
 function SpatialConvolution:__init(nInputPlane, nOutputPlane,
                             kW, kH, dW, dH, padW, padH, groups)
     local delayedReset = self.reset
@@ -130,6 +135,21 @@ function SpatialConvolution:createIODescriptors(input)
         self.oDescForBias = cudnn.toDescriptor(self.output)
 
         -----------------------------------------------------------------------
+        local function shape(x)
+            local sz = x:size()
+            local str = ''
+            for i=1,sz:size() do
+                str = str .. sz[i] .. 'x'
+            end
+            if #str > 0 then
+                str = str:sub(1, #str-1)
+            end
+            return str
+        end
+        local autotunerHash = shape(self.weight) .. ';'
+            .. shape(input[input_slice]) .. ';'
+            .. shape(self.output[output_slice])
+
         local maxBufSize = 0
 
         -- create forwardAlgorithm descriptors
@@ -141,18 +161,32 @@ function SpatialConvolution:createIODescriptors(input)
         if self.fastest_mode or cudnn.fastest == true then
             algSearchMode = 'CUDNN_CONVOLUTION_FWD_PREFER_FASTEST'
         end
+
         if cudnn.benchmark then -- the manual auto-tuner is run
-            local perfResults = ffi.new("cudnnConvolutionFwdAlgoPerf_t[?]", 1)
-            local intt = torch.IntTensor(1);
-            errcheck('cudnnFindConvolutionForwardAlgorithm',
-                     cudnn.getHandle(),
-                     self.iDesc[0], self.weightDesc[0],
-                     self.convDesc[0], self.oDesc[0],
-                     1, intt:data(), perfResults)
-            algType[0] = perfResults[0].algo
-            if cudnn.verbose then
-                print('AutoTuning:', perfResults[0].time,
-                      tonumber(perfResults[0].memory), tonumber(perfResults[0].algo))
+            if autotunerCache[1][autotunerHash] then
+                algType[0] = autotunerCache[1][autotunerHash]
+                if cudnn.verbose then
+                    print('Using cached benchmark for: ', autotunerHash)
+                end
+            else
+                local perfResults = ffi.new("cudnnConvolutionFwdAlgoPerf_t[?]", 1)
+                local intt = torch.IntTensor(1);
+                errcheck('cudnnFindConvolutionForwardAlgorithm',
+                         cudnn.getHandle(),
+                         self.iDesc[0], self.weightDesc[0],
+                         self.convDesc[0], self.oDesc[0],
+                         1, intt:data(), perfResults)
+                algType[0] = perfResults[0].algo
+                autotunerCache[1][autotunerHash] = perfResults[0].algo
+                if cudnn.verbose then
+                    print(string.format(
+                              "Autotuning        Forward: Time: %3.5f Memory: %8d Algorithm: %d"
+                                  .. " Weight: %15s Input: %15s Output: %15s",
+                              perfResults[0].time, tonumber(perfResults[0].memory),
+                              tonumber(perfResults[0].algo),
+                              shape(self.weight), shape(input[input_slice]),
+                              shape(self.output[output_slice])))
+                end
             end
         else
             errcheck('cudnnGetConvolutionForwardAlgorithm',
@@ -181,17 +215,27 @@ function SpatialConvolution:createIODescriptors(input)
         end
 
         if cudnn.benchmark then -- the manual auto-tuner is run
-            local perfResults = ffi.new("cudnnConvolutionBwdFilterAlgoPerf_t[?]", 1)
-            local intt = torch.IntTensor(1);
-            errcheck('cudnnFindConvolutionBackwardFilterAlgorithm',
-                     cudnn.getHandle(),
-                     self.iDesc[0], self.oDesc[0],
-                     self.convDesc[0], self.weightDesc[0],
-                     1, intt:data(), perfResults)
-            algType[0] = perfResults[0].algo
-            if cudnn.verbose then
-                print('AutoTuning:', perfResults[0].time,
-                      tonumber(perfResults[0].memory), tonumber(perfResults[0].algo))
+            if autotunerCache[2][autotunerHash] then
+                algType[0] = autotunerCache[2][autotunerHash]
+            else
+                local perfResults = ffi.new("cudnnConvolutionBwdFilterAlgoPerf_t[?]", 1)
+                local intt = torch.IntTensor(1);
+                errcheck('cudnnFindConvolutionBackwardFilterAlgorithm',
+                         cudnn.getHandle(),
+                         self.iDesc[0], self.oDesc[0],
+                         self.convDesc[0], self.weightDesc[0],
+                         1, intt:data(), perfResults)
+                algType[0] = perfResults[0].algo
+                autotunerCache[2][autotunerHash] = perfResults[0].algo
+                if cudnn.verbose then
+                    print(string.format(
+                              "Autotuning backwardFilter: Time: %3.5f Memory: %8d Algorithm: %d"
+                                  .. " Weight: %15s Input: %15s Output: %15s",
+                              perfResults[0].time, tonumber(perfResults[0].memory),
+                              tonumber(perfResults[0].algo),
+                              shape(self.weight), shape(input[input_slice]),
+                              shape(self.output[output_slice])))
+                end
             end
         else
             errcheck('cudnnGetConvolutionBackwardFilterAlgorithm',
@@ -219,17 +263,27 @@ function SpatialConvolution:createIODescriptors(input)
             algSearchMode = 'CUDNN_CONVOLUTION_BWD_DATA_PREFER_FASTEST'
         end
         if cudnn.benchmark then -- the manual auto-tuner is run
-            local perfResults = ffi.new("cudnnConvolutionBwdDataAlgoPerf_t[?]", 1)
-            local intt = torch.IntTensor(1);
-            errcheck('cudnnFindConvolutionBackwardDataAlgorithm',
-                     cudnn.getHandle(),
-                     self.weightDesc[0], self.oDesc[0],
-                     self.convDesc[0], self.iDesc[0],
-                     1, intt:data(), perfResults)
-            algType[0] = perfResults[0].algo
-            if cudnn.verbose then
-                print('AutoTuning:', perfResults[0].time,
-                      tonumber(perfResults[0].memory), tonumber(perfResults[0].algo))
+            if autotunerCache[3][autotunerHash] then
+                algType[0] = autotunerCache[3][autotunerHash]
+            else
+                local perfResults = ffi.new("cudnnConvolutionBwdDataAlgoPerf_t[?]", 1)
+                local intt = torch.IntTensor(1);
+                errcheck('cudnnFindConvolutionBackwardDataAlgorithm',
+                         cudnn.getHandle(),
+                         self.weightDesc[0], self.oDesc[0],
+                         self.convDesc[0], self.iDesc[0],
+                         1, intt:data(), perfResults)
+                algType[0] = perfResults[0].algo
+                autotunerCache[3][autotunerHash] = perfResults[0].algo
+                if cudnn.verbose then
+                    print(string.format(
+                              "Autotuning   backwardData: Time: %3.5f Memory: %8d Algorithm: %d"
+                                  .. " Weight: %15s Input: %15s Output: %15s\n",
+                              perfResults[0].time, tonumber(perfResults[0].memory),
+                              tonumber(perfResults[0].algo),
+                              shape(self.weight), shape(input[input_slice]),
+                              shape(self.output[output_slice])))
+                end
             end
         else
             errcheck('cudnnGetConvolutionBackwardDataAlgorithm',
author	Soumith Chintala <soumith@gmail.com>	2015-10-01 23:31:49 +0300
committer	Soumith Chintala <soumith@gmail.com>	2015-10-01 23:31:49 +0300
commit	c34a4a5b3f6430da45708598bc72e81e2f75b6dc (patch)
tree	b9bcbe643553a867a0ac3449a227376a16d38770
parent	fb31eba8a4252f07a6891e423d883b79371257a3 (diff)
parent	880b94991c1726447da0940c4deb582d133d969b (diff)