Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/soumith/cudnn.torch.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
path: root/test
diff options
context:
space:
mode:
author: Sergey Zagoruyko <zagoruyko2@gmail.com> 2016-04-13 16:01:55 +0300
committer: Sergey Zagoruyko <zagoruyko2@gmail.com> 2016-04-13 19:01:48 +0300
commit: 70adafcae259f67129c2de6e1048594aa0283e59 (patch)
tree: 432358dc7274ec86132e2c3bb40b655b0f4a4f61 /test
parent: 0a040cf2bb3d0f0cdf217c8e92efaddf29ed2efc (diff)
R5 rebase
Diffstat (limited to 'test')
-rw-r--r-- test/benchmark.lua 84
1 files changed, 55 insertions, 29 deletions
diff --git a/test/benchmark.lua b/test/benchmark.lua
index 4372502..553e918 100644
--- a/test/benchmark.lua
+++ b/test/benchmark.lua
@@ -1,10 +1,11 @@
require 'cudnn'
require 'torch'
-function bench(title, nInputC, nOutputC, kH, kW, sH, sW, iH, iW, nBatch, ...)
+function benchSpatial(title, nInputC, nOutputC, kH, kW, sH, sW, iH, iW, nBatch, ...)
local m1 = cudnn.SpatialConvolution(nInputC,nOutputC,kW,kH, sW, sH):setMode(...):fastest():cuda()
local i1 = torch.zeros(nBatch, nInputC, iH, iW):cuda()
local o1 = m1:forward(i1)
+ cutorch.synchronize()
local t1 = torch.Timer()
local o1 = m1:forward(i1)
@@ -27,47 +28,72 @@ iH = (outH-1)*sH+kH
print('CUDNN Version: ', tonumber(cudnn.C.cudnnGetVersion()))
+print("cudnn.SpatialConvolution")
-- just auto-tuned by cudnn with CUDNN_CONVOLUTION_FWD_PREFER_FASTEST mode
-bench('Forward AutoTuned ', from, to, kH, kW, sH, sW, iH, iW, batchSize)
-
-bench('Forward implicit gemm ', from, to, kH, kW, sH, sW, iH, iW, batchSize,
- 'CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_GEMM',
- 'CUDNN_CONVOLUTION_BWD_DATA_ALGO_0',
- 'CUDNN_CONVOLUTION_BWD_FILTER_ALGO_0')
-
-bench('Forward implicit precomp gemm', from, to, kH, kW, sH, sW, iH, iW, batchSize,
- 'CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM',
- 'CUDNN_CONVOLUTION_BWD_DATA_ALGO_0',
- 'CUDNN_CONVOLUTION_BWD_FILTER_ALGO_0')
-
-bench('Forward gemm ', from, to, kH, kW, sH, sW, iH, iW, batchSize,
- 'CUDNN_CONVOLUTION_FWD_ALGO_GEMM',
- 'CUDNN_CONVOLUTION_BWD_DATA_ALGO_0',
- 'CUDNN_CONVOLUTION_BWD_FILTER_ALGO_0')
+for i, mode_desc in ipairs({
+ {'Forward AutoTuned ', nil},
+ {'Forward implicit gemm ', 'CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_GEMM'},
+ {'Forward implicit precomp gemm', 'CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM'},
+ {'Forward gemm ', 'CUDNN_CONVOLUTION_FWD_ALGO_GEMM'},
+ {'Forward FFT ', 'CUDNN_CONVOLUTION_FWD_ALGO_FFT'},
+ {'Forward FFT tiling ', 'CUDNN_CONVOLUTION_FWD_ALGO_FFT_TILING'},
+-- {'Forward Winograd ', 'CUDNN_CONVOLUTION_FWD_ALGO_WINOGRAD'} -- not supported for this size
+}) do
+ local title = mode_desc[1]
+ local mode = mode_desc[2]
+
+ benchSpatial(title, from, to, kH, kW, sH, sW, iH, iW, batchSize, mode)
+end
-bench('Forward FFT ', from, to, kH, kW, sH, sW, iH, iW, batchSize,
- 'CUDNN_CONVOLUTION_FWD_ALGO_FFT',
- 'CUDNN_CONVOLUTION_BWD_DATA_ALGO_0',
- 'CUDNN_CONVOLUTION_BWD_FILTER_ALGO_0')
+function benchVolumetric(title, nInputPlane, nOutputPlane, kT, kW, kH, dT, dW, dH, padT, padW, padH, kT_input, kW_input, kH_input, nBatch, ...)
+ local gconv = cudnn.VolumetricConvolution(nInputPlane, nOutputPlane, kT, kW, kH, dT, dW, dH, padT, padW, padH):setMode(...):fastest():cuda()
+ local input = torch.zeros(nBatch, nInputPlane, kT_input, kW_input, kH_input):cuda()
+ local output = gconv:forward(input)
+ cutorch.synchronize()
+ local t1 = torch.Timer()
+ local output = gconv:forward(input)
+ cutorch.synchronize()
+ print(title .. ': ', nInputPlane, nOutputPlane, kT, kW, kH, dT, dW, dH, padT, padW, padH, kT_input, kW_input, kH_input, nBatch, t1:time().real)
+end
+print("cudnn.VolumetricConvolution")
+
+for i, mode_desc in ipairs({
+ {'Forward AutoTuned ', nil},
+ {'Forward implicit gemm ', 'CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_GEMM'},
+ {'Forward implicit precomp gemm', 'CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM'},
+-- {'Forward gemm ', 'CUDNN_CONVOLUTION_FWD_ALGO_GEMM'}, -- not supported for this size
+-- {'Forward FFT ', 'CUDNN_CONVOLUTION_FWD_ALGO_FFT'}, -- not supported for this size
+ {'Forward FFT tiling ', 'CUDNN_CONVOLUTION_FWD_ALGO_FFT_TILING'},
+-- {'Forward Winograd ', 'CUDNN_CONVOLUTION_FWD_ALGO_WINOGRAD'} -- not supported for this size
+}) do
+ local title = mode_desc[1]
+ local mode = mode_desc[2]
+
+ benchVolumetric(title, 256, 256, 3,3,3, 1,1,1, 1,1,1, 8, 28, 28, 50, mode)
+end
-- For reference, CuDNN Convolution modes
--[[
CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_GEMM = 0,
CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM = 1,
CUDNN_CONVOLUTION_FWD_ALGO_GEMM = 2,
- CUDNN_CONVOLUTION_FWD_ALGO_DIRECT = 3, // Placeholder
- CUDNN_CONVOLUTION_FWD_ALGO_FFT = 4
+ CUDNN_CONVOLUTION_FWD_ALGO_DIRECT = 3,
+ CUDNN_CONVOLUTION_FWD_ALGO_FFT = 4,
+ CUDNN_CONVOLUTION_FWD_ALGO_FFT_TILING = 5,
+ CUDNN_CONVOLUTION_FWD_ALGO_WINOGRAD = 6
CUDNN_CONVOLUTION_BWD_FILTER_ALGO_0 = 0, // non-deterministic
CUDNN_CONVOLUTION_BWD_FILTER_ALGO_1 = 1,
- CUDNN_CONVOLUTION_BWD_FILTER_ALGO_FFT = 2
-
- CUDNN_CONVOLUTION_BWD_DATA_ALGO_0 = 0, // non-deterministic
- CUDNN_CONVOLUTION_BWD_DATA_ALGO_1 = 1,
- CUDNN_CONVOLUTION_BWD_DATA_ALGO_FFT = 2,
-
+ CUDNN_CONVOLUTION_BWD_FILTER_ALGO_FFT = 2,
+ CUDNN_CONVOLUTION_BWD_FILTER_ALGO_3 = 3 // non-deterministic, algo0 with workspace
+
+ CUDNN_CONVOLUTION_BWD_DATA_ALGO_0 = 0, // non-deterministic
+ CUDNN_CONVOLUTION_BWD_DATA_ALGO_1 = 1,
+ CUDNN_CONVOLUTION_BWD_DATA_ALGO_FFT = 2,
+ CUDNN_CONVOLUTION_BWD_DATA_ALGO_FFT_TILING = 3,
+ CUDNN_CONVOLUTION_BWD_DATA_ALGO_WINOGRAD = 4
]]--