Welcome to the mirror list, hosted at ThFree Co, Russian Federation.

github.com/soumith/cudnn.torch.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Natalia Gimelshein <ngimelshein@nvidia.com> 2016-10-06 02:47:28 +0300
committer: Natalia Gimelshein <ngimelshein@nvidia.com> 2016-10-08 02:03:02 +0300
commit068a0d2a85a3090d324656a2d7cf238952e8a91f (patch)
treeb2fdc08004b181bdfaf89e1db8a9884d55d10288
parent94eb9baea49c9e69f8688de08cca9a6a05f35e9a (diff)
make VolumetricFullConvolution use find
-rw-r--r--  VolumetricFullConvolution.lua  329
-rw-r--r--  test/test.lua                    2
2 files changed, 63 insertions(+), 268 deletions(-)
diff --git a/VolumetricFullConvolution.lua b/VolumetricFullConvolution.lua
index 3cc43a3..d0d00f2 100644
--- a/VolumetricFullConvolution.lua
+++ b/VolumetricFullConvolution.lua
@@ -1,65 +1,32 @@
local VolumetricFullConvolution, parent
= torch.class('cudnn.VolumetricFullConvolution', 'nn.VolumetricFullConvolution')
local ffi = require 'ffi'
-local errcheck = cudnn.errcheck
+local find = require 'cudnn.find'
+local errcheck = find.errcheck
-local autotunerCache = {}
-autotunerCache[1] = {} -- forward
-autotunerCache[2] = {} -- backwardFilter
-autotunerCache[3] = {} -- backwardData
+local Convolution = cudnn.SpatialConvolution
-- if you change the configuration of the module manually, call this
function VolumetricFullConvolution:resetWeightDescriptors()
- assert(cudnn.typemap[torch.typename(self.weight)], 'Only Cuda supported duh!')
- assert(cudnn.typemap[torch.typename(self.bias)] or not self.bias, 'Only Cuda supported duh!')
- -- create filterDescriptor for weight
- self.weightDesc = ffi.new('struct cudnnFilterStruct*[1]')
- errcheck('cudnnCreateFilterDescriptor', self.weightDesc)
local desc = torch.IntTensor({self.nInputPlane, self.nOutputPlane,
self.kT, self.kH, self.kW})
- errcheck('cudnnSetFilterNdDescriptor', self.weightDesc[0],
- cudnn.typemap[torch.typename(self.weight)], 'CUDNN_TENSOR_NCHW', 5,
- desc:data());
- local function destroyWDesc(d)
- errcheck('cudnnDestroyFilterDescriptor', d[0]);
- end
- ffi.gc(self.weightDesc, destroyWDesc)
-
- -- create descriptor for bias
- self.biasDesc = cudnn.toDescriptor(self.bias:view(1, self.nOutputPlane,
- 1, 1))
+ return Convolution.resetWeightDescriptors(self,desc)
end
function VolumetricFullConvolution:fastest(mode)
- if mode == nil then mode = true end
- self.fastest_mode = mode
- self.iSize = self.iSize or torch.LongStorage(5)
- self.iSize:fill(0)
- return self
+ return Convolution.fastest(self, mode)
end
+
function VolumetricFullConvolution:setMode(fmode, bdmode, bwmode)
- if fmode ~= nil then
- self.fmode = fmode
- end
- if bdmode ~= nil then
- self.bdmode = bdmode
- end
- if bwmode ~= nil then
- self.bwmode = bwmode
- end
- self.iSize = self.iSize or torch.LongStorage(5)
- self.iSize:fill(0)
- return self
+ return Convolution.setMode(self, fmode, bdmode, bwmode)
end
function VolumetricFullConvolution:resetMode()
- self.fmode = nil
- self.bdmode = nil
- self.bwmode = nil
- return self
+ return Convolution.resetMode(self)
end
+
function VolumetricFullConvolution:createIODescriptors(input)
local batch = true
if input:dim() == 4 then
@@ -69,27 +36,21 @@ function VolumetricFullConvolution:createIODescriptors(input)
end
assert(input:dim() == 5 and input:isContiguous());
self.iSize = self.iSize or torch.LongStorage(5):fill(0)
- if not self.iDesc or not self.oDesc or
- input:size(1) ~= self.iSize[1] or input:size(2) ~= self.iSize[2]
- or input:size(3) ~= self.iSize[3] or input:size(4) ~= self.iSize[4]
- or input:size(5) ~= self.iSize[5] then
- self.iSize = input:size()
+ if Convolution.checkInputChanged(self, input) then
+ local input_slice = input[{{},{1,self.nInputPlane},{},{}}]
+ self.iDesc = cudnn.toDescriptor(input_slice)
-- create input descriptor
- self.iDesc = cudnn.toDescriptor(input)
+-- self.iDesc = cudnn.toDescriptor(input)
-- create conv descriptor
- self.convDesc = ffi.new('struct cudnnConvolutionStruct*[1]')
- errcheck('cudnnCreateConvolutionDescriptor', self.convDesc)
- local pad = torch.IntTensor({self.padT, self.padH, self.padW})
- local stride = torch.IntTensor({self.dT, self.dH, self.dW})
+ self.convDesc = cudnn.createDescriptors(1, 'struct cudnnConvolutionStruct*[?]',
+ 'cudnnCreateConvolutionDescriptor', 'cudnnDestroyConvolutionDescriptor')
+ self.pad = torch.IntTensor({self.padT, self.padH, self.padW})
+ self.stride = torch.IntTensor({self.dT, self.dH, self.dW})
local upscale = torch.IntTensor({1,1,1})
- errcheck('cudnnSetConvolutionNdDescriptor', self.convDesc[0],
- 3, pad:data(),
- stride:data(), upscale:data(), 'CUDNN_CROSS_CORRELATION',
+ errcheck(self, 'cudnnSetConvolutionNdDescriptor', self.convDesc[0],
+ 3, self.pad:data(),
+ self.stride:data(), upscale:data(), 'CUDNN_CROSS_CORRELATION',
cudnn.configmap(torch.type(self.weight)));
- local function destroyConvDesc(d)
- errcheck('cudnnDestroyConvolutionDescriptor', d[0]);
- end
- ffi.gc(self.convDesc, destroyConvDesc)
-- get output shape, resize output
local iwidth = input:size(5)
@@ -102,196 +63,23 @@ function VolumetricFullConvolution:createIODescriptors(input)
self.output:resize(oSize:long():storage())
-- create descriptor for output
- local output_slice = {{},{1,self.nOutputPlane},{},{}}
- self.oDesc = cudnn.toDescriptor(self.output[output_slice])
- self.oDescBias = cudnn.toDescriptor(
+ local output_slice = self.output[{{},{1,self.nOutputPlane},{},{}}]
+ self.oDesc = cudnn.toDescriptor(output_slice)
+ self.oDescForBias = cudnn.toDescriptor(
self.output:view(self.output:size(1),
self.output:size(2),
self.output:size(3)*self.output:size(4),
self.output:size(5)))
-
- -----------------------------------------------------------------------
- local function shape(x)
- return table.concat(x:size():totable(),'x')
- end
- local autotunerHash = shape(self.weight) .. ';'
- .. shape(input) .. ';'
- .. shape(self.output)
-
- local maxBufSize = 0
-
- -- create forwardAlgorithm descriptors
- local algType = ffi.new("cudnnConvolutionFwdAlgo_t[?]", 1)
- local algSearchMode = 'CUDNN_CONVOLUTION_FWD_SPECIFY_WORKSPACE_LIMIT'
- local algWorkspaceLimit = self.workspace_limit
- or (self.nOutputPlane * self.kT * self.kH * self.kW * 4) -- 4 = sizeof int/float.
-
- if self.fastest_mode or cudnn.fastest == true then
- algSearchMode = 'CUDNN_CONVOLUTION_FWD_PREFER_FASTEST'
- end
-
- if cudnn.benchmark then -- the manual auto-tuner is run
- if autotunerCache[1][autotunerHash] then
- algType[0] = autotunerCache[1][autotunerHash]
- if cudnn.verbose then
- print('Autotuning VMC FW: using cached algo = ', algType[0], ' for: ', autotunerHash)
- end
- else
- local perfResults = ffi.new("cudnnConvolutionFwdAlgoPerf_t[?]", 1)
- local intt = torch.IntTensor(1);
- errcheck('cudnnFindConvolutionForwardAlgorithm',
- cudnn.getHandle(),
- self.oDesc[0], self.weightDesc[0],
- self.convDesc[0], self.iDesc[0],
- 1, intt:data(), perfResults)
- algType[0] = perfResults[0].algo
- autotunerCache[1][autotunerHash] = perfResults[0].algo
- if cudnn.verbose then
- print(string.format(
- "\nAutotuning VMC Forward: Time: %3.5f Memory: %8d Algorithm: %d"
- .. " Weight: %15s Input: %15s Output: %15s",
- perfResults[0].time, tonumber(perfResults[0].memory),
- tonumber(perfResults[0].algo),
- shape(self.weight), shape(input),
- shape(self.output)))
- end
- end
- else
- errcheck('cudnnGetConvolutionForwardAlgorithm',
- cudnn.getHandle(),
- self.oDesc[0], self.weightDesc[0],
- self.convDesc[0], self.iDesc[0],
- algSearchMode, algWorkspaceLimit, algType)
- end
- algType[0] = self.fmode or algType[0]
- self.fwdAlgType = algType
- local bufSize = torch.LongTensor(1)
- errcheck('cudnnGetConvolutionForwardWorkspaceSize',
- cudnn.getHandle(),
- self.oDesc[0], self.weightDesc[0],
- self.convDesc[0], self.iDesc[0],
- algType[0], bufSize:data())
- maxBufSize = math.max(maxBufSize, bufSize[1])
-
- -- create backwardFilterAlgorithm descriptors
- local algType = ffi.new("cudnnConvolutionBwdFilterAlgo_t[?]", 1)
- local algSearchMode = 'CUDNN_CONVOLUTION_BWD_FILTER_NO_WORKSPACE'
- local algWorkspaceLimit = self.workspace_limit
- or (self.nInputPlane * self.kT * self.kH * self.kW * 4) -- 4 = sizeof int/float.
- if self.fastest_mode or cudnn.fastest == true then
- algSearchMode = 'CUDNN_CONVOLUTION_BWD_FILTER_PREFER_FASTEST'
- end
-
- if cudnn.benchmark then -- the manual auto-tuner is run
- if autotunerCache[2][autotunerHash] then
- algType[0] = autotunerCache[2][autotunerHash]
- if cudnn.verbose then
- print('Autotuning VMC BWF: using cached algo = ', algType[0], ' for: ', autotunerHash)
- end
- else
- local perfResults = ffi.new("cudnnConvolutionBwdFilterAlgoPerf_t[?]", 1)
- local intt = torch.IntTensor(1);
- errcheck('cudnnFindConvolutionBackwardFilterAlgorithm',
- cudnn.getHandle(),
- self.oDesc[0], self.iDesc[0],
- self.convDesc[0], self.weightDesc[0],
- 1, intt:data(), perfResults)
- algType[0] = perfResults[0].algo
- autotunerCache[2][autotunerHash] = perfResults[0].algo
- if cudnn.verbose then
- print(string.format(
- "Autotuning backwardFilter: Time: %3.5f Memory: %8d Algorithm: %d"
- .. " Weight: %15s Input: %15s Output: %15s",
- perfResults[0].time, tonumber(perfResults[0].memory),
- tonumber(perfResults[0].algo),
- shape(self.weight), shape(input),
- shape(self.output)))
- end
- end
- else
- errcheck('cudnnGetConvolutionBackwardFilterAlgorithm',
- cudnn.getHandle(),
- self.oDesc[0], self.iDesc[0],
- self.convDesc[0], self.weightDesc[0],
- algSearchMode, algWorkspaceLimit, algType)
- end
- algType[0] = self.bwmode or algType[0]
- self.bwdFilterAlgType = algType
- local bufSize = torch.LongTensor(1)
- errcheck('cudnnGetConvolutionBackwardFilterWorkspaceSize',
- cudnn.getHandle(),
- self.oDesc[0], self.iDesc[0],
- self.convDesc[0], self.weightDesc[0],
- algType[0], bufSize:data())
- maxBufSize = math.max(maxBufSize, bufSize[1])
-
- -- create backwardDataAlgorithm descriptors
- local algType = ffi.new("cudnnConvolutionBwdDataAlgo_t[?]", 1)
- local algSearchMode = 'CUDNN_CONVOLUTION_BWD_DATA_NO_WORKSPACE'
- local algWorkspaceLimit = self.workspace_limit
- or (self.nOutputPlane * self.kH * self.kW * 4) -- 4 = sizeof int/float.
- if self.fastest_mode or cudnn.fastest == true then
- algSearchMode = 'CUDNN_CONVOLUTION_BWD_DATA_PREFER_FASTEST'
- end
- if cudnn.benchmark then -- the manual auto-tuner is run
- if autotunerCache[3][autotunerHash] then
- algType[0] = autotunerCache[3][autotunerHash]
- if cudnn.verbose then
- print('Autotuning VMC BWD: using cached algo = ', algType[0], ' for: ', autotunerHash)
- end
- else
- local perfResults = ffi.new("cudnnConvolutionBwdDataAlgoPerf_t[?]", 1)
- local intt = torch.IntTensor(1);
- errcheck('cudnnFindConvolutionBackwardDataAlgorithm',
- cudnn.getHandle(),
- self.weightDesc[0], self.iDesc[0],
- self.convDesc[0], self.oDesc[0],
- 1, intt:data(), perfResults)
- algType[0] = perfResults[0].algo
- autotunerCache[3][autotunerHash] = perfResults[0].algo
- if cudnn.verbose then
- print(string.format(
- "Autotuning backwardData: Time: %3.5f Memory: %8d Algorithm: %d"
- .. " Weight: %15s Input: %15s Output: %15s\n",
- perfResults[0].time, tonumber(perfResults[0].memory),
- tonumber(perfResults[0].algo),
- shape(self.weight), shape(input),
- shape(self.output)))
- end
- end
- else
- errcheck('cudnnGetConvolutionBackwardDataAlgorithm',
- cudnn.getHandle(),
- self.weightDesc[0], self.iDesc[0],
- self.convDesc[0], self.oDesc[0],
- algSearchMode, algWorkspaceLimit, algType)
- end
- algType[0] = self.bdmode or algType[0]
- self.bwdDataAlgType = algType
- local bufSize = torch.LongTensor(1)
- errcheck('cudnnGetConvolutionBackwardDataWorkspaceSize',
- cudnn.getHandle(),
- self.weightDesc[0], self.iDesc[0],
- self.convDesc[0], self.oDesc[0],
- algType[0], bufSize:data())
- maxBufSize = math.max(maxBufSize, bufSize[1])
-
- self.extraBuffer = self.extraBuffer or cudnn.getSharedWorkspace()
- self.extraBuffer = self.extraBuffer:cuda() -- always force float
- self.extraBufferSizeInBytes =
- self.extraBuffer:nElement() * 4 -- extraBuffer is always float
- if maxBufSize > self.extraBufferSizeInBytes then
- self.extraBuffer:resize(math.ceil(maxBufSize / 4))
- self.extraBufferSizeInBytes = maxBufSize
- end
- -----------------------------------------------------------------------
-
- if not batch then
+ self.input_offset = 0
+ self.output_offset = 0
+ self.weight_offset = 0
+ find:prepare(self, input_slice, output_slice)
+ if not batch then
self.output = self.output:view(self.output:size(2),
self.output:size(3),
self.output:size(4),
self.output:size(5))
- end
+ end
end
end
@@ -315,23 +103,30 @@ end
function VolumetricFullConvolution:updateOutput(input)
if not self.weightDesc then self:resetWeightDescriptors() end
self:createIODescriptors(input)
-
+ local finder = find.get()
-- Because SpatialFullConvolution is performing the adjoint of the forward
-- convolution operator, we need to swap the forward and backward passes.
- errcheck('cudnnConvolutionBackwardData', cudnn.getHandle(),
+
+
+ local bwdDataAlgo = finder:backwardDataAlgorithm(self, {self.weightDesc[0], self.weight,
+ self.iDesc[0],self.input_slice,
+ self.convDesc[0], self.oDesc[0], self.output_slice})
+ local extraBuffer, extraBufferSize = cudnn.getSharedWorkspace()
+
+ errcheck(self, 'cudnnConvolutionBackwardData', cudnn.getHandle(),
cudnn.scalar(input, 1),
self.weightDesc[0], self.weight:data(),
self.iDesc[0], input:data(),
- self.convDesc[0], self.bwdDataAlgType[0],
- self.extraBuffer:data(), self.extraBufferSizeInBytes,
+ self.convDesc[0], bwdDataAlgo,
+ extraBuffer, extraBufferSize,
cudnn.scalar(input, 0),
self.oDesc[0], self.output:data())
-- add bias
if self.bias then
- errcheck('cudnnAddTensor', cudnn.getHandle(),
+ errcheck(self, 'cudnnAddTensor', cudnn.getHandle(),
cudnn.scalar(input, 1), self.biasDesc[0], self.bias:data(),
- cudnn.scalar(input, 1), self.oDescBias[0], self.output:data())
+ cudnn.scalar(input, 1), self.oDescForBias[0], self.output:data())
end
return self.output
@@ -345,14 +140,19 @@ function VolumetricFullConvolution:updateGradInput(input, gradOutput)
assert(gradOutput:isContiguous(), 'gradOutput has to be contiguous')
if not self.weightDesc then self:resetWeightDescriptors() end
self:createIODescriptors(input)
+ local finder = find.get()
+ local fwdAlgo = finder:forwardAlgorithm(self, {self.oDesc[0], self.output_slice,
+ self.weightDesc[0], self.weight,
+ self.convDesc[0], self.iDesc[0], self.input_slice})
+ local extraBuffer, extraBufferSize = cudnn.getSharedWorkspace()
- errcheck('cudnnConvolutionForward', cudnn.getHandle(),
+ errcheck(self,'cudnnConvolutionForward', cudnn.getHandle(),
cudnn.scalar(input, 1),
self.oDesc[0], gradOutput:data(),
self.weightDesc[0], self.weight:data(),
self.convDesc[0],
- self.fwdAlgType[0],
- self.extraBuffer:data(), self.extraBufferSizeInBytes,
+ fwdAlgo,
+ extraBuffer, extraBufferSize,
cudnn.scalar(input, 0),
self.iDesc[0], self.gradInput:data());
return self.gradInput
@@ -372,36 +172,31 @@ function VolumetricFullConvolution:accGradParameters(input, gradOutput, scale)
self:createIODescriptors(input)
if not self.weightDesc then self:resetWeightDescriptors() end
-- gradBias
- errcheck('cudnnConvolutionBackwardBias', cudnn.getHandle(),
+
+ local finder = find.get()
+ local bwdFilterAlgo = finder:backwardFilterAlgorithm(self, {self.oDesc[0], self.output_slice,
+ self.iDesc[0], self.input_slice,
+ self.convDesc[0], self.weightDesc[0], self.weight})
+ errcheck(self, 'cudnnConvolutionBackwardBias', cudnn.getHandle(),
self.scaleT:data(),
- self.oDescBias[0], gradOutput:data(),
+ self.oDescForBias[0], gradOutput:data(),
cudnn.scalar(input, 1),
self.biasDesc[0], self.gradBias:data());
+ local extraBuffer, extraBufferSize = cudnn.getSharedWorkspace()
-- gradWeight
- errcheck('cudnnConvolutionBackwardFilter', cudnn.getHandle(),
+ errcheck(self, 'cudnnConvolutionBackwardFilter', cudnn.getHandle(),
self.scaleT:data(),
self.oDesc[0], gradOutput:data(),
self.iDesc[0], input:data(),
self.convDesc[0],
- self.bwdFilterAlgType[0],
- self.extraBuffer:data(), self.extraBufferSizeInBytes,
+ bwdFilterAlgo,
+ extraBuffer, extraBufferSize,
cudnn.scalar(input, 1),
self.weightDesc[0], self.gradWeight:data());
end
function VolumetricFullConvolution:clearDesc()
- self.weightDesc = nil
- self.biasDesc = nil
- self.convDesc = nil
- self.iDesc = nil
- self.oDesc = nil
- self.oDescBias = nil
- self.fwdAlgType = nil
- self.bwdDataAlgType = nil
- self.bwdFilterAlgType = nil
- self.extraBuffer = nil
- self.extraBufferInBytes = nil
- self.scaleT = nil
+ return Convolution.clearDesc(self)
end
function VolumetricFullConvolution:write(f)
diff --git a/test/test.lua b/test/test.lua
index 86c4d70..40d730c 100644
--- a/test/test.lua
+++ b/test/test.lua
@@ -944,7 +944,7 @@ mytester:add(cudnntest)
-- cudnn.verbose=true
-- cudnn.find.verbose=true
-cudnn.useFindEx=true
+cudnn.useFindEx=false
for i = 1, cutorch.getDeviceCount() do