Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/soumith/cudnn.torch.git - Unnamed repository; edit this file 'description' to name the repository.
summary refs log tree commit diff
diff options
context:
space:
mode:
author soumith <soumith@fb.com> 2014-12-20 02:56:16 +0300
committer soumith <soumith@fb.com> 2014-12-20 02:56:16 +0300
commit 89c1810899912b6b5acbeec738eb3d4a48437147 (patch)
tree 78cf3a66cc195bc9f1677b2f80fcb687227717f6 /SpatialConvolution.lua
parent e77f210a6c537284676c941689ea853beefd23f8 (diff)
fixing potential contention bug
Diffstat (limited to 'SpatialConvolution.lua')
-rw-r--r-- SpatialConvolution.lua 21
1 files changed, 11 insertions, 10 deletions
diff --git a/SpatialConvolution.lua b/SpatialConvolution.lua
index 794653c..8292536 100644
--- a/SpatialConvolution.lua
+++ b/SpatialConvolution.lua
@@ -61,13 +61,13 @@ function SpatialConvolution:createIODescriptors(input)
-- create output descriptor and resize output
local oSize = torch.IntTensor(4)
local oSizeD = oSize:data()
- errcheck('cudnnGetConvolutionNdForwardOutputDim', self.convDesc[0], self.iDesc[0],
+ errcheck('cudnnGetConvolutionNdForwardOutputDim', self.convDesc[0], self.iDesc[0],
self.weightDesc[0], 4, oSizeD)
self.output:resize(oSize:long():storage())
-- create descriptor for output
self.oDesc = cudnn.toDescriptor(self.output)
-
- -- create forwardAlgorithm descriptors for
+
+ -- create forwardAlgorithm descriptors for
local algType = ffi.new("cudnnConvolutionFwdAlgo_t[?]", 1)
errcheck('cudnnGetConvolutionForwardAlgorithm',
cudnn.handle[cutorch.getDevice()-1],
@@ -75,7 +75,7 @@ function SpatialConvolution:createIODescriptors(input)
'CUDNN_CONVOLUTION_FWD_PREFER_FASTEST', -1, algType)
self.algType = algType
local bufSize = torch.LongTensor(1)
- errcheck('cudnnGetConvolutionForwardWorkspaceSize',
+ errcheck('cudnnGetConvolutionForwardWorkspaceSize',
cudnn.handle[cutorch.getDevice()-1],
self.iDesc[0], self.weightDesc[0], self.convDesc[0], self.oDesc[0],
algType[0], bufSize:data())
@@ -103,7 +103,7 @@ function SpatialConvolution:updateOutput(input)
one:data(),
self.iDesc[0], input:data(),
self.weightDesc[0], self.weight:data(),
- self.convDesc[0], self.algType[0],
+ self.convDesc[0], self.algType[0],
self.extraBuffer:data(), self.extraBuffer:nElement(),
zero:data(),
self.oDesc[0], self.output:data());
@@ -129,23 +129,24 @@ function SpatialConvolution:updateGradInput(input, gradOutput)
return self.gradInput
end
-local scaleT = torch.FloatTensor(1):fill(1.0)
function SpatialConvolution:accGradParameters(input, gradOutput, scale)
+ self.scaleT = self.scaleT or torch.FloatTensor(1):fill(1.0)
+ self.scaleT = self.scaleT:float() -- this line forces this member to always be on CPU (needed for cudnn)
scale = scale or 1.0
- scaleT[1] = scale
+ self.scaleT[1] = scale
assert((gradOutput:dim() == 3 or gradOutput:dim() == 4)
and gradOutput:isContiguous());
self:createIODescriptors(input)
if not self.weightDesc then self:resetWeightDescriptors() end
-- gradBias
errcheck('cudnnConvolutionBackwardBias', cudnn.handle[cutorch.getDevice()-1],
- scaleT:data(),
+ self.scaleT:data(),
self.oDesc[0], gradOutput:data(),
one:data(),
self.biasDesc[0], self.gradBias:data());
-- gradWeight
errcheck('cudnnConvolutionBackwardFilter', cudnn.handle[cutorch.getDevice()-1],
- scaleT:data(),
+ self.scaleT:data(),
self.iDesc[0], input:data(),
self.oDesc[0], gradOutput:data(),
self.convDesc[0],
@@ -157,7 +158,7 @@ end
--[[
function SpatialConvolution:zeroGradParameters()
-- gradWeight, gradBias to zero
- errcheck('cudnnSetTensor', cudnn.handle[cutorch.getDevice()-1],
+ errcheck('cudnnSetTensor', cudnn.handle[cutorch.getDevice()-1],
self.weightDesc, self.gradWeight:data(), zero:data());
errcheck('cudnnSetTensor', cudnn.handle[cutorch.getDevice()-1],
self.biasDesc, self.gradBias:data(), zero:data());