github.com/soumith/cudnn.torch.git
-rw-r--r--   BatchNormalization.lua             33
-rw-r--r--   Pointwise.lua                      11
-rw-r--r--   Pooling.lua                        11
-rw-r--r--   Pooling3D.lua                      13
-rw-r--r--   SpatialConvolution.lua             39
-rw-r--r--   SpatialCrossMapLRN.lua             12
-rw-r--r--   SpatialDivisiveNormalization.lua   12
-rw-r--r--   SpatialFullConvolution.lua         28
-rw-r--r--   SpatialSoftMax.lua                 12
-rw-r--r--   TemporalConvolution.lua             6
-rw-r--r--   VolumetricConvolution.lua          39
-rw-r--r--   VolumetricMaxPooling.lua            3
-rw-r--r--   functional.lua                     28
-rw-r--r--   init.lua                           38
-rw-r--r--   test/test.lua                      34

15 files changed, 184 insertions, 135 deletions
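
This commit replaces the module-local "local one = torch.FloatTensor({1})" / "local zero = torch.FloatTensor({0})" scalars used throughout the bindings with a new cudnn.scalar(tensor, value) helper (added in init.lua below), so the alpha/beta factors passed to cuDNN match the scalar type of the tensors involved: float for torch.CudaTensor and torch.CudaHalfTensor, double for torch.CudaDoubleTensor. A minimal before/after sketch of a call site, excerpted from Pointwise.lua for illustration (not self-contained):

    -- before: alpha/beta were always host floats, regardless of the input type
    -- errcheck('cudnnActivationForward', cudnn.getHandle(), self.activDesc[0],
    --          one:data(), self.iDesc[0], input:data(),
    --          zero:data(), self.iDesc[0], self.output:data());

    -- after: cudnn.scalar returns a host pointer typed to match `input`
    errcheck('cudnnActivationForward', cudnn.getHandle(), self.activDesc[0],
             cudnn.scalar(input, 1), self.iDesc[0], input:data(),
             cudnn.scalar(input, 0), self.iDesc[0], self.output:data());
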
diff --git a/BatchNormalization.lua b/BatchNormalization.lua
index 77db075..ac77e4f 100644
--- a/BatchNormalization.lua
+++ b/BatchNormalization.lua
@@ -56,10 +56,6 @@ function BatchNormalization:createIODescriptors(input)
end
end
-local one = torch.FloatTensor({1});
-local zero = torch.FloatTensor({0});
-local scaleTens = torch.FloatTensor(1);
-
function BatchNormalization:updateOutput(input)
self:createIODescriptors(input)
@@ -70,13 +66,13 @@ function BatchNormalization:updateOutput(input)
if self.train then
errcheck('cudnnBatchNormalizationForwardTraining',
- cudnn.getHandle(), self.mode, one:data(), zero:data(),
+ cudnn.getHandle(), self.mode, cudnn.scalar(input, 1), cudnn.scalar(input, 0),
self.iDesc[0], input:data(), self.oDesc[0], self.output:data(),
self.sDesc[0], self.weight:data(), self.bias:data(),
self.momentum, self.running_mean:data(), self.running_var:data(), self.eps, self.save_mean:data(), self.save_std:data());
else
errcheck('cudnnBatchNormalizationForwardInference',
- cudnn.getHandle(), self.mode, one:data(), zero:data(),
+ cudnn.getHandle(), self.mode, cudnn.scalar(input, 1), cudnn.scalar(input, 0),
self.iDesc[0], input:data(), self.oDesc[0], self.output:data(),
self.sDesc[0], self.weight:data(), self.bias:data(),
self.running_mean:data(), self.running_var:data(), self.eps);
@@ -85,17 +81,26 @@ function BatchNormalization:updateOutput(input)
end
local function backward(self,input,gradOutput, scale)
+ self.scaleT = self.scaleT or self.weight.new(1)
+ -- this line forces this member to always be on CPU (needed for cudnn)
+ self.scaleT = torch.type(self.weight) == 'torch.CudaDoubleTensor'
+ and self.scaleT:double() or self.scaleT:float()
+ scale = scale or 1.0
+ self.scaleT[1] = scale
+
assert(gradOutput:isContiguous())
self:createIODescriptors(input)
self.gradInput:resizeAs(input)
- scale = scale or 1
- scaleTens:fill(scale)
errcheck('cudnnBatchNormalizationBackward',
- cudnn.getHandle(), self.mode, one:data(), zero:data(), scaleTens:data(), one:data(),
- self.iDesc[0], input:data(), self.iDesc[0], gradOutput:data(), self.iDesc[0], self.gradInput:data(),
- -- input is bottom, gradOutput is topDiff, self.gradInput is resultBottomDiff
- self.sDesc[0], self.weight:data(), self.gradWeight:data(), self.gradBias:data(),
- self.eps, self.save_mean:data(), self.save_std:data());
+ cudnn.getHandle(), self.mode, cudnn.scalar(input, 1),
+ cudnn.scalar(input, 0), self.scaleT:data(), cudnn.scalar(input, 1),
+ self.iDesc[0], input:data(), self.iDesc[0],
+ gradOutput:data(), self.iDesc[0], self.gradInput:data(),
+ -- input is bottom, gradOutput is topDiff,
+ -- self.gradInput is resultBottomDiff
+ self.sDesc[0], self.weight:data(), self.gradWeight:data(),
+ self.gradBias:data(), self.eps, self.save_mean:data(),
+ self.save_std:data());
return self.gradInput
end
@@ -139,7 +144,7 @@ end
function BatchNormalization:type(type, tensorCache)
local _type = type == 'torch.CudaHalfTensor' and 'torch.CudaTensor' or type
- parent.type(self, _type, tensorCache)
+ parent.type(self, _type, tensorCache)
self.output = self.output:type(type)
self.gradInput = self.gradInput:type(type)
return self
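
A note on the backward() change above: cuDNN takes its scaling factors as host (CPU) pointers, so self.scaleT is kept as a one-element CPU tensor whose element type tracks the parameter type — double only when the weights are torch.CudaDoubleTensor, float otherwise (the half case also uses float scalars). A minimal sketch of the pattern as used here:

    -- one-element tensor holding the `scale` argument for cuDNN
    self.scaleT = self.scaleT or self.weight.new(1)
    -- force it onto the CPU with an element type matching the compute type
    self.scaleT = torch.type(self.weight) == 'torch.CudaDoubleTensor'
       and self.scaleT:double() or self.scaleT:float()
    self.scaleT[1] = scale or 1.0
    -- self.scaleT:data() is then passed to cudnnBatchNormalizationBackward,
    -- which expects a host pointer for the scale factor
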
diff --git a/Pointwise.lua b/Pointwise.lua
index 9cfe0f2..76e6499 100644
--- a/Pointwise.lua
+++ b/Pointwise.lua
@@ -37,17 +37,14 @@ function Pointwise:createIODescriptors(input)
end
-local one = torch.FloatTensor({1});
-local zero = torch.FloatTensor({0});
-
function Pointwise:updateOutput(input)
self:createIODescriptors(input)
if self.inplace then self.output:set(input) end
errcheck('cudnnActivationForward',
cudnn.getHandle(), self.activDesc[0],
- one:data(),
+ cudnn.scalar(input, 1),
self.iDesc[0], input:data(),
- zero:data(),
+ cudnn.scalar(input, 0),
self.iDesc[0], self.output:data());
return self.output
end
@@ -67,11 +64,11 @@ function Pointwise:updateGradInput(input, gradOutput)
end
errcheck('cudnnActivationBackward',
cudnn.getHandle(), self.activDesc[0],
- one:data(),
+ cudnn.scalar(input, 1),
self.iDesc[0], self.output:data(),
self.iDesc[0], gradOutput:data(),
self.iDesc[0], input:data(),
- zero:data(),
+ cudnn.scalar(input, 0),
self.iDesc[0], self.gradInput:data());
return self.gradInput
end
diff --git a/Pooling.lua b/Pooling.lua
index 53220f1..9e13abf 100644
--- a/Pooling.lua
+++ b/Pooling.lua
@@ -75,17 +75,14 @@ function Pooling:createIODescriptors(input)
end
end
-local one = torch.FloatTensor({1});
-local zero = torch.FloatTensor({0});
-
function Pooling:updateOutput(input)
if not self.poolDesc then self:resetPoolDescriptors() end
self:createIODescriptors(input)
errcheck('cudnnPoolingForward', cudnn.getHandle(),
self.poolDesc[0],
- one:data(),
+ cudnn.scalar(input, 1),
self.iDesc[0], input:data(),
- zero:data(),
+ cudnn.scalar(input, 0),
self.oDesc[0], self.output:data());
return self.output
end
@@ -102,11 +99,11 @@ function Pooling:updateGradInput(input, gradOutput)
self:createIODescriptors(input)
errcheck('cudnnPoolingBackward',
cudnn.getHandle(), self.poolDesc[0],
- one:data(),
+ cudnn.scalar(input, 1),
self.oDesc[0], self.output:data(),
self.oDesc[0], gradOutput:data(),
self.iDesc[0], input:data(),
- zero:data(),
+ cudnn.scalar(input, 0),
self.iDesc[0], self.gradInput:data());
return self.gradInput
end
diff --git a/Pooling3D.lua b/Pooling3D.lua
index e4c0218..b488c9a 100644
--- a/Pooling3D.lua
+++ b/Pooling3D.lua
@@ -29,7 +29,7 @@ end
function Pooling:resetPoolDescriptors()
-- create pooling descriptor
- self.padT = self.padT or 0
+ self.padT = self.padT or 0
self.padW = self.padW or 0
self.padH = self.padH or 0
self.poolDesc = ffi.new('struct cudnnPoolingStruct*[1]')
@@ -83,17 +83,14 @@ function Pooling:createIODescriptors(input)
end
end
-local one = torch.FloatTensor({1});
-local zero = torch.FloatTensor({0});
-
function Pooling:updateOutput(input)
if not self.poolDesc then self:resetPoolDescriptors() end
self:createIODescriptors(input)
errcheck('cudnnPoolingForward', cudnn.getHandle(),
self.poolDesc[0],
- one:data(),
+ cudnn.scalar(input, 1),
self.iDesc[0], input:data(),
- zero:data(),
+ cudnn.scalar(input, 0),
self.oDesc[0], self.output:data());
return self.output
end
@@ -112,11 +109,11 @@ function Pooling:updateGradInput(input, gradOutput)
self:createIODescriptors(input)
errcheck('cudnnPoolingBackward',
cudnn.getHandle(), self.poolDesc[0],
- one:data(),
+ cudnn.scalar(input, 1),
self.oDesc[0], self.output:data(),
self.oDesc[0], gradOutput:data(),
self.iDesc[0], input:data(),
- zero:data(),
+ cudnn.scalar(input, 0),
self.iDesc[0], self.gradInput:data());
return self.gradInput
end
diff --git a/SpatialConvolution.lua b/SpatialConvolution.lua
index 58c78b2..8680586 100644
--- a/SpatialConvolution.lua
+++ b/SpatialConvolution.lua
@@ -165,7 +165,7 @@ function SpatialConvolution:createIODescriptors(input)
local algType = ffi.new("cudnnConvolutionFwdAlgo_t[?]", 1)
local algSearchMode = 'CUDNN_CONVOLUTION_FWD_SPECIFY_WORKSPACE_LIMIT'
local algWorkspaceLimit = self.workspace_limit
- or (self.nInputPlane * self.kH * self.kW * 4) -- 4 = sizeof int/float.
+ or (self.nInputPlane * self.kH * self.kW * cudnn.sizeof(self.weight))
if self.fastest_mode or cudnn.fastest == true then
algSearchMode = 'CUDNN_CONVOLUTION_FWD_PREFER_FASTEST'
@@ -218,7 +218,7 @@ function SpatialConvolution:createIODescriptors(input)
local algType = ffi.new("cudnnConvolutionBwdFilterAlgo_t[?]", 1)
local algSearchMode = 'CUDNN_CONVOLUTION_BWD_FILTER_NO_WORKSPACE'
local algWorkspaceLimit = self.workspace_limit
- or (self.nInputPlane * self.kH * self.kW * 4) -- 4 = sizeof int/float.
+ or (self.nInputPlane * self.kH * self.kW * cudnn.sizeof(self.weight))
if self.fastest_mode or cudnn.fastest == true then
algSearchMode = 'CUDNN_CONVOLUTION_BWD_FILTER_PREFER_FASTEST'
end
@@ -270,7 +270,7 @@ function SpatialConvolution:createIODescriptors(input)
local algType = ffi.new("cudnnConvolutionBwdDataAlgo_t[?]", 1)
local algSearchMode = 'CUDNN_CONVOLUTION_BWD_DATA_NO_WORKSPACE'
local algWorkspaceLimit = self.workspace_limit
- or (self.nInputPlane * self.kH * self.kW * 4) -- 4 = sizeof int/float.
+ or (self.nInputPlane * self.kH * self.kW * cudnn.sizeof(self.weight))
if self.fastest_mode or cudnn.fastest == true then
algSearchMode = 'CUDNN_CONVOLUTION_BWD_DATA_PREFER_FASTEST'
end
@@ -318,10 +318,12 @@ function SpatialConvolution:createIODescriptors(input)
maxBufSize = math.max(maxBufSize, bufSize[1])
self.extraBuffer = self.extraBuffer or cudnn.getSharedWorkspace()
- self.extraBufferSizeInBytes = self.extraBuffer:nElement() * 4 -- float
+ self.extraBuffer = self.extraBuffer:cuda() -- always force float
+ self.extraBufferSizeInBytes =
+ self.extraBuffer:nElement() * 4 -- extraBuffer is always float
if maxBufSize > self.extraBufferSizeInBytes then
- self.extraBuffer:resize(math.ceil(maxBufSize/4))
- self.extraBufferSizeInBytes = maxBufSize
+ self.extraBuffer:resize(math.ceil(maxBufSize / 4))
+ self.extraBufferSizeInBytes = maxBufSize
end
-----------------------------------------------------------------------
@@ -341,8 +343,8 @@ function SpatialConvolution:createIODescriptors(input)
end
end
-local one = torch.FloatTensor({1});
-local zero = torch.FloatTensor({0});
+
+
local function makeContiguous(self, input, gradOutput)
if not input:isContiguous() then
@@ -365,20 +367,20 @@ function SpatialConvolution:updateOutput(input)
for g = 0, self.groups - 1 do
errcheck('cudnnConvolutionForward', cudnn.getHandle(),
- one:data(),
+ cudnn.scalar(input, 1),
self.iDesc[0], input:data() + g*self.input_offset,
self.weightDesc[0], self.weight:data() + g*self.weight_offset,
self.convDesc[0], self.fwdAlgType[0],
self.extraBuffer:data(), self.extraBufferSizeInBytes,
- zero:data(),
+ cudnn.scalar(input, 0),
self.oDesc[0], self.output:data() + g*self.output_offset);
end
-- add bias
if self.bias then
errcheck('cudnnAddTensor', cudnn.getHandle(),
- one:data(), self.biasDesc[0], self.bias:data(),
- one:data(), self.oDescForBias[0], self.output:data())
+ cudnn.scalar(input, 1), self.biasDesc[0], self.bias:data(),
+ cudnn.scalar(input, 1), self.oDescForBias[0], self.output:data())
end
return self.output
@@ -395,22 +397,23 @@ function SpatialConvolution:updateGradInput(input, gradOutput)
for g = 0,self.groups - 1 do
errcheck('cudnnConvolutionBackwardData', cudnn.getHandle(),
- one:data(),
+ cudnn.scalar(input, 1),
self.weightDesc[0], self.weight:data() + g*self.weight_offset,
self.oDesc[0], gradOutput:data() + g*self.output_offset,
self.convDesc[0],
self.bwdDataAlgType[0],
self.extraBuffer:data(), self.extraBufferSizeInBytes,
- zero:data(),
+ cudnn.scalar(input, 0),
self.iDesc[0], self.gradInput:data() + g*self.input_offset);
end
return self.gradInput
end
function SpatialConvolution:accGradParameters(input, gradOutput, scale)
- self.scaleT = self.scaleT or torch.FloatTensor(1):fill(1.0)
+ self.scaleT = self.scaleT or self.weight.new(1)
-- this line forces this member to always be on CPU (needed for cudnn)
- self.scaleT = self.scaleT:float()
+ self.scaleT = torch.type(self.weight) == 'torch.CudaDoubleTensor'
+ and self.scaleT:double() or self.scaleT:float()
scale = scale or 1.0
self.scaleT[1] = scale
@@ -425,7 +428,7 @@ function SpatialConvolution:accGradParameters(input, gradOutput, scale)
errcheck('cudnnConvolutionBackwardBias', cudnn.getHandle(),
self.scaleT:data(),
self.oDescForBias[0], gradOutput:data(),
- one:data(),
+ cudnn.scalar(input, 1),
self.biasDesc[0], self.gradBias:data())
end
@@ -438,7 +441,7 @@ function SpatialConvolution:accGradParameters(input, gradOutput, scale)
self.convDesc[0],
self.bwdFilterAlgType[0],
self.extraBuffer:data(), self.extraBufferSizeInBytes,
- one:data(),
+ cudnn.scalar(input, 1),
self.weightDesc[0], self.gradWeight:data() + g*self.weight_offset);
end
end
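
Two related changes in SpatialConvolution above: the default workspace limit drops the hardcoded 4 (sizeof float) in favor of cudnn.sizeof(self.weight), so it scales with the element size of the weight tensor (2 for half, 4 for float, 8 for double), while the shared scratch buffer itself is forced to a float CudaTensor, which is why its byte size is still nElement() * 4. A sketch of the sizing logic, assuming cudnn.sizeof behaves as defined in init.lua below:

    -- default workspace limit in bytes, scaled by the weight element size
    local algWorkspaceLimit = self.workspace_limit
       or (self.nInputPlane * self.kH * self.kW * cudnn.sizeof(self.weight))

    -- the shared scratch buffer stays float, so bytes = elements * 4
    self.extraBuffer = (self.extraBuffer or cudnn.getSharedWorkspace()):cuda()
    self.extraBufferSizeInBytes = self.extraBuffer:nElement() * 4
    if maxBufSize > self.extraBufferSizeInBytes then
       self.extraBuffer:resize(math.ceil(maxBufSize / 4))
       self.extraBufferSizeInBytes = maxBufSize
    end
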
diff --git a/SpatialCrossMapLRN.lua b/SpatialCrossMapLRN.lua
index 1f4ba33..4d75571 100644
--- a/SpatialCrossMapLRN.lua
+++ b/SpatialCrossMapLRN.lua
@@ -48,8 +48,8 @@ function LRN:createIODescriptors(input)
end
end
-local one = torch.FloatTensor({1});
-local zero = torch.FloatTensor({0});
+
+
function LRN:updateOutput(input)
if self.K then self.k, self.K = self.K, nil end
@@ -58,9 +58,9 @@ function LRN:updateOutput(input)
errcheck('cudnnLRNCrossChannelForward', cudnn.getHandle(),
self.LRNDesc[0],
'CUDNN_LRN_CROSS_CHANNEL_DIM1',
- one:data(),
+ cudnn.scalar(input, 1),
self.iDesc[0], input:data(),
- zero:data(),
+ cudnn.scalar(input, 0),
self.iDesc[0], self.output:data());
return self.output
end
@@ -80,11 +80,11 @@ function LRN:updateGradInput(input, gradOutput)
errcheck('cudnnLRNCrossChannelBackward',
cudnn.getHandle(), self.LRNDesc[0],
'CUDNN_LRN_CROSS_CHANNEL_DIM1',
- one:data(),
+ cudnn.scalar(input, 1),
self.iDesc[0], self.output:data(),
self.iDesc[0], gradOutput:data(),
self.iDesc[0], input:data(),
- zero:data(),
+ cudnn.scalar(input, 0),
self.iDesc[0], self.gradInput:data());
return self.gradInput
end
diff --git a/SpatialDivisiveNormalization.lua b/SpatialDivisiveNormalization.lua
index 3462b61..6917ec8 100644
--- a/SpatialDivisiveNormalization.lua
+++ b/SpatialDivisiveNormalization.lua
@@ -52,8 +52,8 @@ function DivisiveNorm:createIODescriptors(input)
end
end
-local one = torch.FloatTensor({1});
-local zero = torch.FloatTensor({0});
+
+
function DivisiveNorm:updateOutput(input)
if not self.DivisiveNormDesc then self:resetPoolDescriptors() end
@@ -61,9 +61,9 @@ function DivisiveNorm:updateOutput(input)
errcheck('cudnnDivisiveNormCrossChannelForward', cudnn.getHandle(),
self.DivisiveNormDesc[0],
'CUDNN_DivisiveNorm_CROSS_CHANNEL_DIM1',
- one:data(),
+ cudnn.scalar(input, 1),
self.iDesc[0], input:data(),
- zero:data(),
+ cudnn.scalar(input, 0),
self.iDesc[0], self.output:data());
return self.output
end
@@ -80,11 +80,11 @@ function DivisiveNorm:updateGradInput(input, gradOutput)
errcheck('cudnnDivisiveNormCrossChannelBackward',
cudnn.getHandle(), self.DivisiveNormDesc[0],
'CUDNN_DivisiveNorm_CROSS_CHANNEL_DIM1',
- one:data(),
+ cudnn.scalar(input, 1),
self.iDesc[0], self.output:data(),
self.iDesc[0], gradOutput:data(),
self.iDesc[0], input:data(),
- zero:data(),
+ cudnn.scalar(input, 0),
self.iDesc[0], self.gradInput:data());
return self.gradInput
end
diff --git a/SpatialFullConvolution.lua b/SpatialFullConvolution.lua
index ff91a13..e937893 100644
--- a/SpatialFullConvolution.lua
+++ b/SpatialFullConvolution.lua
@@ -302,8 +302,8 @@ function SpatialFullConvolution:createIODescriptors(input)
end
end
-local one = torch.FloatTensor({1});
-local zero = torch.FloatTensor({0});
+
+
function SpatialFullConvolution:updateOutput(input)
if not self.weightDesc then self:resetWeightDescriptors() end
@@ -312,19 +312,19 @@ function SpatialFullConvolution:updateOutput(input)
-- Because SpatialFullConvolution is performing the adjoint of the forward
-- convolution operator, we need to swap the forward and backward passes.
errcheck('cudnnConvolutionBackwardData', cudnn.getHandle(),
- one:data(),
+ cudnn.scalar(input, 1),
self.weightDesc[0], self.weight:data(),
self.iDesc[0], input:data(),
self.convDesc[0], self.bwdDataAlgType[0],
self.extraBuffer:data(), self.extraBufferSizeInBytes,
- zero:data(),
+ cudnn.scalar(input, 0),
self.oDesc[0], self.output:data())
-- add bias
if self.bias then
errcheck('cudnnAddTensor', cudnn.getHandle(),
- one:data(), self.biasDesc[0], self.bias:data(),
- one:data(), self.oDescForBias[0], self.output:data())
+ cudnn.scalar(input, 1), self.biasDesc[0], self.bias:data(),
+ cudnn.scalar(input, 1), self.oDescForBias[0], self.output:data())
end
return self.output
@@ -340,25 +340,27 @@ function SpatialFullConvolution:updateGradInput(input, gradOutput)
self:createIODescriptors(input)
errcheck('cudnnConvolutionForward', cudnn.getHandle(),
- one:data(),
+ cudnn.scalar(input, 1),
self.oDesc[0], gradOutput:data(),
self.weightDesc[0], self.weight:data(),
self.convDesc[0],
self.fwdAlgType[0],
self.extraBuffer:data(), self.extraBufferSizeInBytes,
- zero:data(),
+ cudnn.scalar(input, 0),
self.iDesc[0], self.gradInput:data());
return self.gradInput
end
function SpatialFullConvolution:accGradParameters(input, gradOutput, scale)
- self.scaleT = self.scaleT or torch.FloatTensor(1):fill(1.0)
+ self.scaleT = self.scaleT or self.weight.new(1)
-- this line forces this member to always be on CPU (needed for cudnn)
- self.scaleT = self.scaleT:float()
+ self.scaleT = torch.type(self.weight) == 'torch.CudaDoubleTensor'
+ and self.scaleT:double() or self.scaleT:float()
scale = scale or 1.0
self.scaleT[1] = scale
- assert(gradOutput:dim() == 3 or gradOutput:dim() == 4, 'gradOutput has to be 3D or 4D');
+ assert(gradOutput:dim() == 3 or gradOutput:dim() == 4,
+ 'gradOutput has to be 3D or 4D');
assert(gradOutput:isContiguous(), 'gradOutput has to be contiguous')
if not self.weightDesc then self:resetWeightDescriptors() end
self:createIODescriptors(input)
@@ -368,7 +370,7 @@ function SpatialFullConvolution:accGradParameters(input, gradOutput, scale)
errcheck('cudnnConvolutionBackwardBias', cudnn.getHandle(),
self.scaleT:data(),
self.oDescForBias[0], gradOutput:data(),
- one:data(),
+ cudnn.scalar(input, 1),
self.biasDesc[0], self.gradBias:data())
end
@@ -380,7 +382,7 @@ function SpatialFullConvolution:accGradParameters(input, gradOutput, scale)
self.convDesc[0],
self.bwdFilterAlgType[0],
self.extraBuffer:data(), self.extraBufferSizeInBytes,
- one:data(),
+ cudnn.scalar(input, 1),
self.weightDesc[0], self.gradWeight:data())
end
diff --git a/SpatialSoftMax.lua b/SpatialSoftMax.lua
index 167eb1f..7b52bdd 100644
--- a/SpatialSoftMax.lua
+++ b/SpatialSoftMax.lua
@@ -50,17 +50,17 @@ function SpatialSoftMax:createIODescriptors(input)
end
end
-local one = torch.FloatTensor({1});
-local zero = torch.FloatTensor({0});
+
+
function SpatialSoftMax:updateOutput(input)
self:createIODescriptors(input)
errcheck('cudnnSoftmaxForward',
cudnn.getHandle(),
self.algorithm, self.mode,
- one:data(),
+ cudnn.scalar(input, 1),
self.iDesc[0], input:data(),
- zero:data(),
+ cudnn.scalar(input, 0),
self.oDesc[0], self.output:data());
return self.output
end
@@ -77,10 +77,10 @@ function SpatialSoftMax:updateGradInput(input, gradOutput)
errcheck('cudnnSoftmaxBackward',
cudnn.getHandle(),
self.algorithm, self.mode,
- one:data(),
+ cudnn.scalar(input, 1),
self.oDesc[0], self.output:data(),
self.oDesc[0], gradOutput:data(),
- zero:data(),
+ cudnn.scalar(input, 0),
self.iDesc[0], self.gradInput:data());
return self.gradInput
end
diff --git a/TemporalConvolution.lua b/TemporalConvolution.lua
index 4648ffd..001d192 100644
--- a/TemporalConvolution.lua
+++ b/TemporalConvolution.lua
@@ -102,9 +102,9 @@ function TemporalConvolution:updateGradInput(input, gradOutput)
end
function TemporalConvolution:accGradParameters(input,gradOutput,scale)
---2d (4d) view of input
- local _input = inputview(input)
--- transpose gradOutput (it will likely be transposed twice, hopefully, no big deal
+ --2d (4d) view of input
+ local _input = inputview(input)
+ -- transpose gradOutput (it will likely be transposed twice, hopefully, no big deal
local _gradOutput = transposeGradOutput(gradOutput,self.buffer)
cudnn.SpatialConvolution.accGradParameters(self,_input,_gradOutput,scale)
end
diff --git a/VolumetricConvolution.lua b/VolumetricConvolution.lua
index b255467..3f32c3d 100644
--- a/VolumetricConvolution.lua
+++ b/VolumetricConvolution.lua
@@ -275,10 +275,12 @@ function VolumetricConvolution:createIODescriptors(input)
maxBufSize = math.max(maxBufSize, bufSize[1])
self.extraBuffer = self.extraBuffer or cudnn.getSharedWorkspace()
- self.extraBufferSizeInBytes = self.extraBuffer:nElement() * 4 -- float
+ self.extraBuffer = self.extraBuffer:cuda() -- always force float
+ self.extraBufferSizeInBytes =
+ self.extraBuffer:nElement() * 4 -- extraBuffer is always float
if maxBufSize > self.extraBufferSizeInBytes then
- self.extraBuffer:resize(math.ceil(maxBufSize/4))
- self.extraBufferSizeInBytes = maxBufSize
+ self.extraBuffer:resize(math.ceil(maxBufSize / 4))
+ self.extraBufferSizeInBytes = maxBufSize
end
-----------------------------------------------------------------------
@@ -291,8 +293,8 @@ function VolumetricConvolution:createIODescriptors(input)
end
end
-local one = torch.FloatTensor({1});
-local zero = torch.FloatTensor({0});
+
+
local function makeContiguous(self, input, gradOutput)
if not input:isContiguous() then
@@ -313,16 +315,16 @@ function VolumetricConvolution:updateOutput(input)
input = makeContiguous(self, input)
self:createIODescriptors(input)
errcheck('cudnnConvolutionForward', cudnn.getHandle(),
- one:data(),
+ cudnn.scalar(input, 1),
self.iDesc[0], input:data(),
self.weightDesc[0], self.weight:data(),
self.convDesc[0], self.fwdAlgType[0],
self.extraBuffer:data(), self.extraBufferSizeInBytes,
- zero:data(),
+ cudnn.scalar(input, 0),
self.oDesc[0], self.output:data());
errcheck('cudnnAddTensor', cudnn.getHandle(),
- one:data(),
- self.biasDesc[0], self.bias:data(), one:data(),
+ cudnn.scalar(input, 1),
+ self.biasDesc[0], self.bias:data(), cudnn.scalar(input, 1),
self.oDescBias[0], self.output:data());
return self.output
end
@@ -337,24 +339,25 @@ function VolumetricConvolution:updateGradInput(input, gradOutput)
if not self.weightDesc then self:resetWeightDescriptors() end
self:createIODescriptors(input)
errcheck('cudnnConvolutionBackwardData', cudnn.getHandle(),
- one:data(),
+ cudnn.scalar(input, 1),
self.weightDesc[0], self.weight:data(),
self.oDesc[0], gradOutput:data(),
self.convDesc[0],
self.bwdDataAlgType[0],
self.extraBuffer:data(), self.extraBufferSizeInBytes,
- zero:data(),
+ cudnn.scalar(input, 0),
self.iDesc[0], self.gradInput:data());
return self.gradInput
end
function VolumetricConvolution:accGradParameters(input, gradOutput, scale)
- self.scaleT = self.scaleT or torch.FloatTensor(1):fill(1.0)
- -- this line forces this member to always be on CPU (needed for cudnn)
- self.scaleT = self.scaleT:float()
+ self.scaleT = self.scaleT or self.weight.new(1)
+ -- this line forces this member to always be on CPU (needed for cudnn)
+ self.scaleT = torch.type(self.weight) == 'torch.CudaDoubleTensor'
+ and self.scaleT:double() or self.scaleT:float()
+ scale = scale or 1.0
+ self.scaleT[1] = scale
- scale = scale or 1.0
- self.scaleT[1] = scale
input, gradOutput = makeContiguous(self, input, gradOutput)
assert(gradOutput:dim() == 4 or gradOutput:dim() == 5,
'gradOutput has to be a 4D or 5D tensor');
@@ -364,7 +367,7 @@ function VolumetricConvolution:accGradParameters(input, gradOutput, scale)
errcheck('cudnnConvolutionBackwardBias', cudnn.getHandle(),
self.scaleT:data(),
self.oDescBias[0], gradOutput:data(),
- one:data(),
+ cudnn.scalar(input, 1),
self.biasDesc[0], self.gradBias:data());
-- gradWeight
errcheck('cudnnConvolutionBackwardFilter', cudnn.getHandle(),
@@ -374,7 +377,7 @@ function VolumetricConvolution:accGradParameters(input, gradOutput, scale)
self.convDesc[0],
self.bwdFilterAlgType[0],
self.extraBuffer:data(), self.extraBufferSizeInBytes,
- one:data(),
+ cudnn.scalar(input, 1),
self.weightDesc[0], self.gradWeight:data());
end
diff --git a/VolumetricMaxPooling.lua b/VolumetricMaxPooling.lua
index cf4d60b..e87d29c 100644
--- a/VolumetricMaxPooling.lua
+++ b/VolumetricMaxPooling.lua
@@ -1,4 +1,5 @@
-local VolumetricMaxPooling, parent = torch.class('cudnn.VolumetricMaxPooling', 'cudnn._Pooling3D')
+local VolumetricMaxPooling, parent = torch.class('cudnn.VolumetricMaxPooling',
+ 'cudnn._Pooling3D')
function VolumetricMaxPooling:updateOutput(input)
self.mode = 'CUDNN_POOLING_MAX'
diff --git a/functional.lua b/functional.lua
index cea9df9..24c6030 100644
--- a/functional.lua
+++ b/functional.lua
@@ -13,8 +13,8 @@ end
cudnn.functional = {}
-local one = torch.FloatTensor({1});
-local zero = torch.FloatTensor({0});
+
+
local function Batch2D(t)
return t:view(1, t:size(1), t:size(2), t:size(3))
@@ -28,8 +28,8 @@ cudnn.functional.bias2D_updateOutput = function(handle, bias, output)
local biasDesc = cudnn.toDescriptor(bias:view(1, bias:nElement(),1,1))
local oDesc = cudnn.toDescriptor(output)
errcheck('cudnnAddTensor', handle,
- one:data(), biasDesc[0], bias:data(),
- one:data(), oDesc[0], output:data())
+ cudnn.scalar(output, 1), biasDesc[0], bias:data(),
+ cudnn.scalar(output, 1), oDesc[0], output:data())
end
-- accumulates the gradients into gradBias.
@@ -43,7 +43,7 @@ cudnn.functional.bias2D_accGradParameters = function(handle, gradOutput, gradBia
errcheck('cudnnConvolutionBackwardBias', handle,
scaleT:data(),
oDesc[0], gradOutput:data(),
- one:data(),
+ cudnn.scalar(gradOutput, 1),
biasDesc[0], gradBias:data())
end
@@ -115,12 +115,12 @@ cudnn.functional.Convolution2D_updateOutput = function(handle, input, weight, ou
-- do convolution
errcheck('cudnnConvolutionForward', handle,
- one:data(),
+ cudnn.scalar(input, 1),
iDesc[0], input:data(),
weightDesc[0], weight:data(),
convDesc[0], algType[0],
workspace and workspace:data() or nil, algWorkspaceLimit,
- zero:data(),
+ cudnn.scalar(input, 0),
oDesc[0], output:data());
end
@@ -176,13 +176,13 @@ cudnn.functional.Convolution2D_updateGradInput = function(handle, input, weight,
-- do convolution
errcheck('cudnnConvolutionBackwardData', handle,
- one:data(),
+ cudnn.scalar(input, 1),
weightDesc[0], weight:data(),
oDesc[0], gradOutput:data(),
convDesc[0],
algType[0],
NULL, 0,
- zero:data(),
+ cudnn.scalar(input, 0),
iDesc[0], gradInput:data());
@@ -249,7 +249,7 @@ cudnn.functional.Convolution2D_accGradParameters = function(handle, input, gradW
convDesc[0],
algType[0],
NULL, 0,
- one:data(),
+ cudnn.scalar(input, 1),
weightDesc[0], gradWeight:data());
end
@@ -298,9 +298,9 @@ cudnn.functional.Pooling_updateOutput = function(handle, mode, input, output,
-- pool
errcheck('cudnnPoolingForward', handle,
poolDesc[0],
- one:data(),
+ cudnn.scalar(input, 1),
iDesc[0], input:data(),
- zero:data(),
+ cudnn.scalar(input, 0),
oDesc[0], output:data());
end
@@ -361,11 +361,11 @@ cudnn.functional.Pooling_updateGradInput = function(handle, mode, input, output,
-- pool
errcheck('cudnnPoolingBackward',
handle, poolDesc[0],
- one:data(),
+ cudnn.scalar(input, 1),
oDesc[0], output:data(),
oDesc[0], gradOutput:data(),
iDesc[0], input:data(),
- zero:data(),
+ cudnn.scalar(input, 0),
iDesc[0], gradInput:data());
end
diff --git a/init.lua b/init.lua
index d879d92..6920c95 100644
--- a/init.lua
+++ b/init.lua
@@ -36,6 +36,42 @@ cudnn.typemap = {
['torch.CudaDoubleTensor'] = 'CUDNN_DATA_DOUBLE',
}
+cudnn.typemap = {
+ ['torch.CudaHalfTensor'] = 'CUDNN_DATA_HALF',
+ ['torch.CudaTensor'] = 'CUDNN_DATA_FLOAT',
+ ['torch.CudaDoubleTensor'] = 'CUDNN_DATA_DOUBLE',
+}
+
+local sizeofmap = {
+ ['torch.CudaHalfTensor'] = ffi.sizeof('half'),
+ ['torch.CudaTensor'] = ffi.sizeof('float'),
+ ['torch.CudaDoubleTensor'] = ffi.sizeof('double'),
+}
+
+function cudnn.sizeof(t)
+ return sizeofmap[torch.type(t)]
+end
+
+local onemap = {
+ ['torch.CudaHalfTensor'] = torch.FloatTensor({1}),
+ ['torch.CudaTensor'] = torch.FloatTensor({1}),
+ ['torch.CudaDoubleTensor'] = torch.DoubleTensor({1}),
+}
+local zeromap = {
+ ['torch.CudaHalfTensor'] = torch.FloatTensor({0}),
+ ['torch.CudaTensor'] = torch.FloatTensor({0}),
+ ['torch.CudaDoubleTensor'] = torch.DoubleTensor({0}),
+}
+function cudnn.scalar(t, val)
+ if val == 1 then
+ return onemap[torch.type(t)]:data()
+ elseif val == 0 then
+ return zeromap[torch.type(t)]:data()
+ else
+ error('unknown scalar')
+ end
+end
+
-- TODO: determine if device supports true half and use true half on it
-- so far use float for half and float, double for double
local function determineHalfCapability(dev)
@@ -126,7 +162,7 @@ function cudnn.getSharedWorkspace()
local device = cutorch.getDevice()
local stream = cutorch.getStream() -- starts from 0
if not sharedBuffer[device][stream] then
- sharedBuffer[device][stream] = torch.CudaTensor(1)
+ sharedBuffer[device][stream] = torch.CudaTensor(1)
end
return sharedBuffer[device][stream]
end
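
The init.lua additions above give the rest of the bindings a per-type view of cuDNN scalars: cudnn.typemap maps Torch CUDA tensor types to cuDNN data types, sizeofmap/cudnn.sizeof gives bytes per element, and cudnn.scalar hands back a host pointer to a 1 or 0 of the matching scalar type. Rough usage, as a sketch (assumes cutorch is loaded and the ffi 'half' type is declared elsewhere in init.lua):

    local t = torch.CudaDoubleTensor(4, 4)
    print(cudnn.typemap[torch.type(t)])  -- 'CUDNN_DATA_DOUBLE'
    print(cudnn.sizeof(t))               -- 8 bytes per element
    -- host pointers usable directly as cuDNN alpha/beta arguments;
    -- float-backed for CudaTensor/CudaHalfTensor, double-backed for CudaDoubleTensor
    local alpha = cudnn.scalar(t, 1)
    local beta  = cudnn.scalar(t, 0)
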
diff --git a/test/test.lua b/test/test.lua
index ef0a5a3..0c9e852 100644
--- a/test/test.lua
+++ b/test/test.lua
@@ -43,6 +43,7 @@ end
function torch.CudaHalfTensor:__sub(b)
return self:cuda() - b:cuda()
end
+
function torch.CudaHalfTensor:abs()
return self:cuda():abs():cudaHalf()
end
@@ -55,6 +56,10 @@ function torch.CudaHalfTensor:mean()
return self:cuda():mean()
end
+function torch.CudaDoubleTensor:__sub(b)
+ return self:cuda() - b:cuda()
+end
+
function torch.CudaDoubleTensor:mean()
return self:cuda():mean()
end
@@ -98,20 +103,23 @@ local function testLayer(nnlayer, cudnnlayer, input, gradOutput, scale,
precision = testparams.precision_backward
end
mytester:assertlt(error, precision, 'error on ' .. name
- .. ' batchMode = ' .. tostring(batchMode)
- .. ' ' .. description)
+ .. ', batchMode = ' .. tostring(batchMode)
+ .. ', type = ' .. torch.type(res[name])
+ .. ', ' .. description)
end
-- IO
local ferr,berr = jac.testIO(cudnnlayer, cast(input))
mytester:assertlt(ferr, testparams.precision_io,
torch.typename(cudnnlayer) .. ' - i/o forward err '
- .. ' batchMode = ' .. tostring(batchMode)
- .. ' ' .. description)
+ .. ', batchMode = ' .. tostring(batchMode)
+ .. ', type = ' .. torch.type(res[name])
+ .. ', ' .. description)
mytester:assertlt(berr, testparams.precision_io,
torch.typename(cudnnlayer) .. ' - i/o backward err '
- .. ' batchMode = ' .. tostring(batchMode)
- .. ' ' .. description)
+ .. ', batchMode = ' .. tostring(batchMode)
+ .. ', type = ' .. torch.type(res[name])
+ .. ', ' .. description)
end
function cudnntest.SpatialConvolution()
@@ -779,21 +787,21 @@ for i = 1, 1 do -- cutorch.getDeviceCount() do
testparams = testparams_float
mytester:run()
+ -- double tensor may be broken at some places, gets NaNs.
+ print'Testing torch.CudaDoubleTensor'
+ testparams = testparams_double
+ mytester:run()
+
print(
-[[Half and Double Tensor tests are disabled due to missing functionality.
+ [[Half Tensor tests are disabled due to missing functionality.
They will be enabled once fully fixed and functional.
-See https://github.com/soumith/cudnn.torch/issues/226 and https://github.com/soumith/cudnn.torch/issues/225 for progress
+See https://github.com/soumith/cudnn.torch/issues/225 for progress
]])
-
-- Developers, do not commit uncommented regions until bindings fixed
-- print'Testing torch.CudaHalfTensor'
-- testparams = testparams_half
-- mytester:run()
- -- double tensor may be broken at some places, gets NaNs.
- -- print'Testing torch.CudaDoubleTensor'
- -- testparams = testparams_double
- -- mytester:run()
end
end