
github.com/soumith/cudnn.torch.git
author     soumith <soumith@fb.com>  2014-12-20 10:30:08 +0300
committer  soumith <soumith@fb.com>  2014-12-20 10:30:08 +0300
commit     035863d94fb41b48ccd0babf7055c6bb719bbf8f (patch)
tree       bff149765b13aa311012e47d4b4e2613b273e3e8
parent     a2def9658f9df262a252ff71e9dfd310ab722b13 (diff)
lint fixes (80 columns)
-rw-r--r--  Pointwise.lua               3
-rw-r--r--  Pooling.lua                 5
-rw-r--r--  SpatialConvolution.lua     35
-rw-r--r--  VolumetricConvolution.lua  58
-rw-r--r--  ffi.lua                   446
-rw-r--r--  test/test.lua               9
6 files changed, 293 insertions, 263 deletions
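
Note: this commit is a pure formatting pass; every hunk below wraps an over-long line to fit in 80 columns without changing behavior. The wrapping style, sketched on a call adapted from the SpatialConvolution hunk below:

    -- before: a single call overflowing 80 columns
    errcheck('cudnnSetFilterNdDescriptor', self.weightDesc[0], 'CUDNN_DATA_FLOAT', 4, desc:data())

    -- after: continuation arguments indented on following lines
    errcheck('cudnnSetFilterNdDescriptor', self.weightDesc[0],
             'CUDNN_DATA_FLOAT', 4,
             desc:data())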
diff --git a/Pointwise.lua b/Pointwise.lua
index aecfe08..56a374c 100644
--- a/Pointwise.lua
+++ b/Pointwise.lua
@@ -50,7 +50,8 @@ end
function Pointwise:updateGradInput(input, gradOutput)
assert((gradOutput:dim() == 4 or gradOutput:dim() == 3));
if not gradOutput:isContiguous() then
- self._gradOutput = self._gradOutput or gradOutput.new():resizeAs(gradOutput)
+ self._gradOutput = self._gradOutput
+ or gradOutput.new():resizeAs(gradOutput)
self._gradOutput:copy(gradOutput)
gradOutput = self._gradOutput
end
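
For context, the expression being wrapped here is Torch's usual contiguity cache: cuDNN kernels need contiguous memory, so a private buffer is lazily allocated once and reused whenever gradOutput arrives non-contiguous. The same pattern in isolation (a minimal sketch; ensureContiguous is a hypothetical name, not from this repo):

    -- return t if already contiguous, else copy into a cached buffer
    local function ensureContiguous(self, t)
       if t:isContiguous() then return t end
       self._buffer = self._buffer or t.new()
       self._buffer:resizeAs(t):copy(t)
       return self._buffer
    end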
diff --git a/Pooling.lua b/Pooling.lua
index 87d56bf..af28a3c 100644
--- a/Pooling.lua
+++ b/Pooling.lua
@@ -84,7 +84,7 @@ local zero = torch.FloatTensor({0});
function Pooling:updateOutput(input)
if not self.poolDesc then self:resetPoolDescriptors() end
self:createIODescriptors(input)
- errcheck('cudnnPoolingForward', cudnn.handle[cutorch.getDevice()-1],
+ errcheck('cudnnPoolingForward', cudnn.handle[cutorch.getDevice()-1],
self.poolDesc[0],
one:data(),
self.iDesc[0], input:data(),
@@ -102,7 +102,8 @@ function Pooling:updateGradInput(input, gradOutput)
end
if not self.poolDesc then self:resetPoolDescriptors() end
self:createIODescriptors(input)
- errcheck('cudnnPoolingBackward', cudnn.handle[cutorch.getDevice()-1], self.poolDesc[0],
+ errcheck('cudnnPoolingBackward',
+ cudnn.handle[cutorch.getDevice()-1], self.poolDesc[0],
one:data(),
self.oDesc[0], self.output:data(),
self.oDesc[0], gradOutput:data(),
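
errcheck here is cudnn.errcheck, which turns a non-success cudnnStatus_t into a Lua error. A simplified sketch of such a wrapper, assuming the cdef'd symbols (including cudnnGetErrorString, which is not part of this diff's declarations) are bound to cudnn.C:

    local ffi = require 'ffi'
    local function errcheck(f, ...)
       -- look up the C function by name and check its returned status
       local status = cudnn.C[f](...)
       if status ~= 'CUDNN_STATUS_SUCCESS' then
          error('Error in CuDNN (' .. f .. '): '
                .. ffi.string(cudnn.C.cudnnGetErrorString(status)))
       end
    end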
diff --git a/SpatialConvolution.lua b/SpatialConvolution.lua
index 8292536..cb5464c 100644
--- a/SpatialConvolution.lua
+++ b/SpatialConvolution.lua
@@ -1,8 +1,10 @@
-local SpatialConvolution, parent = torch.class('cudnn.SpatialConvolution', 'nn.SpatialConvolution')
+local SpatialConvolution, parent =
+ torch.class('cudnn.SpatialConvolution', 'nn.SpatialConvolution')
local ffi = require 'ffi'
local errcheck = cudnn.errcheck
-function SpatialConvolution:__init(nInputPlane, nOutputPlane, kW, kH, dW, dH, padW, padH)
+function SpatialConvolution:__init(nInputPlane, nOutputPlane,
+ kW, kH, dW, dH, padW, padH)
parent.__init(self, nInputPlane, nOutputPlane, kW, kH, dW, dH)
self.padW = padW or 0
self.padH = padH or 0
@@ -12,12 +14,15 @@ end
-- if you change the configuration of the module manually, call this
function SpatialConvolution:resetWeightDescriptors()
- assert(torch.typename(self.weight) == 'torch.CudaTensor', 'Only Cuda supported duh!')
- assert(torch.typename(self.bias) == 'torch.CudaTensor', 'Only Cuda supported duh!')
+ assert(torch.typename(self.weight) == 'torch.CudaTensor',
+ 'Only Cuda supported duh!')
+ assert(torch.typename(self.bias) == 'torch.CudaTensor',
+ 'Only Cuda supported duh!')
-- create filterDescriptor for weight
self.weightDesc = ffi.new('struct cudnnFilterStruct*[1]')
errcheck('cudnnCreateFilterDescriptor', self.weightDesc)
- local desc = torch.IntTensor({self.nOutputPlane, self.nInputPlane, self.kH, self.kW})
+ local desc = torch.IntTensor({self.nOutputPlane, self.nInputPlane,
+ self.kH, self.kW})
errcheck('cudnnSetFilterNdDescriptor', self.weightDesc[0],
'CUDNN_DATA_FLOAT', 4,
desc:data());
@@ -27,7 +32,7 @@ function SpatialConvolution:resetWeightDescriptors()
ffi.gc(self.weightDesc, destroyWDesc)
-- create descriptor for bias
- self.biasDesc = cudnn.toDescriptor(self.bias:view(1, self.nOutputPlane, 1, 1))
+ self.biasDesc = cudnn.toDescriptor(self.bias:view(1, self.nOutputPlane,1,1))
end
function SpatialConvolution:createIODescriptors(input)
@@ -51,7 +56,8 @@ function SpatialConvolution:createIODescriptors(input)
local pad = torch.IntTensor({self.padH, self.padW})
local stride = torch.IntTensor({self.dH, self.dW})
local upscale = torch.IntTensor({1,1})
- errcheck('cudnnSetConvolutionNdDescriptor', self.convDesc[0], 2, pad:data(),
+ errcheck('cudnnSetConvolutionNdDescriptor', self.convDesc[0],
+ 2, pad:data(),
stride:data(), upscale:data(), 'CUDNN_CROSS_CORRELATION');
local function destroyConvDesc(d)
errcheck('cudnnDestroyConvolutionDescriptor', d[0]);
@@ -61,7 +67,8 @@ function SpatialConvolution:createIODescriptors(input)
-- create output descriptor and resize output
local oSize = torch.IntTensor(4)
local oSizeD = oSize:data()
- errcheck('cudnnGetConvolutionNdForwardOutputDim', self.convDesc[0], self.iDesc[0],
+ errcheck('cudnnGetConvolutionNdForwardOutputDim',
+ self.convDesc[0], self.iDesc[0],
self.weightDesc[0], 4, oSizeD)
self.output:resize(oSize:long():storage())
-- create descriptor for output
@@ -71,13 +78,15 @@ function SpatialConvolution:createIODescriptors(input)
local algType = ffi.new("cudnnConvolutionFwdAlgo_t[?]", 1)
errcheck('cudnnGetConvolutionForwardAlgorithm',
cudnn.handle[cutorch.getDevice()-1],
- self.iDesc[0], self.weightDesc[0], self.convDesc[0], self.oDesc[0],
+ self.iDesc[0], self.weightDesc[0],
+ self.convDesc[0], self.oDesc[0],
'CUDNN_CONVOLUTION_FWD_PREFER_FASTEST', -1, algType)
self.algType = algType
local bufSize = torch.LongTensor(1)
errcheck('cudnnGetConvolutionForwardWorkspaceSize',
cudnn.handle[cutorch.getDevice()-1],
- self.iDesc[0], self.weightDesc[0], self.convDesc[0], self.oDesc[0],
+ self.iDesc[0], self.weightDesc[0],
+ self.convDesc[0], self.oDesc[0],
algType[0], bufSize:data())
self.extraBuffer = self.extraBuffer or input.new(1)
if bufSize[1] ~= 0 then self.extraBuffer:resize(bufSize[1]) end
@@ -107,7 +116,8 @@ function SpatialConvolution:updateOutput(input)
self.extraBuffer:data(), self.extraBuffer:nElement(),
zero:data(),
self.oDesc[0], self.output:data());
- errcheck('cudnnAddTensor', cudnn.handle[cutorch.getDevice()-1], 'CUDNN_ADD_SAME_C',
+ errcheck('cudnnAddTensor', cudnn.handle[cutorch.getDevice()-1],
+ 'CUDNN_ADD_SAME_C',
one:data(), self.biasDesc[0], self.bias:data(), one:data(),
self.oDesc[0], self.output:data());
return self.output
@@ -131,7 +141,8 @@ end
function SpatialConvolution:accGradParameters(input, gradOutput, scale)
self.scaleT = self.scaleT or torch.FloatTensor(1):fill(1.0)
- self.scaleT = self.scaleT:float() -- this line forces this member to always be on CPU (needed for cudnn)
+ -- this line forces this member to always be on CPU (needed for cudnn)
+ self.scaleT = self.scaleT:float()
scale = scale or 1.0
self.scaleT[1] = scale
assert((gradOutput:dim() == 3 or gradOutput:dim() == 4)
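
The re-wrapped comment in accGradParameters is worth a note: cuDNN receives its alpha/beta scale factors through host pointers (here self.scaleT:data()), so scaleT must stay a CPU torch.FloatTensor even when the module is converted with :cuda(). A sketch of the invariant being preserved:

    -- keep the scalar on the host; :float() undoes any :cuda() conversion
    self.scaleT = self.scaleT or torch.FloatTensor(1):fill(1.0)
    self.scaleT = self.scaleT:float()
    self.scaleT[1] = scale or 1.0
    -- later passed by host pointer, e.g. self.scaleT:data()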
diff --git a/VolumetricConvolution.lua b/VolumetricConvolution.lua
index 05a08d4..74857e2 100644
--- a/VolumetricConvolution.lua
+++ b/VolumetricConvolution.lua
@@ -1,8 +1,12 @@
-local VolumetricConvolution, parent = torch.class('cudnn.VolumetricConvolution', 'nn.VolumetricConvolution')
+local VolumetricConvolution, parent
+ = torch.class('cudnn.VolumetricConvolution', 'nn.VolumetricConvolution')
local ffi = require 'ffi'
local errcheck = cudnn.errcheck
-function VolumetricConvolution:__init(nInputPlane, nOutputPlane, kT, kW, kH, dT, dW, dH, padT, padW, padH)
+function VolumetricConvolution:__init(nInputPlane, nOutputPlane,
+ kT, kW, kH,
+ dT, dW, dH,
+ padT, padW, padH)
parent.__init(self, nInputPlane, nOutputPlane, kT, kW, kH, dT, dW, dH)
self.padT = padT or 0
self.padW = padW or 0
@@ -13,12 +17,15 @@ end
-- if you change the configuration of the module manually, call this
function VolumetricConvolution:resetWeightDescriptors()
- assert(torch.typename(self.weight) == 'torch.CudaTensor', 'Only Cuda supported duh!')
- assert(torch.typename(self.bias) == 'torch.CudaTensor', 'Only Cuda supported duh!')
+ assert(torch.typename(self.weight) == 'torch.CudaTensor',
+ 'Only Cuda supported duh!')
+ assert(torch.typename(self.bias) == 'torch.CudaTensor',
+ 'Only Cuda supported duh!')
-- create filterDescriptor for weight
self.weightDesc = ffi.new('struct cudnnFilterStruct*[1]')
errcheck('cudnnCreateFilterDescriptor', self.weightDesc)
- local desc = torch.IntTensor({self.nOutputPlane, self.nInputPlane, self.kT, self.kH, self.kW})
+ local desc = torch.IntTensor({self.nOutputPlane, self.nInputPlane,
+ self.kT, self.kH, self.kW})
errcheck('cudnnSetFilterNdDescriptor', self.weightDesc[0],
'CUDNN_DATA_FLOAT', 5,
desc:data());
@@ -28,13 +35,15 @@ function VolumetricConvolution:resetWeightDescriptors()
ffi.gc(self.weightDesc, destroyWDesc)
-- create descriptor for bias
- self.biasDesc = cudnn.toDescriptor(self.bias:view(1, self.nOutputPlane, 1, 1))
+ self.biasDesc = cudnn.toDescriptor(self.bias:view(1, self.nOutputPlane,
+ 1, 1))
end
function VolumetricConvolution:createIODescriptors(input)
local batch = true
if input:dim() == 4 then
- input = input:view(1, input:size(1), input:size(2), input:size(3), input:size(4))
+ input = input:view(1, input:size(1), input:size(2),
+ input:size(3), input:size(4))
batch = false
end
assert(input:dim() == 5 and input:isContiguous());
@@ -53,7 +62,8 @@ function VolumetricConvolution:createIODescriptors(input)
local pad = torch.IntTensor({self.padT, self.padH, self.padW})
local stride = torch.IntTensor({self.dT, self.dH, self.dW})
local upscale = torch.IntTensor({1,1,1})
- errcheck('cudnnSetConvolutionNdDescriptor', self.convDesc[0], 3, pad:data(),
+ errcheck('cudnnSetConvolutionNdDescriptor', self.convDesc[0],
+ 3, pad:data(),
stride:data(), upscale:data(), 'CUDNN_CROSS_CORRELATION');
local function destroyConvDesc(d)
errcheck('cudnnDestroyConvolutionDescriptor', d[0]);
@@ -63,28 +73,31 @@ function VolumetricConvolution:createIODescriptors(input)
-- create output descriptor and resize output
local oSize = torch.IntTensor(5)
local oSizeD = oSize:data()
- errcheck('cudnnGetConvolutionNdForwardOutputDim', self.convDesc[0], self.iDesc[0],
+ errcheck('cudnnGetConvolutionNdForwardOutputDim',
+ self.convDesc[0], self.iDesc[0],
self.weightDesc[0], 5, oSizeD)
self.output:resize(oSize:long():storage())
-- create descriptor for output
self.oDesc = cudnn.toDescriptor(self.output)
- self.oDescBias = cudnn.toDescriptor(self.output:view(self.output:size(1),
- self.output:size(2),
- self.output:size(3)
- *self.output:size(4),
- self.output:size(5)))
+ self.oDescBias = cudnn.toDescriptor(
+ self.output:view(self.output:size(1),
+ self.output:size(2),
+ self.output:size(3)*self.output:size(4),
+ self.output:size(5)))
-- create forwardAlgorithm descriptors for
local algType = ffi.new("cudnnConvolutionFwdAlgo_t[?]", 1)
errcheck('cudnnGetConvolutionForwardAlgorithm',
cudnn.handle[cutorch.getDevice()-1],
- self.iDesc[0], self.weightDesc[0], self.convDesc[0], self.oDesc[0],
- 'CUDNN_CONVOLUTION_FWD_PREFER_FASTEST', -1, algType)
+ self.iDesc[0], self.weightDesc[0], self.convDesc[0],
+ self.oDesc[0], 'CUDNN_CONVOLUTION_FWD_PREFER_FASTEST',
+ -1, algType)
self.algType = algType
local bufSize = torch.LongTensor(1)
errcheck('cudnnGetConvolutionForwardWorkspaceSize',
cudnn.handle[cutorch.getDevice()-1],
- self.iDesc[0], self.weightDesc[0], self.convDesc[0], self.oDesc[0],
+ self.iDesc[0], self.weightDesc[0],
+ self.convDesc[0], self.oDesc[0],
algType[0], bufSize:data())
self.extraBuffer = self.extraBuffer or input.new(1)
if bufSize[1] ~= 0 then self.extraBuffer:resize(bufSize[1]) end
@@ -116,8 +129,9 @@ function VolumetricConvolution:updateOutput(input)
self.extraBuffer:data(), self.extraBuffer:nElement(),
zero:data(),
self.oDesc[0], self.output:data());
- errcheck('cudnnAddTensor', cudnn.handle[cutorch.getDevice()-1], 'CUDNN_ADD_SAME_C',
- one:data(), self.biasDesc[0], self.bias:data(), one:data(),
+ errcheck('cudnnAddTensor', cudnn.handle[cutorch.getDevice()-1],
+ 'CUDNN_ADD_SAME_C', one:data(),
+ self.biasDesc[0], self.bias:data(), one:data(),
self.oDescBias[0], self.output:data());
return self.output
end
@@ -140,7 +154,8 @@ end
function VolumetricConvolution:accGradParameters(input, gradOutput, scale)
self.scaleT = self.scaleT or torch.FloatTensor(1):fill(1.0)
- self.scaleT = self.scaleT:float() -- this line forces this member to always be on CPU (needed for cudnn)
+ -- this line forces this member to always be on CPU (needed for cudnn)
+ self.scaleT = self.scaleT:float()
scale = scale or 1.0
self.scaleT[1] = scale
@@ -155,7 +170,8 @@ function VolumetricConvolution:accGradParameters(input, gradOutput, scale)
one:data(),
self.biasDesc[0], self.gradBias:data());
-- gradWeight
- errcheck('cudnnConvolutionBackwardFilter', cudnn.handle[cutorch.getDevice()-1],
+ errcheck('cudnnConvolutionBackwardFilter',
+ cudnn.handle[cutorch.getDevice()-1],
self.scaleT:data(),
self.iDesc[0], input:data(),
self.oDesc[0], gradOutput:data(),
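
One of the wrapped lines in createIODescriptors also shows the batch-promotion idiom: an unbatched 4D volumetric input is viewed as a batch of one so the rest of the code handles a single 5D layout. The idiom in isolation (toBatch is a hypothetical name):

    -- promote C x T x H x W to 1 x C x T x H x W; report whether input was batched
    local function toBatch(input)
       if input:dim() == 4 then
          return input:view(1, input:size(1), input:size(2),
                            input:size(3), input:size(4)), false
       end
       return input, true
    end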
diff --git a/ffi.lua b/ffi.lua
index 24eb33e..7f21f63 100644
--- a/ffi.lua
+++ b/ffi.lua
@@ -43,53 +43,51 @@ typedef enum
CUDNN_TENSOR_NHWC = 1 /* feature maps interleaved ( cStride = 1 )*/
} cudnnTensorFormat_t;
-cudnnStatus_t cudnnCreateTensorDescriptor( cudnnTensorDescriptor_t *tensorDesc );
+cudnnStatus_t cudnnCreateTensorDescriptor( cudnnTensorDescriptor_t *tensorDesc);
cudnnStatus_t cudnnSetTensorNdDescriptor( cudnnTensorDescriptor_t tensorDesc,
cudnnDataType_t dataType,
int nbDims,
const int dimA[],
const int strideA[]
);
-cudnnStatus_t cudnnDestroyTensorDescriptor( cudnnTensorDescriptor_t tensorDesc );
+cudnnStatus_t cudnnDestroyTensorDescriptor( cudnnTensorDescriptor_t tensorDesc);
typedef enum
{
CUDNN_ADD_IMAGE = 0,
- CUDNN_ADD_SAME_HW = 0,
+ CUDNN_ADD_SAME_HW = 0,
CUDNN_ADD_FEATURE_MAP = 1,
CUDNN_ADD_SAME_CHW = 1,
CUDNN_ADD_SAME_C = 2,
CUDNN_ADD_FULL_TENSOR = 3
-} cudnnAddMode_t;
-/* Tensor Bias addition : srcDest = alpha * bias + beta * srcDestDesc */
-cudnnStatus_t cudnnAddTensor( cudnnHandle_t handle,
- cudnnAddMode_t mode,
- const void *alpha,
- const cudnnTensorDescriptor_t biasDesc,
- const void *biasData,
- const void *beta,
- cudnnTensorDescriptor_t srcDestDesc,
- void *srcDestData
- );
-
-/* Set all data points of a tensor to a given value : srcDest = value */
+} cudnnAddMode_t;
+
+cudnnStatus_t cudnnAddTensor(cudnnHandle_t handle,
+ cudnnAddMode_t mode,
+ const void *alpha,
+ const cudnnTensorDescriptor_t biasDesc,
+ const void *biasData,
+ const void *beta,
+ cudnnTensorDescriptor_t srcDestDesc,
+ void *srcDestData
+ );
+
cudnnStatus_t cudnnSetTensor( cudnnHandle_t handle,
- const cudnnTensorDescriptor_t srcDestDesc,
- void *srcDestData,
- const void *value
+ const cudnnTensorDescriptor_t srcDestDesc,
+ void *srcDestData,
+ const void *value
);
-/* Set all data points of a tensor to a given value : srcDest = alpha * srcDest */
-cudnnStatus_t cudnnScaleTensor( cudnnHandle_t handle,
- const cudnnTensorDescriptor_t srcDestDesc,
- void *srcDestData,
- const void *alpha
- );
+cudnnStatus_t cudnnScaleTensor(cudnnHandle_t handle,
+ const cudnnTensorDescriptor_t srcDestDesc,
+ void *srcDestData,
+ const void *alpha
+ );
typedef enum
{
- CUDNN_CONVOLUTION = 0,
- CUDNN_CROSS_CORRELATION = 1
+ CUDNN_CONVOLUTION = 0,
+ CUDNN_CROSS_CORRELATION = 1
} cudnnConvolutionMode_t;
typedef enum
@@ -98,139 +96,140 @@ typedef enum
CUDNN_CONVOLUTION_WEIGHT_GRAD = 1, /* Weight Gradient update function */
CUDNN_CONVOLUTION_DATA_GRAD = 2 /* Data Gradient update function */
} cudnnConvolutionPath_t;
-cudnnStatus_t cudnnCreateFilterDescriptor( cudnnFilterDescriptor_t *filterDesc );
-cudnnStatus_t cudnnSetFilterNdDescriptor( cudnnFilterDescriptor_t filterDesc,
- cudnnDataType_t dataType, // image data type
- int nbDims,
- const int filterDimA[]
- );
+cudnnStatus_t cudnnCreateFilterDescriptor(cudnnFilterDescriptor_t *filterDesc);
+cudnnStatus_t cudnnSetFilterNdDescriptor(cudnnFilterDescriptor_t filterDesc,
+ cudnnDataType_t dataType,
+ int nbDims,
+ const int filterDimA[]
+ );
+
+cudnnStatus_t cudnnDestroyFilterDescriptor( cudnnFilterDescriptor_t filterDesc);
+cudnnStatus_t
+ cudnnCreateConvolutionDescriptor(cudnnConvolutionDescriptor_t *convDesc );
+cudnnStatus_t
+ cudnnSetConvolutionNdDescriptor(cudnnConvolutionDescriptor_t convDesc,
+ int arrayLength, /* nbDims-2 size */
+ const int padA[],
+ const int filterStrideA[],
+ const int upscaleA[],
+ cudnnConvolutionMode_t mode
+ );
+
+cudnnStatus_t
+ cudnnGetConvolutionNdDescriptor(const cudnnConvolutionDescriptor_t convDesc,
+ int arrayLengthRequested,
+ int *arrayLength,
+ int padA[],
+ int strideA[],
+ int upscaleA[],
+ cudnnConvolutionMode_t *mode
+ );
-cudnnStatus_t cudnnDestroyFilterDescriptor( cudnnFilterDescriptor_t filterDesc );
-
-cudnnStatus_t cudnnCreateConvolutionDescriptor( cudnnConvolutionDescriptor_t *convDesc );
-cudnnStatus_t cudnnSetConvolutionNdDescriptor( cudnnConvolutionDescriptor_t convDesc,
- int arrayLength, /* nbDims-2 size */
- const int padA[],
- const int filterStrideA[],
- const int upscaleA[],
- cudnnConvolutionMode_t mode
- );
-
-cudnnStatus_t cudnnGetConvolutionNdDescriptor( const cudnnConvolutionDescriptor_t convDesc,
- int arrayLengthRequested,
- int *arrayLength,
- int padA[],
- int strideA[],
- int upscaleA[],
- cudnnConvolutionMode_t *mode
- );
-
-
-/* Helper function to return the dimensions of the output tensor given a convolution descriptor */
-cudnnStatus_t cudnnGetConvolutionNdForwardOutputDim( const cudnnConvolutionDescriptor_t convDesc,
- const cudnnTensorDescriptor_t inputTensorDesc,
- const cudnnFilterDescriptor_t filterDesc,
- int nbDims,
- int tensorOuputDimA[]
- );
+
+cudnnStatus_t
+ cudnnGetConvolutionNdForwardOutputDim(
+ const cudnnConvolutionDescriptor_t convDesc,
+ const cudnnTensorDescriptor_t inputTensorDesc,
+ const cudnnFilterDescriptor_t filterDesc,
+ int nbDims,
+ int tensorOuputDimA[]
+ );
/* Destroy an instance of convolution descriptor */
-cudnnStatus_t cudnnDestroyConvolutionDescriptor( cudnnConvolutionDescriptor_t convDesc );
+cudnnStatus_t cudnnDestroyConvolutionDescriptor(
+ cudnnConvolutionDescriptor_t convDesc );
typedef enum
{
CUDNN_CONVOLUTION_FWD_NO_WORKSPACE = 0,
CUDNN_CONVOLUTION_FWD_PREFER_FASTEST = 1,
CUDNN_CONVOLUTION_FWD_SPECIFY_WORKSPACE_LIMIT = 2
-} cudnnConvolutionFwdPreference_t;
-
+} cudnnConvolutionFwdPreference_t;
+
typedef enum
{
CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_GEMM = 0,
CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM = 1,
CUDNN_CONVOLUTION_FWD_ALGO_GEMM = 2,
- CUDNN_CONVOLUTION_FWD_ALGO_DIRECT = 3
+ CUDNN_CONVOLUTION_FWD_ALGO_DIRECT = 3
} cudnnConvolutionFwdAlgo_t;
-cudnnStatus_t cudnnGetConvolutionForwardAlgorithm( cudnnHandle_t handle,
- const cudnnTensorDescriptor_t srcDesc,
- const cudnnFilterDescriptor_t filterDesc,
- const cudnnConvolutionDescriptor_t convDesc,
- const cudnnTensorDescriptor_t destDesc,
- cudnnConvolutionFwdPreference_t preference,
- size_t memoryLimitInbytes,
- cudnnConvolutionFwdAlgo_t *algo
- );
-
+cudnnStatus_t cudnnGetConvolutionForwardAlgorithm( cudnnHandle_t handle,
+ const cudnnTensorDescriptor_t srcDesc,
+ const cudnnFilterDescriptor_t filterDesc,
+ const cudnnConvolutionDescriptor_t convDesc,
+ const cudnnTensorDescriptor_t destDesc,
+ cudnnConvolutionFwdPreference_t preference,
+ size_t memoryLimitInbytes,
+ cudnnConvolutionFwdAlgo_t *algo
+ );
+
/*
* convolution algorithm (which requires potentially some workspace)
*/
- /* Helper function to return the minimum size of the workspace to be passed to the convolution given an algo*/
-cudnnStatus_t cudnnGetConvolutionForwardWorkspaceSize( cudnnHandle_t handle,
- const cudnnTensorDescriptor_t srcDesc,
- const cudnnFilterDescriptor_t filterDesc,
- const cudnnConvolutionDescriptor_t convDesc,
- const cudnnTensorDescriptor_t destDesc,
- cudnnConvolutionFwdAlgo_t algo,
- size_t *sizeInBytes
- );
-
+cudnnStatus_t cudnnGetConvolutionForwardWorkspaceSize( cudnnHandle_t handle,
+ const cudnnTensorDescriptor_t srcDesc,
+ const cudnnFilterDescriptor_t filterDesc,
+ const cudnnConvolutionDescriptor_t convDesc,
+ const cudnnTensorDescriptor_t destDesc,
+ cudnnConvolutionFwdAlgo_t algo,
+ size_t *sizeInBytes
+ );
-/* Convolution functions: All of the form "output = alpha * Op(inputs) + beta * output" */
/* Function to perform the forward multiconvolution */
-cudnnStatus_t cudnnConvolutionForward( cudnnHandle_t handle,
- const void *alpha,
- const cudnnTensorDescriptor_t srcDesc,
- const void *srcData,
- const cudnnFilterDescriptor_t filterDesc,
- const void *filterData,
- const cudnnConvolutionDescriptor_t convDesc,
- cudnnConvolutionFwdAlgo_t algo,
- void *workSpace,
- size_t workSpaceSizeInBytes,
- const void *beta,
- const cudnnTensorDescriptor_t destDesc,
- void *destData
+cudnnStatus_t cudnnConvolutionForward(cudnnHandle_t handle,
+ const void *alpha,
+ const cudnnTensorDescriptor_t srcDesc,
+ const void *srcData,
+ const cudnnFilterDescriptor_t filterDesc,
+ const void *filterData,
+ const cudnnConvolutionDescriptor_t convDesc,
+ cudnnConvolutionFwdAlgo_t algo,
+ void *workSpace,
+ size_t workSpaceSizeInBytes,
+ const void *beta,
+ const cudnnTensorDescriptor_t destDesc,
+ void *destData
);
/* Functions to perform the backward multiconvolution */
-cudnnStatus_t cudnnConvolutionBackwardBias( cudnnHandle_t handle,
- const void *alpha,
- const cudnnTensorDescriptor_t srcDesc,
- const void *srcData,
- const void *beta,
- const cudnnTensorDescriptor_t destDesc,
- void *destData
+cudnnStatus_t cudnnConvolutionBackwardBias( cudnnHandle_t handle,
+ const void *alpha,
+ const cudnnTensorDescriptor_t srcDesc,
+ const void *srcData,
+ const void *beta,
+ const cudnnTensorDescriptor_t destDesc,
+ void *destData
);
-
-
-
-cudnnStatus_t cudnnConvolutionBackwardFilter( cudnnHandle_t handle,
- const void *alpha,
- const cudnnTensorDescriptor_t srcDesc,
- const void *srcData,
- const cudnnTensorDescriptor_t diffDesc,
- const void *diffData,
- const cudnnConvolutionDescriptor_t convDesc,
- const void *beta,
- const cudnnFilterDescriptor_t gradDesc,
- void *gradData
- );
-
-
-cudnnStatus_t cudnnConvolutionBackwardData( cudnnHandle_t handle,
- const void *alpha,
- const cudnnFilterDescriptor_t filterDesc,
- const void *filterData,
- const cudnnTensorDescriptor_t diffDesc,
- const void *diffData,
- const cudnnConvolutionDescriptor_t convDesc,
- const void *beta,
- const cudnnTensorDescriptor_t gradDesc,
- void *gradData
- );
+
+
+
+cudnnStatus_t cudnnConvolutionBackwardFilter( cudnnHandle_t handle,
+ const void *alpha,
+ const cudnnTensorDescriptor_t srcDesc,
+ const void *srcData,
+ const cudnnTensorDescriptor_t diffDesc,
+ const void *diffData,
+ const cudnnConvolutionDescriptor_t convDesc,
+ const void *beta,
+ const cudnnFilterDescriptor_t gradDesc,
+ void *gradData
+ );
+
+
+cudnnStatus_t cudnnConvolutionBackwardData( cudnnHandle_t handle,
+ const void *alpha,
+ const cudnnFilterDescriptor_t filterDesc,
+ const void *filterData,
+ const cudnnTensorDescriptor_t diffDesc,
+ const void *diffData,
+ const cudnnConvolutionDescriptor_t convDesc,
+ const void *beta,
+ const cudnnTensorDescriptor_t gradDesc,
+ void *gradData
+ );
/*
@@ -238,43 +237,41 @@ cudnnStatus_t cudnnConvolutionBackwardData( cudnnHandle_t
*/
typedef enum
{
- CUDNN_SOFTMAX_FAST = 0, /* straightforward implementation */
- CUDNN_SOFTMAX_ACCURATE = 1 /* subtract max from every point to avoid overflow */
+ CUDNN_SOFTMAX_FAST = 0,
+ CUDNN_SOFTMAX_ACCURATE = 1
} cudnnSoftmaxAlgorithm_t;
typedef enum
{
- CUDNN_SOFTMAX_MODE_INSTANCE = 0, /* compute the softmax over all C, H, W for each N */
- CUDNN_SOFTMAX_MODE_CHANNEL = 1 /* compute the softmax over all C for each H, W, N */
+ CUDNN_SOFTMAX_MODE_INSTANCE = 0,
+ CUDNN_SOFTMAX_MODE_CHANNEL = 1
} cudnnSoftmaxMode_t;
-/* Softmax functions: All of the form "output = alpha * Op(inputs) + beta * output" */
-
/* Function to perform forward softmax */
cudnnStatus_t cudnnSoftmaxForward( cudnnHandle_t handle,
- cudnnSoftmaxAlgorithm_t algorithm,
- cudnnSoftmaxMode_t mode,
- const void *alpha,
- const cudnnTensorDescriptor_t srcDesc,
- const void *srcData,
- const void *beta,
- const cudnnTensorDescriptor_t destDesc,
- void *destData
- );
+ cudnnSoftmaxAlgorithm_t algorithm,
+ cudnnSoftmaxMode_t mode,
+ const void *alpha,
+ const cudnnTensorDescriptor_t srcDesc,
+ const void *srcData,
+ const void *beta,
+ const cudnnTensorDescriptor_t destDesc,
+ void *destData
+ );
/* Function to perform backward softmax */
cudnnStatus_t cudnnSoftmaxBackward( cudnnHandle_t handle,
- cudnnSoftmaxAlgorithm_t algorithm,
- cudnnSoftmaxMode_t mode,
- const void *alpha,
- const cudnnTensorDescriptor_t srcDesc,
- const void *srcData,
- const cudnnTensorDescriptor_t srcDiffDesc,
- const void *srcDiffData,
- const void *beta,
- const cudnnTensorDescriptor_t destDiffDesc,
- void *destDiffData
- );
+ cudnnSoftmaxAlgorithm_t algorithm,
+ cudnnSoftmaxMode_t mode,
+ const void *alpha,
+ const cudnnTensorDescriptor_t srcDesc,
+ const void *srcData,
+ const cudnnTensorDescriptor_t srcDiffDesc,
+ const void *srcDiffData,
+ const void *beta,
+ const cudnnTensorDescriptor_t destDiffDesc,
+ void *destDiffData
+ );
@@ -285,56 +282,60 @@ typedef enum
} cudnnPoolingMode_t;
/* Create an instance of pooling descriptor */
-cudnnStatus_t cudnnCreatePoolingDescriptor( cudnnPoolingDescriptor_t *poolingDesc);
-cudnnStatus_t cudnnSetPoolingNdDescriptor( cudnnPoolingDescriptor_t poolingDesc,
- const cudnnPoolingMode_t mode,
- int nbDims,
- const int windowDimA[],
- const int paddingA[],
- const int strideA[]
+cudnnStatus_t cudnnCreatePoolingDescriptor(
+ cudnnPoolingDescriptor_t *poolingDesc);
+cudnnStatus_t cudnnSetPoolingNdDescriptor(
+ cudnnPoolingDescriptor_t poolingDesc,
+ const cudnnPoolingMode_t mode,
+ int nbDims,
+ const int windowDimA[],
+ const int paddingA[],
+ const int strideA[]
);
-cudnnStatus_t cudnnGetPoolingNdDescriptor( const cudnnPoolingDescriptor_t poolingDesc,
- const int nbDimsRequested,
- cudnnPoolingMode_t *mode,
- int *nbDims,
- int windowDimA[],
- int paddingA[],
- int strideA[]
- );
-
-cudnnStatus_t cudnnGetPoolingNdForwardOutputDim( const cudnnPoolingDescriptor_t poolingDesc,
- const cudnnTensorDescriptor_t inputTensorDesc,
- int nbDims,
- int outputTensorDimA[]);
+cudnnStatus_t cudnnGetPoolingNdDescriptor(
+ const cudnnPoolingDescriptor_t poolingDesc,
+ const int nbDimsRequested,
+ cudnnPoolingMode_t *mode,
+ int *nbDims,
+ int windowDimA[],
+ int paddingA[],
+ int strideA[]
+ );
+
+cudnnStatus_t cudnnGetPoolingNdForwardOutputDim(
+ const cudnnPoolingDescriptor_t poolingDesc,
+ const cudnnTensorDescriptor_t inputTensorDesc,
+ int nbDims,
+ int outputTensorDimA[]);
/* Destroy an instance of pooling descriptor */
-cudnnStatus_t cudnnDestroyPoolingDescriptor( cudnnPoolingDescriptor_t poolingDesc );
-/* Pooling functions: All of the form "output = alpha * Op(inputs) + beta * output" */
+cudnnStatus_t cudnnDestroyPoolingDescriptor(
+ cudnnPoolingDescriptor_t poolingDesc );
/* Function to perform forward pooling */
cudnnStatus_t cudnnPoolingForward( cudnnHandle_t handle,
- const cudnnPoolingDescriptor_t poolingDesc,
- const void *alpha,
- const cudnnTensorDescriptor_t srcDesc,
- const void *srcData,
- const void *beta,
- const cudnnTensorDescriptor_t destDesc,
- void *destData
- );
+ const cudnnPoolingDescriptor_t poolingDesc,
+ const void *alpha,
+ const cudnnTensorDescriptor_t srcDesc,
+ const void *srcData,
+ const void *beta,
+ const cudnnTensorDescriptor_t destDesc,
+ void *destData
+ );
/* Function to perform backward pooling */
cudnnStatus_t cudnnPoolingBackward( cudnnHandle_t handle,
- const cudnnPoolingDescriptor_t poolingDesc,
- const void *alpha,
- const cudnnTensorDescriptor_t srcDesc,
- const void *srcData,
- const cudnnTensorDescriptor_t srcDiffDesc,
- const void *srcDiffData,
- const cudnnTensorDescriptor_t destDesc,
- const void *destData,
- const void *beta,
- const cudnnTensorDescriptor_t destDiffDesc,
- void *destDiffData
+ const cudnnPoolingDescriptor_t poolingDesc,
+ const void *alpha,
+ const cudnnTensorDescriptor_t srcDesc,
+ const void *srcData,
+ const cudnnTensorDescriptor_t srcDiffDesc,
+ const void *srcDiffData,
+ const cudnnTensorDescriptor_t destDesc,
+ const void *destData,
+ const void *beta,
+ const cudnnTensorDescriptor_t destDiffDesc,
+ void *destDiffData
);
typedef enum
@@ -346,36 +347,35 @@ typedef enum
/* Function to perform forward activation */
cudnnStatus_t cudnnActivationForward( cudnnHandle_t handle,
- cudnnActivationMode_t mode,
- const void *alpha,
- const cudnnTensorDescriptor_t srcDesc,
- const void *srcData,
- const void *beta,
- const cudnnTensorDescriptor_t destDesc,
- void *destData
- );
+ cudnnActivationMode_t mode,
+ const void *alpha,
+ const cudnnTensorDescriptor_t srcDesc,
+ const void *srcData,
+ const void *beta,
+ const cudnnTensorDescriptor_t destDesc,
+ void *destData
+ );
/* Function to perform backward activation */
cudnnStatus_t cudnnActivationBackward( cudnnHandle_t handle,
- cudnnActivationMode_t mode,
- const void *alpha,
- const cudnnTensorDescriptor_t srcDesc,
- const void *srcData,
- const cudnnTensorDescriptor_t srcDiffDesc,
- const void *srcDiffData,
- const cudnnTensorDescriptor_t destDesc,
- const void *destData,
- const void *beta,
- const cudnnTensorDescriptor_t destDiffDesc,
- void *destDiffData
- );
+ cudnnActivationMode_t mode,
+ const void *alpha,
+ const cudnnTensorDescriptor_t srcDesc,
+ const void *srcData,
+ const cudnnTensorDescriptor_t srcDiffDesc,
+ const void *srcDiffData,
+ const cudnnTensorDescriptor_t destDesc,
+ const void *destData,
+ const void *beta,
+ const cudnnTensorDescriptor_t destDiffDesc,
+ void *destDiffData
+ );
]]
-local ok
-ok,err = pcall(function() cudnn.C = ffi.load('libcudnn') end)
+local ok,err = pcall(function() cudnn.C = ffi.load('libcudnn') end)
if not ok then
print(err)
- error([['libcudnn.so not found in library path.
+ error([['libcudnn.so not found in library path.
Please install CuDNN from https://developer.nvidia.com/cuDNN
Then make sure all the files named as libcudnn.so* are placed in your library load path (for example /usr/local/lib , or manually add a path to LD_LIBRARY_PATH)
]])
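
The closing hunk keeps the guarded-load pattern: ffi.load is wrapped in pcall so a missing libcudnn yields an actionable install hint rather than an opaque crash. The same LuaJIT FFI pattern in a self-contained sketch (declarations abbreviated to one real cuDNN symbol):

    local ffi = require 'ffi'
    ffi.cdef[[
    const char *cudnnGetErrorString(int status);
    ]]
    local ok, C = pcall(ffi.load, 'cudnn')
    if not ok then
       -- on failure, C holds pcall's error message
       error('libcudnn not found; install CuDNN and add it to LD_LIBRARY_PATH')
    end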
diff --git a/test/test.lua b/test/test.lua
index 9931431..03f7c2f 100644
--- a/test/test.lua
+++ b/test/test.lua
@@ -179,7 +179,7 @@ function cudnntest.VolumetricConvolution_forward_single()
local inj = (outj-1)*sj+kj
local ink = (outk-1)*sk+kk
local input = torch.randn(from,ink,inj,ini):cuda()
- local sconv = nn.VolumetricConvolution(from,to,kk,ki,kj,sk,si,sj):float() --:cuda()
+ local sconv = nn.VolumetricConvolution(from,to,kk,ki,kj,sk,si,sj):float()
local groundtruth = sconv:forward(input:float())
cutorch.synchronize()
local gconv = cudnn.VolumetricConvolution(from,to,kk,ki,kj,sk,si,sj):cuda()
@@ -188,7 +188,8 @@ function cudnntest.VolumetricConvolution_forward_single()
local rescuda = gconv:forward(input)
cutorch.synchronize()
local error = rescuda:float() - groundtruth:float()
- mytester:assertlt(error:abs():max(), precision_forward, 'error on state (forward) ')
+ mytester:assertlt(error:abs():max(), precision_forward,
+ 'error on state (forward) ')
end
function cudnntest.VolumetricConvolution_backward_single()
@@ -208,8 +209,8 @@ function cudnntest.VolumetricConvolution_backward_single()
local ink = (outk-1)*sk+kk
local input = torch.randn(from,ink,inj,ini):cuda()
local gradOutput = torch.randn(to,outk,outj,outi):cuda()
- local sconv = nn.VolumetricConvolution(from,to,kk,ki,kj,sk,si,sj):float() --:cuda()
- local groundtruth = sconv:forward(input:float())
+ local sconv = nn.VolumetricConvolution(from,to,kk,ki,kj,sk,si,sj):float()
+ sconv:forward(input:float())
sconv:zeroGradParameters()
local groundgrad = sconv:backward(input:float(), gradOutput:float())
cutorch.synchronize()
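
In the backward test, the forward result is no longer bound to a local: forward is presumably still called because backward assumes a preceding forward pass (it relies on state set up there), so only the unused groundtruth binding was dropped. The resulting shape of the test body, abbreviated:

    sconv:forward(input:float())   -- called for its side effects; return value unused
    sconv:zeroGradParameters()
    local groundgrad = sconv:backward(input:float(), gradOutput:float())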