From 035863d94fb41b48ccd0babf7055c6bb719bbf8f Mon Sep 17 00:00:00 2001 From: soumith Date: Fri, 19 Dec 2014 23:30:08 -0800 Subject: lint fixes (80 columns) --- Pointwise.lua | 3 +- Pooling.lua | 5 +- SpatialConvolution.lua | 35 ++-- VolumetricConvolution.lua | 58 +++--- ffi.lua | 446 +++++++++++++++++++++++----------------------- test/test.lua | 9 +- 6 files changed, 293 insertions(+), 263 deletions(-) diff --git a/Pointwise.lua b/Pointwise.lua index aecfe08..56a374c 100644 --- a/Pointwise.lua +++ b/Pointwise.lua @@ -50,7 +50,8 @@ end function Pointwise:updateGradInput(input, gradOutput) assert((gradOutput:dim() == 4 or gradOutput:dim() == 3)); if not gradOutput:isContiguous() then - self._gradOutput = self._gradOutput or gradOutput.new():resizeAs(gradOutput) + self._gradOutput = self._gradOutput + or gradOutput.new():resizeAs(gradOutput) self._gradOutput:copy(gradOutput) gradOutput = self._gradOutput end diff --git a/Pooling.lua b/Pooling.lua index 87d56bf..af28a3c 100644 --- a/Pooling.lua +++ b/Pooling.lua @@ -84,7 +84,7 @@ local zero = torch.FloatTensor({0}); function Pooling:updateOutput(input) if not self.poolDesc then self:resetPoolDescriptors() end self:createIODescriptors(input) - errcheck('cudnnPoolingForward', cudnn.handle[cutorch.getDevice()-1], + errcheck('cudnnPoolingForward', cudnn.handle[cutorch.getDevice()-1], self.poolDesc[0], one:data(), self.iDesc[0], input:data(), @@ -102,7 +102,8 @@ function Pooling:updateGradInput(input, gradOutput) end if not self.poolDesc then self:resetPoolDescriptors() end self:createIODescriptors(input) - errcheck('cudnnPoolingBackward', cudnn.handle[cutorch.getDevice()-1], self.poolDesc[0], + errcheck('cudnnPoolingBackward', + cudnn.handle[cutorch.getDevice()-1], self.poolDesc[0], one:data(), self.oDesc[0], self.output:data(), self.oDesc[0], gradOutput:data(), diff --git a/SpatialConvolution.lua b/SpatialConvolution.lua index 8292536..cb5464c 100644 --- a/SpatialConvolution.lua +++ b/SpatialConvolution.lua @@ -1,8 +1,10 @@ -local SpatialConvolution, parent = torch.class('cudnn.SpatialConvolution', 'nn.SpatialConvolution') +local SpatialConvolution, parent = + torch.class('cudnn.SpatialConvolution', 'nn.SpatialConvolution') local ffi = require 'ffi' local errcheck = cudnn.errcheck -function SpatialConvolution:__init(nInputPlane, nOutputPlane, kW, kH, dW, dH, padW, padH) +function SpatialConvolution:__init(nInputPlane, nOutputPlane, + kW, kH, dW, dH, padW, padH) parent.__init(self, nInputPlane, nOutputPlane, kW, kH, dW, dH) self.padW = padW or 0 self.padH = padH or 0 @@ -12,12 +14,15 @@ end -- if you change the configuration of the module manually, call this function SpatialConvolution:resetWeightDescriptors() - assert(torch.typename(self.weight) == 'torch.CudaTensor', 'Only Cuda supported duh!') - assert(torch.typename(self.bias) == 'torch.CudaTensor', 'Only Cuda supported duh!') + assert(torch.typename(self.weight) == 'torch.CudaTensor', + 'Only Cuda supported duh!') + assert(torch.typename(self.bias) == 'torch.CudaTensor', + 'Only Cuda supported duh!') -- create filterDescriptor for weight self.weightDesc = ffi.new('struct cudnnFilterStruct*[1]') errcheck('cudnnCreateFilterDescriptor', self.weightDesc) - local desc = torch.IntTensor({self.nOutputPlane, self.nInputPlane, self.kH, self.kW}) + local desc = torch.IntTensor({self.nOutputPlane, self.nInputPlane, + self.kH, self.kW}) errcheck('cudnnSetFilterNdDescriptor', self.weightDesc[0], 'CUDNN_DATA_FLOAT', 4, desc:data()); @@ -27,7 +32,7 @@ function SpatialConvolution:resetWeightDescriptors() ffi.gc(self.weightDesc, destroyWDesc) -- create descriptor for bias - self.biasDesc = cudnn.toDescriptor(self.bias:view(1, self.nOutputPlane, 1, 1)) + self.biasDesc = cudnn.toDescriptor(self.bias:view(1, self.nOutputPlane,1,1)) end function SpatialConvolution:createIODescriptors(input) @@ -51,7 +56,8 @@ function SpatialConvolution:createIODescriptors(input) local pad = torch.IntTensor({self.padH, self.padW}) local stride = torch.IntTensor({self.dH, self.dW}) local upscale = torch.IntTensor({1,1}) - errcheck('cudnnSetConvolutionNdDescriptor', self.convDesc[0], 2, pad:data(), + errcheck('cudnnSetConvolutionNdDescriptor', self.convDesc[0], + 2, pad:data(), stride:data(), upscale:data(), 'CUDNN_CROSS_CORRELATION'); local function destroyConvDesc(d) errcheck('cudnnDestroyConvolutionDescriptor', d[0]); @@ -61,7 +67,8 @@ function SpatialConvolution:createIODescriptors(input) -- create output descriptor and resize output local oSize = torch.IntTensor(4) local oSizeD = oSize:data() - errcheck('cudnnGetConvolutionNdForwardOutputDim', self.convDesc[0], self.iDesc[0], + errcheck('cudnnGetConvolutionNdForwardOutputDim', + self.convDesc[0], self.iDesc[0], self.weightDesc[0], 4, oSizeD) self.output:resize(oSize:long():storage()) -- create descriptor for output @@ -71,13 +78,15 @@ function SpatialConvolution:createIODescriptors(input) local algType = ffi.new("cudnnConvolutionFwdAlgo_t[?]", 1) errcheck('cudnnGetConvolutionForwardAlgorithm', cudnn.handle[cutorch.getDevice()-1], - self.iDesc[0], self.weightDesc[0], self.convDesc[0], self.oDesc[0], + self.iDesc[0], self.weightDesc[0], + self.convDesc[0], self.oDesc[0], 'CUDNN_CONVOLUTION_FWD_PREFER_FASTEST', -1, algType) self.algType = algType local bufSize = torch.LongTensor(1) errcheck('cudnnGetConvolutionForwardWorkspaceSize', cudnn.handle[cutorch.getDevice()-1], - self.iDesc[0], self.weightDesc[0], self.convDesc[0], self.oDesc[0], + self.iDesc[0], self.weightDesc[0], + self.convDesc[0], self.oDesc[0], algType[0], bufSize:data()) self.extraBuffer = self.extraBuffer or input.new(1) if bufSize[1] ~= 0 then self.extraBuffer:resize(bufSize[1]) end @@ -107,7 +116,8 @@ function SpatialConvolution:updateOutput(input) self.extraBuffer:data(), self.extraBuffer:nElement(), zero:data(), self.oDesc[0], self.output:data()); - errcheck('cudnnAddTensor', cudnn.handle[cutorch.getDevice()-1], 'CUDNN_ADD_SAME_C', + errcheck('cudnnAddTensor', cudnn.handle[cutorch.getDevice()-1], + 'CUDNN_ADD_SAME_C', one:data(), self.biasDesc[0], self.bias:data(), one:data(), self.oDesc[0], self.output:data()); return self.output @@ -131,7 +141,8 @@ end function SpatialConvolution:accGradParameters(input, gradOutput, scale) self.scaleT = self.scaleT or torch.FloatTensor(1):fill(1.0) - self.scaleT = self.scaleT:float() -- this line forces this member to always be on CPU (needed for cudnn) + -- this line forces this member to always be on CPU (needed for cudnn) + self.scaleT = self.scaleT:float() scale = scale or 1.0 self.scaleT[1] = scale assert((gradOutput:dim() == 3 or gradOutput:dim() == 4) diff --git a/VolumetricConvolution.lua b/VolumetricConvolution.lua index 05a08d4..74857e2 100644 --- a/VolumetricConvolution.lua +++ b/VolumetricConvolution.lua @@ -1,8 +1,12 @@ -local VolumetricConvolution, parent = torch.class('cudnn.VolumetricConvolution', 'nn.VolumetricConvolution') +local VolumetricConvolution, parent + = torch.class('cudnn.VolumetricConvolution', 'nn.VolumetricConvolution') local ffi = require 'ffi' local errcheck = cudnn.errcheck -function VolumetricConvolution:__init(nInputPlane, nOutputPlane, kT, kW, kH, dT, dW, dH, padT, padW, padH) +function VolumetricConvolution:__init(nInputPlane, nOutputPlane, + kT, kW, kH, + dT, dW, dH, + padT, padW, padH) parent.__init(self, nInputPlane, nOutputPlane, kT, kW, kH, dT, dW, dH) self.padT = padT or 0 self.padW = padW or 0 @@ -13,12 +17,15 @@ end -- if you change the configuration of the module manually, call this function VolumetricConvolution:resetWeightDescriptors() - assert(torch.typename(self.weight) == 'torch.CudaTensor', 'Only Cuda supported duh!') - assert(torch.typename(self.bias) == 'torch.CudaTensor', 'Only Cuda supported duh!') + assert(torch.typename(self.weight) == 'torch.CudaTensor', + 'Only Cuda supported duh!') + assert(torch.typename(self.bias) == 'torch.CudaTensor', + 'Only Cuda supported duh!') -- create filterDescriptor for weight self.weightDesc = ffi.new('struct cudnnFilterStruct*[1]') errcheck('cudnnCreateFilterDescriptor', self.weightDesc) - local desc = torch.IntTensor({self.nOutputPlane, self.nInputPlane, self.kT, self.kH, self.kW}) + local desc = torch.IntTensor({self.nOutputPlane, self.nInputPlane, + self.kT, self.kH, self.kW}) errcheck('cudnnSetFilterNdDescriptor', self.weightDesc[0], 'CUDNN_DATA_FLOAT', 5, desc:data()); @@ -28,13 +35,15 @@ function VolumetricConvolution:resetWeightDescriptors() ffi.gc(self.weightDesc, destroyWDesc) -- create descriptor for bias - self.biasDesc = cudnn.toDescriptor(self.bias:view(1, self.nOutputPlane, 1, 1)) + self.biasDesc = cudnn.toDescriptor(self.bias:view(1, self.nOutputPlane, + 1, 1)) end function VolumetricConvolution:createIODescriptors(input) local batch = true if input:dim() == 4 then - input = input:view(1, input:size(1), input:size(2), input:size(3), input:size(4)) + input = input:view(1, input:size(1), input:size(2), + input:size(3), input:size(4)) batch = false end assert(input:dim() == 5 and input:isContiguous()); @@ -53,7 +62,8 @@ function VolumetricConvolution:createIODescriptors(input) local pad = torch.IntTensor({self.padT, self.padH, self.padW}) local stride = torch.IntTensor({self.dT, self.dH, self.dW}) local upscale = torch.IntTensor({1,1,1}) - errcheck('cudnnSetConvolutionNdDescriptor', self.convDesc[0], 3, pad:data(), + errcheck('cudnnSetConvolutionNdDescriptor', self.convDesc[0], + 3, pad:data(), stride:data(), upscale:data(), 'CUDNN_CROSS_CORRELATION'); local function destroyConvDesc(d) errcheck('cudnnDestroyConvolutionDescriptor', d[0]); @@ -63,28 +73,31 @@ function VolumetricConvolution:createIODescriptors(input) -- create output descriptor and resize output local oSize = torch.IntTensor(5) local oSizeD = oSize:data() - errcheck('cudnnGetConvolutionNdForwardOutputDim', self.convDesc[0], self.iDesc[0], + errcheck('cudnnGetConvolutionNdForwardOutputDim', + self.convDesc[0], self.iDesc[0], self.weightDesc[0], 5, oSizeD) self.output:resize(oSize:long():storage()) -- create descriptor for output self.oDesc = cudnn.toDescriptor(self.output) - self.oDescBias = cudnn.toDescriptor(self.output:view(self.output:size(1), - self.output:size(2), - self.output:size(3) - *self.output:size(4), - self.output:size(5))) + self.oDescBias = cudnn.toDescriptor( + self.output:view(self.output:size(1), + self.output:size(2), + self.output:size(3)*self.output:size(4), + self.output:size(5))) -- create forwardAlgorithm descriptors for local algType = ffi.new("cudnnConvolutionFwdAlgo_t[?]", 1) errcheck('cudnnGetConvolutionForwardAlgorithm', cudnn.handle[cutorch.getDevice()-1], - self.iDesc[0], self.weightDesc[0], self.convDesc[0], self.oDesc[0], - 'CUDNN_CONVOLUTION_FWD_PREFER_FASTEST', -1, algType) + self.iDesc[0], self.weightDesc[0], self.convDesc[0], + self.oDesc[0], 'CUDNN_CONVOLUTION_FWD_PREFER_FASTEST', + -1, algType) self.algType = algType local bufSize = torch.LongTensor(1) errcheck('cudnnGetConvolutionForwardWorkspaceSize', cudnn.handle[cutorch.getDevice()-1], - self.iDesc[0], self.weightDesc[0], self.convDesc[0], self.oDesc[0], + self.iDesc[0], self.weightDesc[0], + self.convDesc[0], self.oDesc[0], algType[0], bufSize:data()) self.extraBuffer = self.extraBuffer or input.new(1) if bufSize[1] ~= 0 then self.extraBuffer:resize(bufSize[1]) end @@ -116,8 +129,9 @@ function VolumetricConvolution:updateOutput(input) self.extraBuffer:data(), self.extraBuffer:nElement(), zero:data(), self.oDesc[0], self.output:data()); - errcheck('cudnnAddTensor', cudnn.handle[cutorch.getDevice()-1], 'CUDNN_ADD_SAME_C', - one:data(), self.biasDesc[0], self.bias:data(), one:data(), + errcheck('cudnnAddTensor', cudnn.handle[cutorch.getDevice()-1], + 'CUDNN_ADD_SAME_C', one:data(), + self.biasDesc[0], self.bias:data(), one:data(), self.oDescBias[0], self.output:data()); return self.output end @@ -140,7 +154,8 @@ end function VolumetricConvolution:accGradParameters(input, gradOutput, scale) self.scaleT = self.scaleT or torch.FloatTensor(1):fill(1.0) - self.scaleT = self.scaleT:float() -- this line forces this member to always be on CPU (needed for cudnn) + -- this line forces this member to always be on CPU (needed for cudnn) + self.scaleT = self.scaleT:float() scale = scale or 1.0 self.scaleT[1] = scale @@ -155,7 +170,8 @@ function VolumetricConvolution:accGradParameters(input, gradOutput, scale) one:data(), self.biasDesc[0], self.gradBias:data()); -- gradWeight - errcheck('cudnnConvolutionBackwardFilter', cudnn.handle[cutorch.getDevice()-1], + errcheck('cudnnConvolutionBackwardFilter', + cudnn.handle[cutorch.getDevice()-1], self.scaleT:data(), self.iDesc[0], input:data(), self.oDesc[0], gradOutput:data(), diff --git a/ffi.lua b/ffi.lua index 24eb33e..7f21f63 100644 --- a/ffi.lua +++ b/ffi.lua @@ -43,53 +43,51 @@ typedef enum CUDNN_TENSOR_NHWC = 1 /* feature maps interleaved ( cStride = 1 )*/ } cudnnTensorFormat_t; -cudnnStatus_t cudnnCreateTensorDescriptor( cudnnTensorDescriptor_t *tensorDesc ); +cudnnStatus_t cudnnCreateTensorDescriptor( cudnnTensorDescriptor_t *tensorDesc); cudnnStatus_t cudnnSetTensorNdDescriptor( cudnnTensorDescriptor_t tensorDesc, cudnnDataType_t dataType, int nbDims, const int dimA[], const int strideA[] ); -cudnnStatus_t cudnnDestroyTensorDescriptor( cudnnTensorDescriptor_t tensorDesc ); +cudnnStatus_t cudnnDestroyTensorDescriptor( cudnnTensorDescriptor_t tensorDesc); typedef enum { CUDNN_ADD_IMAGE = 0, - CUDNN_ADD_SAME_HW = 0, + CUDNN_ADD_SAME_HW = 0, CUDNN_ADD_FEATURE_MAP = 1, CUDNN_ADD_SAME_CHW = 1, CUDNN_ADD_SAME_C = 2, CUDNN_ADD_FULL_TENSOR = 3 -} cudnnAddMode_t; -/* Tensor Bias addition : srcDest = alpha * bias + beta * srcDestDesc */ -cudnnStatus_t cudnnAddTensor( cudnnHandle_t handle, - cudnnAddMode_t mode, - const void *alpha, - const cudnnTensorDescriptor_t biasDesc, - const void *biasData, - const void *beta, - cudnnTensorDescriptor_t srcDestDesc, - void *srcDestData - ); - -/* Set all data points of a tensor to a given value : srcDest = value */ +} cudnnAddMode_t; + +cudnnStatus_t cudnnAddTensor(cudnnHandle_t handle, + cudnnAddMode_t mode, + const void *alpha, + const cudnnTensorDescriptor_t biasDesc, + const void *biasData, + const void *beta, + cudnnTensorDescriptor_t srcDestDesc, + void *srcDestData + ); + cudnnStatus_t cudnnSetTensor( cudnnHandle_t handle, - const cudnnTensorDescriptor_t srcDestDesc, - void *srcDestData, - const void *value + const cudnnTensorDescriptor_t srcDestDesc, + void *srcDestData, + const void *value ); -/* Set all data points of a tensor to a given value : srcDest = alpha * srcDest */ -cudnnStatus_t cudnnScaleTensor( cudnnHandle_t handle, - const cudnnTensorDescriptor_t srcDestDesc, - void *srcDestData, - const void *alpha - ); +cudnnStatus_t cudnnScaleTensor(cudnnHandle_t handle, + const cudnnTensorDescriptor_t srcDestDesc, + void *srcDestData, + const void *alpha + ); typedef enum { - CUDNN_CONVOLUTION = 0, - CUDNN_CROSS_CORRELATION = 1 + CUDNN_CONVOLUTION = 0, + CUDNN_CROSS_CORRELATION = 1 } cudnnConvolutionMode_t; typedef enum @@ -98,139 +96,140 @@ typedef enum CUDNN_CONVOLUTION_WEIGHT_GRAD = 1, /* Weight Gradient update function */ CUDNN_CONVOLUTION_DATA_GRAD = 2 /* Data Gradient update function */ } cudnnConvolutionPath_t; -cudnnStatus_t cudnnCreateFilterDescriptor( cudnnFilterDescriptor_t *filterDesc ); -cudnnStatus_t cudnnSetFilterNdDescriptor( cudnnFilterDescriptor_t filterDesc, - cudnnDataType_t dataType, // image data type - int nbDims, - const int filterDimA[] - ); +cudnnStatus_t cudnnCreateFilterDescriptor(cudnnFilterDescriptor_t *filterDesc); +cudnnStatus_t cudnnSetFilterNdDescriptor(cudnnFilterDescriptor_t filterDesc, + cudnnDataType_t dataType, + int nbDims, + const int filterDimA[] + ); + +cudnnStatus_t cudnnDestroyFilterDescriptor( cudnnFilterDescriptor_t filterDesc); +cudnnStatus_t + cudnnCreateConvolutionDescriptor(cudnnConvolutionDescriptor_t *convDesc ); +cudnnStatus_t + cudnnSetConvolutionNdDescriptor(cudnnConvolutionDescriptor_t convDesc, + int arrayLength, /* nbDims-2 size */ + const int padA[], + const int filterStrideA[], + const int upscaleA[], + cudnnConvolutionMode_t mode + ); + +cudnnStatus_t + cudnnGetConvolutionNdDescriptor(const cudnnConvolutionDescriptor_t convDesc, + int arrayLengthRequested, + int *arrayLength, + int padA[], + int strideA[], + int upscaleA[], + cudnnConvolutionMode_t *mode + ); -cudnnStatus_t cudnnDestroyFilterDescriptor( cudnnFilterDescriptor_t filterDesc ); - -cudnnStatus_t cudnnCreateConvolutionDescriptor( cudnnConvolutionDescriptor_t *convDesc ); -cudnnStatus_t cudnnSetConvolutionNdDescriptor( cudnnConvolutionDescriptor_t convDesc, - int arrayLength, /* nbDims-2 size */ - const int padA[], - const int filterStrideA[], - const int upscaleA[], - cudnnConvolutionMode_t mode - ); - -cudnnStatus_t cudnnGetConvolutionNdDescriptor( const cudnnConvolutionDescriptor_t convDesc, - int arrayLengthRequested, - int *arrayLength, - int padA[], - int strideA[], - int upscaleA[], - cudnnConvolutionMode_t *mode - ); - - -/* Helper function to return the dimensions of the output tensor given a convolution descriptor */ -cudnnStatus_t cudnnGetConvolutionNdForwardOutputDim( const cudnnConvolutionDescriptor_t convDesc, - const cudnnTensorDescriptor_t inputTensorDesc, - const cudnnFilterDescriptor_t filterDesc, - int nbDims, - int tensorOuputDimA[] - ); + +cudnnStatus_t + cudnnGetConvolutionNdForwardOutputDim( + const cudnnConvolutionDescriptor_t convDesc, + const cudnnTensorDescriptor_t inputTensorDesc, + const cudnnFilterDescriptor_t filterDesc, + int nbDims, + int tensorOuputDimA[] + ); /* Destroy an instance of convolution descriptor */ -cudnnStatus_t cudnnDestroyConvolutionDescriptor( cudnnConvolutionDescriptor_t convDesc ); +cudnnStatus_t cudnnDestroyConvolutionDescriptor( + cudnnConvolutionDescriptor_t convDesc ); typedef enum { CUDNN_CONVOLUTION_FWD_NO_WORKSPACE = 0, CUDNN_CONVOLUTION_FWD_PREFER_FASTEST = 1, CUDNN_CONVOLUTION_FWD_SPECIFY_WORKSPACE_LIMIT = 2 -} cudnnConvolutionFwdPreference_t; - +} cudnnConvolutionFwdPreference_t; + typedef enum { CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_GEMM = 0, CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM = 1, CUDNN_CONVOLUTION_FWD_ALGO_GEMM = 2, - CUDNN_CONVOLUTION_FWD_ALGO_DIRECT = 3 + CUDNN_CONVOLUTION_FWD_ALGO_DIRECT = 3 } cudnnConvolutionFwdAlgo_t; -cudnnStatus_t cudnnGetConvolutionForwardAlgorithm( cudnnHandle_t handle, - const cudnnTensorDescriptor_t srcDesc, - const cudnnFilterDescriptor_t filterDesc, - const cudnnConvolutionDescriptor_t convDesc, - const cudnnTensorDescriptor_t destDesc, - cudnnConvolutionFwdPreference_t preference, - size_t memoryLimitInbytes, - cudnnConvolutionFwdAlgo_t *algo - ); - +cudnnStatus_t cudnnGetConvolutionForwardAlgorithm( cudnnHandle_t handle, + const cudnnTensorDescriptor_t srcDesc, + const cudnnFilterDescriptor_t filterDesc, + const cudnnConvolutionDescriptor_t convDesc, + const cudnnTensorDescriptor_t destDesc, + cudnnConvolutionFwdPreference_t preference, + size_t memoryLimitInbytes, + cudnnConvolutionFwdAlgo_t *algo + ); + /* * convolution algorithm (which requires potentially some workspace) */ - /* Helper function to return the minimum size of the workspace to be passed to the convolution given an algo*/ -cudnnStatus_t cudnnGetConvolutionForwardWorkspaceSize( cudnnHandle_t handle, - const cudnnTensorDescriptor_t srcDesc, - const cudnnFilterDescriptor_t filterDesc, - const cudnnConvolutionDescriptor_t convDesc, - const cudnnTensorDescriptor_t destDesc, - cudnnConvolutionFwdAlgo_t algo, - size_t *sizeInBytes - ); - +cudnnStatus_t cudnnGetConvolutionForwardWorkspaceSize( cudnnHandle_t handle, + const cudnnTensorDescriptor_t srcDesc, + const cudnnFilterDescriptor_t filterDesc, + const cudnnConvolutionDescriptor_t convDesc, + const cudnnTensorDescriptor_t destDesc, + cudnnConvolutionFwdAlgo_t algo, + size_t *sizeInBytes + ); -/* Convolution functions: All of the form "output = alpha * Op(inputs) + beta * output" */ /* Function to perform the forward multiconvolution */ -cudnnStatus_t cudnnConvolutionForward( cudnnHandle_t handle, - const void *alpha, - const cudnnTensorDescriptor_t srcDesc, - const void *srcData, - const cudnnFilterDescriptor_t filterDesc, - const void *filterData, - const cudnnConvolutionDescriptor_t convDesc, - cudnnConvolutionFwdAlgo_t algo, - void *workSpace, - size_t workSpaceSizeInBytes, - const void *beta, - const cudnnTensorDescriptor_t destDesc, - void *destData +cudnnStatus_t cudnnConvolutionForward(cudnnHandle_t handle, + const void *alpha, + const cudnnTensorDescriptor_t srcDesc, + const void *srcData, + const cudnnFilterDescriptor_t filterDesc, + const void *filterData, + const cudnnConvolutionDescriptor_t convDesc, + cudnnConvolutionFwdAlgo_t algo, + void *workSpace, + size_t workSpaceSizeInBytes, + const void *beta, + const cudnnTensorDescriptor_t destDesc, + void *destData ); /* Functions to perform the backward multiconvolution */ -cudnnStatus_t cudnnConvolutionBackwardBias( cudnnHandle_t handle, - const void *alpha, - const cudnnTensorDescriptor_t srcDesc, - const void *srcData, - const void *beta, - const cudnnTensorDescriptor_t destDesc, - void *destData +cudnnStatus_t cudnnConvolutionBackwardBias( cudnnHandle_t handle, + const void *alpha, + const cudnnTensorDescriptor_t srcDesc, + const void *srcData, + const void *beta, + const cudnnTensorDescriptor_t destDesc, + void *destData ); - - - -cudnnStatus_t cudnnConvolutionBackwardFilter( cudnnHandle_t handle, - const void *alpha, - const cudnnTensorDescriptor_t srcDesc, - const void *srcData, - const cudnnTensorDescriptor_t diffDesc, - const void *diffData, - const cudnnConvolutionDescriptor_t convDesc, - const void *beta, - const cudnnFilterDescriptor_t gradDesc, - void *gradData - ); - - -cudnnStatus_t cudnnConvolutionBackwardData( cudnnHandle_t handle, - const void *alpha, - const cudnnFilterDescriptor_t filterDesc, - const void *filterData, - const cudnnTensorDescriptor_t diffDesc, - const void *diffData, - const cudnnConvolutionDescriptor_t convDesc, - const void *beta, - const cudnnTensorDescriptor_t gradDesc, - void *gradData - ); + + + +cudnnStatus_t cudnnConvolutionBackwardFilter( cudnnHandle_t handle, + const void *alpha, + const cudnnTensorDescriptor_t srcDesc, + const void *srcData, + const cudnnTensorDescriptor_t diffDesc, + const void *diffData, + const cudnnConvolutionDescriptor_t convDesc, + const void *beta, + const cudnnFilterDescriptor_t gradDesc, + void *gradData + ); + + +cudnnStatus_t cudnnConvolutionBackwardData( cudnnHandle_t handle, + const void *alpha, + const cudnnFilterDescriptor_t filterDesc, + const void *filterData, + const cudnnTensorDescriptor_t diffDesc, + const void *diffData, + const cudnnConvolutionDescriptor_t convDesc, + const void *beta, + const cudnnTensorDescriptor_t gradDesc, + void *gradData + ); /* @@ -238,43 +237,41 @@ cudnnStatus_t cudnnConvolutionBackwardData( cudnnHandle_t */ typedef enum { - CUDNN_SOFTMAX_FAST = 0, /* straightforward implementation */ - CUDNN_SOFTMAX_ACCURATE = 1 /* subtract max from every point to avoid overflow */ + CUDNN_SOFTMAX_FAST = 0, + CUDNN_SOFTMAX_ACCURATE = 1 } cudnnSoftmaxAlgorithm_t; typedef enum { - CUDNN_SOFTMAX_MODE_INSTANCE = 0, /* compute the softmax over all C, H, W for each N */ - CUDNN_SOFTMAX_MODE_CHANNEL = 1 /* compute the softmax over all C for each H, W, N */ + CUDNN_SOFTMAX_MODE_INSTANCE = 0, + CUDNN_SOFTMAX_MODE_CHANNEL = 1 } cudnnSoftmaxMode_t; -/* Softmax functions: All of the form "output = alpha * Op(inputs) + beta * output" */ - /* Function to perform forward softmax */ cudnnStatus_t cudnnSoftmaxForward( cudnnHandle_t handle, - cudnnSoftmaxAlgorithm_t algorithm, - cudnnSoftmaxMode_t mode, - const void *alpha, - const cudnnTensorDescriptor_t srcDesc, - const void *srcData, - const void *beta, - const cudnnTensorDescriptor_t destDesc, - void *destData - ); + cudnnSoftmaxAlgorithm_t algorithm, + cudnnSoftmaxMode_t mode, + const void *alpha, + const cudnnTensorDescriptor_t srcDesc, + const void *srcData, + const void *beta, + const cudnnTensorDescriptor_t destDesc, + void *destData + ); /* Function to perform backward softmax */ cudnnStatus_t cudnnSoftmaxBackward( cudnnHandle_t handle, - cudnnSoftmaxAlgorithm_t algorithm, - cudnnSoftmaxMode_t mode, - const void *alpha, - const cudnnTensorDescriptor_t srcDesc, - const void *srcData, - const cudnnTensorDescriptor_t srcDiffDesc, - const void *srcDiffData, - const void *beta, - const cudnnTensorDescriptor_t destDiffDesc, - void *destDiffData - ); + cudnnSoftmaxAlgorithm_t algorithm, + cudnnSoftmaxMode_t mode, + const void *alpha, + const cudnnTensorDescriptor_t srcDesc, + const void *srcData, + const cudnnTensorDescriptor_t srcDiffDesc, + const void *srcDiffData, + const void *beta, + const cudnnTensorDescriptor_t destDiffDesc, + void *destDiffData + ); @@ -285,56 +282,60 @@ typedef enum } cudnnPoolingMode_t; /* Create an instance of pooling descriptor */ -cudnnStatus_t cudnnCreatePoolingDescriptor( cudnnPoolingDescriptor_t *poolingDesc); -cudnnStatus_t cudnnSetPoolingNdDescriptor( cudnnPoolingDescriptor_t poolingDesc, - const cudnnPoolingMode_t mode, - int nbDims, - const int windowDimA[], - const int paddingA[], - const int strideA[] +cudnnStatus_t cudnnCreatePoolingDescriptor( + cudnnPoolingDescriptor_t *poolingDesc); +cudnnStatus_t cudnnSetPoolingNdDescriptor( + cudnnPoolingDescriptor_t poolingDesc, + const cudnnPoolingMode_t mode, + int nbDims, + const int windowDimA[], + const int paddingA[], + const int strideA[] ); -cudnnStatus_t cudnnGetPoolingNdDescriptor( const cudnnPoolingDescriptor_t poolingDesc, - const int nbDimsRequested, - cudnnPoolingMode_t *mode, - int *nbDims, - int windowDimA[], - int paddingA[], - int strideA[] - ); - -cudnnStatus_t cudnnGetPoolingNdForwardOutputDim( const cudnnPoolingDescriptor_t poolingDesc, - const cudnnTensorDescriptor_t inputTensorDesc, - int nbDims, - int outputTensorDimA[]); +cudnnStatus_t cudnnGetPoolingNdDescriptor( + const cudnnPoolingDescriptor_t poolingDesc, + const int nbDimsRequested, + cudnnPoolingMode_t *mode, + int *nbDims, + int windowDimA[], + int paddingA[], + int strideA[] + ); + +cudnnStatus_t cudnnGetPoolingNdForwardOutputDim( + const cudnnPoolingDescriptor_t poolingDesc, + const cudnnTensorDescriptor_t inputTensorDesc, + int nbDims, + int outputTensorDimA[]); /* Destroy an instance of pooling descriptor */ -cudnnStatus_t cudnnDestroyPoolingDescriptor( cudnnPoolingDescriptor_t poolingDesc ); -/* Pooling functions: All of the form "output = alpha * Op(inputs) + beta * output" */ +cudnnStatus_t cudnnDestroyPoolingDescriptor( + cudnnPoolingDescriptor_t poolingDesc ); /* Function to perform forward pooling */ cudnnStatus_t cudnnPoolingForward( cudnnHandle_t handle, - const cudnnPoolingDescriptor_t poolingDesc, - const void *alpha, - const cudnnTensorDescriptor_t srcDesc, - const void *srcData, - const void *beta, - const cudnnTensorDescriptor_t destDesc, - void *destData - ); + const cudnnPoolingDescriptor_t poolingDesc, + const void *alpha, + const cudnnTensorDescriptor_t srcDesc, + const void *srcData, + const void *beta, + const cudnnTensorDescriptor_t destDesc, + void *destData + ); /* Function to perform backward pooling */ cudnnStatus_t cudnnPoolingBackward( cudnnHandle_t handle, - const cudnnPoolingDescriptor_t poolingDesc, - const void *alpha, - const cudnnTensorDescriptor_t srcDesc, - const void *srcData, - const cudnnTensorDescriptor_t srcDiffDesc, - const void *srcDiffData, - const cudnnTensorDescriptor_t destDesc, - const void *destData, - const void *beta, - const cudnnTensorDescriptor_t destDiffDesc, - void *destDiffData + const cudnnPoolingDescriptor_t poolingDesc, + const void *alpha, + const cudnnTensorDescriptor_t srcDesc, + const void *srcData, + const cudnnTensorDescriptor_t srcDiffDesc, + const void *srcDiffData, + const cudnnTensorDescriptor_t destDesc, + const void *destData, + const void *beta, + const cudnnTensorDescriptor_t destDiffDesc, + void *destDiffData ); typedef enum @@ -346,36 +347,35 @@ typedef enum /* Function to perform forward activation */ cudnnStatus_t cudnnActivationForward( cudnnHandle_t handle, - cudnnActivationMode_t mode, - const void *alpha, - const cudnnTensorDescriptor_t srcDesc, - const void *srcData, - const void *beta, - const cudnnTensorDescriptor_t destDesc, - void *destData - ); + cudnnActivationMode_t mode, + const void *alpha, + const cudnnTensorDescriptor_t srcDesc, + const void *srcData, + const void *beta, + const cudnnTensorDescriptor_t destDesc, + void *destData + ); /* Function to perform backward activation */ cudnnStatus_t cudnnActivationBackward( cudnnHandle_t handle, - cudnnActivationMode_t mode, - const void *alpha, - const cudnnTensorDescriptor_t srcDesc, - const void *srcData, - const cudnnTensorDescriptor_t srcDiffDesc, - const void *srcDiffData, - const cudnnTensorDescriptor_t destDesc, - const void *destData, - const void *beta, - const cudnnTensorDescriptor_t destDiffDesc, - void *destDiffData - ); + cudnnActivationMode_t mode, + const void *alpha, + const cudnnTensorDescriptor_t srcDesc, + const void *srcData, + const cudnnTensorDescriptor_t srcDiffDesc, + const void *srcDiffData, + const cudnnTensorDescriptor_t destDesc, + const void *destData, + const void *beta, + const cudnnTensorDescriptor_t destDiffDesc, + void *destDiffData + ); ]] -local ok -ok,err = pcall(function() cudnn.C = ffi.load('libcudnn') end) +local ok,err = pcall(function() cudnn.C = ffi.load('libcudnn') end) if not ok then print(err) - error([['libcudnn.so not found in library path. + error([['libcudnn.so not found in library path. Please install CuDNN from https://developer.nvidia.com/cuDNN Then make sure all the files named as libcudnn.so* are placed in your library load path (for example /usr/local/lib , or manually add a path to LD_LIBRARY_PATH) ]]) diff --git a/test/test.lua b/test/test.lua index 9931431..03f7c2f 100644 --- a/test/test.lua +++ b/test/test.lua @@ -179,7 +179,7 @@ function cudnntest.VolumetricConvolution_forward_single() local inj = (outj-1)*sj+kj local ink = (outk-1)*sk+kk local input = torch.randn(from,ink,inj,ini):cuda() - local sconv = nn.VolumetricConvolution(from,to,kk,ki,kj,sk,si,sj):float() --:cuda() + local sconv = nn.VolumetricConvolution(from,to,kk,ki,kj,sk,si,sj):float() local groundtruth = sconv:forward(input:float()) cutorch.synchronize() local gconv = cudnn.VolumetricConvolution(from,to,kk,ki,kj,sk,si,sj):cuda() @@ -188,7 +188,8 @@ function cudnntest.VolumetricConvolution_forward_single() local rescuda = gconv:forward(input) cutorch.synchronize() local error = rescuda:float() - groundtruth:float() - mytester:assertlt(error:abs():max(), precision_forward, 'error on state (forward) ') + mytester:assertlt(error:abs():max(), precision_forward, + 'error on state (forward) ') end function cudnntest.VolumetricConvolution_backward_single() @@ -208,8 +209,8 @@ function cudnntest.VolumetricConvolution_backward_single() local ink = (outk-1)*sk+kk local input = torch.randn(from,ink,inj,ini):cuda() local gradOutput = torch.randn(to,outk,outj,outi):cuda() - local sconv = nn.VolumetricConvolution(from,to,kk,ki,kj,sk,si,sj):float() --:cuda() - local groundtruth = sconv:forward(input:float()) + local sconv = nn.VolumetricConvolution(from,to,kk,ki,kj,sk,si,sj):float() + sconv:forward(input:float()) sconv:zeroGradParameters() local groundgrad = sconv:backward(input:float(), gradOutput:float()) cutorch.synchronize() -- cgit v1.2.3