
github.com/soumith/cudnn.torch.git
author    Soumith Chintala <soumith@gmail.com>  2014-12-19 02:36:12 +0300
committer Soumith Chintala <soumith@gmail.com>  2014-12-19 23:28:13 +0300
commit    ad958c0e268d876ee4d713510b8c3ef83b37bca0 (patch)
tree      0defbe1196f778c9fb3f79f5f6e7a1da9ae92cda /ffi.lua
parent    d290c4cb9d632120d3fba97caefb3afb961081bf (diff)
everything works with R2. all unit tests pass. Maxpooling has free zero-padding
Diffstat (limited to 'ffi.lua')
-rw-r--r--  ffi.lua | 461
1 file changed, 296 insertions(+), 165 deletions(-)
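
This commit ports the FFI declarations from cuDNN R1 to R2: the fixed 4-D descriptor API (cudnnTensor4dDescriptor_t, cudnnSetConvolutionDescriptor, ...) gives way to N-dimensional descriptors set from dimension/stride arrays, every compute entry point gains host-side alpha/beta scaling factors, forward convolution picks an explicit algorithm with an optional workspace, and cudnnGetErrorString is now bound. A minimal LuaJIT sketch of driving the declarations below; the errcheck helper is hypothetical (not part of ffi.lua), and cudnn.C is the library handle set by ffi.load at the bottom of this file:

local ffi = require 'ffi'
local C = cudnn.C                     -- set by ffi.load('libcudnn') at the end of this file

-- hypothetical status-check helper built on the newly bound cudnnGetErrorString
local function errcheck(status)
   if tonumber(status) ~= 0 then      -- 0 = CUDNN_STATUS_SUCCESS
      error('cuDNN error: ' .. ffi.string(C.cudnnGetErrorString(status)))
   end
end

local handle = ffi.new('cudnnHandle_t[1]')   -- cudnnHandle_t is declared earlier in the cdef
errcheck(C.cudnnCreate(handle))

The sketches interleaved with the diff below reuse these illustrative names (C, errcheck, handle).
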
diff --git a/ffi.lua b/ffi.lua
index 8a68ac6..24eb33e 100644
--- a/ffi.lua
+++ b/ffi.lua
@@ -18,31 +18,40 @@ typedef enum
CUDNN_STATUS_LICENSE_ERROR = 10
} cudnnStatus_t;
+const char * cudnnGetErrorString(cudnnStatus_t status);
+
typedef struct CUstream_st *cudaStream_t;
cudnnStatus_t cudnnCreate(cudnnHandle_t *handle);
cudnnStatus_t cudnnDestroy(cudnnHandle_t handle);
-typedef struct cudnnTensor4dStruct* cudnnTensor4dDescriptor_t;
+cudnnStatus_t cudnnSetStream(cudnnHandle_t handle, cudaStream_t streamId);
+cudnnStatus_t cudnnGetStream(cudnnHandle_t handle, cudaStream_t *streamId);
+
+typedef struct cudnnTensorStruct* cudnnTensorDescriptor_t;
typedef struct cudnnConvolutionStruct* cudnnConvolutionDescriptor_t;
typedef struct cudnnPoolingStruct* cudnnPoolingDescriptor_t;
typedef struct cudnnFilterStruct* cudnnFilterDescriptor_t;
+
typedef enum
{
CUDNN_DATA_FLOAT = 0,
CUDNN_DATA_DOUBLE = 1
} cudnnDataType_t;
-cudnnStatus_t cudnnCreateTensor4dDescriptor( cudnnTensor4dDescriptor_t *tensorDesc );
-cudnnStatus_t cudnnSetTensor4dDescriptorEx( cudnnTensor4dDescriptor_t tensorDesc,
- cudnnDataType_t dataType, // image data type
- int n, // number of inputs (batch size)
- int c, // number of input feature maps
- int h, // height of input section
- int w, // width of input section
- int nStride,
- int cStride,
- int hStride,
- int wStride
- );
-cudnnStatus_t cudnnDestroyTensor4dDescriptor( cudnnTensor4dDescriptor_t tensorDesc );
+
+typedef enum
+{
+ CUDNN_TENSOR_NCHW = 0, /* row major (wStride = 1, hStride = w) */
+ CUDNN_TENSOR_NHWC = 1 /* feature maps interleaved ( cStride = 1 )*/
+} cudnnTensorFormat_t;
+
+cudnnStatus_t cudnnCreateTensorDescriptor( cudnnTensorDescriptor_t *tensorDesc );
+cudnnStatus_t cudnnSetTensorNdDescriptor( cudnnTensorDescriptor_t tensorDesc,
+ cudnnDataType_t dataType,
+ int nbDims,
+ const int dimA[],
+ const int strideA[]
+ );
+cudnnStatus_t cudnnDestroyTensorDescriptor( cudnnTensorDescriptor_t tensorDesc );
+
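
The R1 cudnnSetTensor4dDescriptorEx call with explicit n/c/h/w and per-axis strides is replaced by cudnnSetTensorNdDescriptor, which takes dimension and stride arrays of length nbDims. A sketch describing a packed NCHW float tensor, reusing the illustrative names from the sketch above:

local inDesc = ffi.new('cudnnTensorDescriptor_t[1]')
errcheck(C.cudnnCreateTensorDescriptor(inDesc))
local n, c, h, w = 32, 3, 64, 64
local dim    = ffi.new('int[4]', {n, c, h, w})
local stride = ffi.new('int[4]', {c*h*w, h*w, w, 1})                  -- packed NCHW strides
errcheck(C.cudnnSetTensorNdDescriptor(inDesc[0], 0, 4, dim, stride))  -- 0 = CUDNN_DATA_FLOAT
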
typedef enum
{
CUDNN_ADD_IMAGE = 0,
@@ -52,19 +61,37 @@ typedef enum
CUDNN_ADD_SAME_C = 2,
CUDNN_ADD_FULL_TENSOR = 3
} cudnnAddMode_t;
-cudnnStatus_t cudnnAddTensor4d( cudnnHandle_t handle,
- cudnnAddMode_t mode,
- const void *alpha,
- cudnnTensor4dDescriptor_t biasDesc,
- const void *biasData,
- cudnnTensor4dDescriptor_t srcDestDesc,
- void *srcDestData
+/* Tensor bias addition : srcDest = alpha * bias + beta * srcDest */
+cudnnStatus_t cudnnAddTensor( cudnnHandle_t handle,
+ cudnnAddMode_t mode,
+ const void *alpha,
+ const cudnnTensorDescriptor_t biasDesc,
+ const void *biasData,
+ const void *beta,
+ cudnnTensorDescriptor_t srcDestDesc,
+ void *srcDestData
);
+
+/* Set all data points of a tensor to a given value : srcDest = value */
+cudnnStatus_t cudnnSetTensor( cudnnHandle_t handle,
+ const cudnnTensorDescriptor_t srcDestDesc,
+ void *srcDestData,
+ const void *value
+ );
+
+/* Scale all data points of a tensor by a given factor : srcDest = alpha * srcDest */
+cudnnStatus_t cudnnScaleTensor( cudnnHandle_t handle,
+ const cudnnTensorDescriptor_t srcDestDesc,
+ void *srcDestData,
+ const void *alpha
+ );
+
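
cudnnAddTensor, cudnnSetTensor and cudnnScaleTensor all take host-side scaling factors whose type matches the tensor's data type (a float pointer for CUDNN_DATA_FLOAT). A hedged sketch of the usual per-channel bias accumulation; biasDesc/outDesc are tensor descriptors built as above and biasData/outData stand for device pointers (illustrative names):

local one = ffi.new('float[1]', 1)    -- host scalar, float because the tensors are CUDNN_DATA_FLOAT
-- out = 1 * bias + 1 * out, bias broadcast over N, H, W (2 = CUDNN_ADD_SAME_C)
errcheck(C.cudnnAddTensor(handle[0], 2, one, biasDesc[0], biasData, one, outDesc[0], outData))
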
typedef enum
{
CUDNN_CONVOLUTION = 0,
CUDNN_CROSS_CORRELATION = 1
} cudnnConvolutionMode_t;
+
typedef enum
{
CUDNN_CONVOLUTION_FWD = 0, /* Tensor Convolution function */
@@ -72,178 +99,282 @@ typedef enum
CUDNN_CONVOLUTION_DATA_GRAD = 2 /* Data Gradient update function */
} cudnnConvolutionPath_t;
cudnnStatus_t cudnnCreateFilterDescriptor( cudnnFilterDescriptor_t *filterDesc );
-cudnnStatus_t cudnnSetFilterDescriptor( cudnnFilterDescriptor_t filterDesc,
+cudnnStatus_t cudnnSetFilterNdDescriptor( cudnnFilterDescriptor_t filterDesc,
cudnnDataType_t dataType, // image data type
- int k, // number of output feature maps
- int c, // number of input feature maps
- int h, // height of each input filter
- int w // width of each input fitler
- );
-cudnnStatus_t cudnnDestroyFilterDescriptor( cudnnFilterDescriptor_t filterDesc );
+ int nbDims,
+ const int filterDimA[]
+ );
+
+cudnnStatus_t cudnnDestroyFilterDescriptor( cudnnFilterDescriptor_t filterDesc );
+
cudnnStatus_t cudnnCreateConvolutionDescriptor( cudnnConvolutionDescriptor_t *convDesc );
-cudnnStatus_t cudnnSetConvolutionDescriptor( cudnnConvolutionDescriptor_t convDesc,
- cudnnTensor4dDescriptor_t inputTensorDesc,
- cudnnFilterDescriptor_t filterDesc,
- int pad_h, // zero-padding height
- int pad_w, // zero-padding width
- int u, // vertical filter stride
- int v, // horizontal filter stride
- int upscalex, // upscale the input in x-direction
- int upscaley, // upscale the input in y-direction
- cudnnConvolutionMode_t mode
- );
-cudnnStatus_t cudnnGetOutputTensor4dDim( const cudnnConvolutionDescriptor_t convDesc,
- cudnnConvolutionPath_t path,
- int *n,
- int *c,
- int *h,
- int *w
- );
-cudnnStatus_t cudnnDestroyConvolutionDescriptor( cudnnConvolutionDescriptor_t convDesc );
+cudnnStatus_t cudnnSetConvolutionNdDescriptor( cudnnConvolutionDescriptor_t convDesc,
+ int arrayLength, /* nbDims-2 size */
+ const int padA[],
+ const int filterStrideA[],
+ const int upscaleA[],
+ cudnnConvolutionMode_t mode
+ );
+
+cudnnStatus_t cudnnGetConvolutionNdDescriptor( const cudnnConvolutionDescriptor_t convDesc,
+ int arrayLengthRequested,
+ int *arrayLength,
+ int padA[],
+ int strideA[],
+ int upscaleA[],
+ cudnnConvolutionMode_t *mode
+ );
+
+
+/* Helper function to return the dimensions of the output tensor given a convolution descriptor */
+cudnnStatus_t cudnnGetConvolutionNdForwardOutputDim( const cudnnConvolutionDescriptor_t convDesc,
+ const cudnnTensorDescriptor_t inputTensorDesc,
+ const cudnnFilterDescriptor_t filterDesc,
+ int nbDims,
+ int tensorOuputDimA[]
+ );
+
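
Filters and convolutions move to the same Nd interface: the filter is described by a dimension array, the convolution by pad/stride/upscale arrays of length nbDims-2 (2 for an ordinary 2-D convolution), and the forward output shape can be queried instead of computed by hand. Continuing the illustrative sketch with 16 output maps and a 5x5 kernel:

local filterDesc = ffi.new('cudnnFilterDescriptor_t[1]')
errcheck(C.cudnnCreateFilterDescriptor(filterDesc))
errcheck(C.cudnnSetFilterNdDescriptor(filterDesc[0], 0, 4, ffi.new('int[4]', {16, c, 5, 5})))  -- k, c, kh, kw

local convDesc = ffi.new('cudnnConvolutionDescriptor_t[1]')
errcheck(C.cudnnCreateConvolutionDescriptor(convDesc))
local convPad    = ffi.new('int[2]', {1, 1})
local convStride = ffi.new('int[2]', {1, 1})
local upscale    = ffi.new('int[2]', {1, 1})
errcheck(C.cudnnSetConvolutionNdDescriptor(convDesc[0], 2, convPad, convStride, upscale, 1))  -- 1 = CUDNN_CROSS_CORRELATION

local outDim = ffi.new('int[4]')
errcheck(C.cudnnGetConvolutionNdForwardOutputDim(convDesc[0], inDesc[0], filterDesc[0], 4, outDim))
-- outDim now holds {n, k, outH, outW}; an output descriptor (outDesc, used below) would be set from it
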
+/* Destroy an instance of convolution descriptor */
+cudnnStatus_t cudnnDestroyConvolutionDescriptor( cudnnConvolutionDescriptor_t convDesc );
+
+typedef enum
+{
+ CUDNN_CONVOLUTION_FWD_NO_WORKSPACE = 0,
+ CUDNN_CONVOLUTION_FWD_PREFER_FASTEST = 1,
+ CUDNN_CONVOLUTION_FWD_SPECIFY_WORKSPACE_LIMIT = 2
+} cudnnConvolutionFwdPreference_t;
+
typedef enum
{
- CUDNN_RESULT_ACCUMULATE = 0, /* Evaluate O += I * F */
- CUDNN_RESULT_NO_ACCUMULATE = 1 /* Evaluate O = I * F */
-} cudnnAccumulateResult_t;
-cudnnStatus_t cudnnConvolutionForward( cudnnHandle_t handle,
- cudnnTensor4dDescriptor_t srcDesc,
- const void *srcData,
- cudnnFilterDescriptor_t filterDesc,
- const void *filterData,
- cudnnConvolutionDescriptor_t convDesc,
- cudnnTensor4dDescriptor_t destDesc,
- void *destData,
- cudnnAccumulateResult_t accumulate
+ CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_GEMM = 0,
+ CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM = 1,
+ CUDNN_CONVOLUTION_FWD_ALGO_GEMM = 2,
+ CUDNN_CONVOLUTION_FWD_ALGO_DIRECT = 3
+} cudnnConvolutionFwdAlgo_t;
+
+cudnnStatus_t cudnnGetConvolutionForwardAlgorithm( cudnnHandle_t handle,
+ const cudnnTensorDescriptor_t srcDesc,
+ const cudnnFilterDescriptor_t filterDesc,
+ const cudnnConvolutionDescriptor_t convDesc,
+ const cudnnTensorDescriptor_t destDesc,
+ cudnnConvolutionFwdPreference_t preference,
+ size_t memoryLimitInbytes,
+ cudnnConvolutionFwdAlgo_t *algo
+ );
+
+/*
+ * convolution algorithm (which requires potentially some workspace)
+ */
+
+ /* Helper function to return the minimum size of the workspace to be passed to the convolution given an algo*/
+cudnnStatus_t cudnnGetConvolutionForwardWorkspaceSize( cudnnHandle_t handle,
+ const cudnnTensorDescriptor_t srcDesc,
+ const cudnnFilterDescriptor_t filterDesc,
+ const cudnnConvolutionDescriptor_t convDesc,
+ const cudnnTensorDescriptor_t destDesc,
+ cudnnConvolutionFwdAlgo_t algo,
+ size_t *sizeInBytes
+ );
+
+
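
R2 also makes the forward algorithm explicit: request one under a preference (no workspace, prefer fastest, or a memory limit), then size the workspace that algorithm needs. A sketch, still with the illustrative descriptors above (outDesc assumed set from outDim):

local algo = ffi.new('cudnnConvolutionFwdAlgo_t[1]')
errcheck(C.cudnnGetConvolutionForwardAlgorithm(handle[0], inDesc[0], filterDesc[0], convDesc[0], outDesc[0],
                                               1, 0, algo))       -- 1 = CUDNN_CONVOLUTION_FWD_PREFER_FASTEST
local wsSize = ffi.new('size_t[1]')
errcheck(C.cudnnGetConvolutionForwardWorkspaceSize(handle[0], inDesc[0], filterDesc[0], convDesc[0], outDesc[0],
                                                   algo[0], wsSize))
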
+/* Convolution functions: All of the form "output = alpha * Op(inputs) + beta * output" */
+
+/* Function to perform the forward multiconvolution */
+cudnnStatus_t cudnnConvolutionForward( cudnnHandle_t handle,
+ const void *alpha,
+ const cudnnTensorDescriptor_t srcDesc,
+ const void *srcData,
+ const cudnnFilterDescriptor_t filterDesc,
+ const void *filterData,
+ const cudnnConvolutionDescriptor_t convDesc,
+ cudnnConvolutionFwdAlgo_t algo,
+ void *workSpace,
+ size_t workSpaceSizeInBytes,
+ const void *beta,
+ const cudnnTensorDescriptor_t destDesc,
+ void *destData
);
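
The forward call then takes the chosen algorithm, a caller-allocated device workspace of at least wsSize[0] bytes, and the alpha/beta pair; inData, weightData, workspace and outData stand for device pointers (for example from CudaTensor:data() in Torch) and are illustrative:

local zero = ffi.new('float[1]', 0)
errcheck(C.cudnnConvolutionForward(handle[0], one,
                                   inDesc[0], inData,
                                   filterDesc[0], weightData,
                                   convDesc[0], algo[0], workspace, wsSize[0],
                                   zero, outDesc[0], outData))    -- out = 1 * conv(in, weights) + 0 * out
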
-cudnnStatus_t cudnnConvolutionBackwardBias( cudnnHandle_t handle,
- cudnnTensor4dDescriptor_t srcDesc,
- const void *srcData,
- cudnnTensor4dDescriptor_t destDesc,
- void *destData,
- cudnnAccumulateResult_t accumulate
+
+/* Functions to perform the backward multiconvolution */
+cudnnStatus_t cudnnConvolutionBackwardBias( cudnnHandle_t handle,
+ const void *alpha,
+ const cudnnTensorDescriptor_t srcDesc,
+ const void *srcData,
+ const void *beta,
+ const cudnnTensorDescriptor_t destDesc,
+ void *destData
);
-cudnnStatus_t cudnnConvolutionBackwardFilter( cudnnHandle_t handle,
- cudnnTensor4dDescriptor_t srcDesc,
- const void *srcData,
- cudnnTensor4dDescriptor_t diffDesc,
- const void *diffData,
- cudnnConvolutionDescriptor_t convDesc,
- cudnnFilterDescriptor_t gradDesc,
- void *gradData,
- cudnnAccumulateResult_t accumulate
+
+
+
+cudnnStatus_t cudnnConvolutionBackwardFilter( cudnnHandle_t handle,
+ const void *alpha,
+ const cudnnTensorDescriptor_t srcDesc,
+ const void *srcData,
+ const cudnnTensorDescriptor_t diffDesc,
+ const void *diffData,
+ const cudnnConvolutionDescriptor_t convDesc,
+ const void *beta,
+ const cudnnFilterDescriptor_t gradDesc,
+ void *gradData
);
-cudnnStatus_t cudnnConvolutionBackwardData( cudnnHandle_t handle,
- cudnnFilterDescriptor_t filterDesc,
- const void *filterData,
- cudnnTensor4dDescriptor_t diffDesc,
- const void *diffData,
- cudnnConvolutionDescriptor_t convDesc,
- cudnnTensor4dDescriptor_t gradDesc,
- void *gradData,
- cudnnAccumulateResult_t accumulate
- );
+
+
+cudnnStatus_t cudnnConvolutionBackwardData( cudnnHandle_t handle,
+ const void *alpha,
+ const cudnnFilterDescriptor_t filterDesc,
+ const void *filterData,
+ const cudnnTensorDescriptor_t diffDesc,
+ const void *diffData,
+ const cudnnConvolutionDescriptor_t convDesc,
+ const void *beta,
+ const cudnnTensorDescriptor_t gradDesc,
+ void *gradData
+ );
+
+
+/*
+ * softmax algorithm
+ */
+typedef enum
+{
+ CUDNN_SOFTMAX_FAST = 0, /* straightforward implementation */
+ CUDNN_SOFTMAX_ACCURATE = 1 /* subtract max from every point to avoid overflow */
+} cudnnSoftmaxAlgorithm_t;
+
+typedef enum
+{
+ CUDNN_SOFTMAX_MODE_INSTANCE = 0, /* compute the softmax over all C, H, W for each N */
+ CUDNN_SOFTMAX_MODE_CHANNEL = 1 /* compute the softmax over all C for each H, W, N */
+} cudnnSoftmaxMode_t;
+
+/* Softmax functions: All of the form "output = alpha * Op(inputs) + beta * output" */
+
+/* Function to perform forward softmax */
+cudnnStatus_t cudnnSoftmaxForward( cudnnHandle_t handle,
+ cudnnSoftmaxAlgorithm_t algorithm,
+ cudnnSoftmaxMode_t mode,
+ const void *alpha,
+ const cudnnTensorDescriptor_t srcDesc,
+ const void *srcData,
+ const void *beta,
+ const cudnnTensorDescriptor_t destDesc,
+ void *destData
+ );
+
+/* Function to perform backward softmax */
+cudnnStatus_t cudnnSoftmaxBackward( cudnnHandle_t handle,
+ cudnnSoftmaxAlgorithm_t algorithm,
+ cudnnSoftmaxMode_t mode,
+ const void *alpha,
+ const cudnnTensorDescriptor_t srcDesc,
+ const void *srcData,
+ const cudnnTensorDescriptor_t srcDiffDesc,
+ const void *srcDiffData,
+ const void *beta,
+ const cudnnTensorDescriptor_t destDiffDesc,
+ void *destDiffData
+ );
+
+
+
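
Softmax follows the same output = alpha * Op(inputs) + beta * output convention; a one-line sketch of the accurate per-example variant, with the illustrative names used above:

-- 1 = CUDNN_SOFTMAX_ACCURATE, 0 = CUDNN_SOFTMAX_MODE_INSTANCE (softmax over C*H*W for each N)
errcheck(C.cudnnSoftmaxForward(handle[0], 1, 0, one, inDesc[0], inData, zero, outDesc[0], outData))
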
typedef enum
{
CUDNN_POOLING_MAX = 0,
CUDNN_POOLING_AVERAGE = 1
} cudnnPoolingMode_t;
-cudnnStatus_t cudnnCreatePoolingDescriptor( cudnnPoolingDescriptor_t *poolingDesc);
-cudnnStatus_t cudnnSetPoolingDescriptor( cudnnPoolingDescriptor_t poolingDesc,
- cudnnPoolingMode_t mode,
- int windowHeight,
- int windowWidth,
- int verticalStride,
- int horizontalStride
+
+/* Create an instance of pooling descriptor */
+cudnnStatus_t cudnnCreatePoolingDescriptor( cudnnPoolingDescriptor_t *poolingDesc);
+cudnnStatus_t cudnnSetPoolingNdDescriptor( cudnnPoolingDescriptor_t poolingDesc,
+ const cudnnPoolingMode_t mode,
+ int nbDims,
+ const int windowDimA[],
+ const int paddingA[],
+ const int strideA[]
);
-cudnnStatus_t cudnnGetPoolingDescriptor( const cudnnPoolingDescriptor_t poolingDesc,
+
+cudnnStatus_t cudnnGetPoolingNdDescriptor( const cudnnPoolingDescriptor_t poolingDesc,
+ const int nbDimsRequested,
cudnnPoolingMode_t *mode,
- int *windowHeight,
- int *windowWidth,
- int *verticalStride,
- int *horizontalStride
- );
-cudnnStatus_t cudnnDestroyPoolingDescriptor( cudnnPoolingDescriptor_t poolingDesc );
-cudnnStatus_t cudnnPoolingForward( cudnnHandle_t handle,
- cudnnPoolingDescriptor_t poolingDesc,
- cudnnTensor4dDescriptor_t srcDesc,
- const void *srcData,
- cudnnTensor4dDescriptor_t destDesc,
- void *destData
+ int *nbDims,
+ int windowDimA[],
+ int paddingA[],
+ int strideA[]
+ );
+
+cudnnStatus_t cudnnGetPoolingNdForwardOutputDim( const cudnnPoolingDescriptor_t poolingDesc,
+ const cudnnTensorDescriptor_t inputTensorDesc,
+ int nbDims,
+ int outputTensorDimA[]);
+/* Destroy an instance of pooling descriptor */
+cudnnStatus_t cudnnDestroyPoolingDescriptor( cudnnPoolingDescriptor_t poolingDesc );
+/* Pooling functions: All of the form "output = alpha * Op(inputs) + beta * output" */
+
+/* Function to perform forward pooling */
+cudnnStatus_t cudnnPoolingForward( cudnnHandle_t handle,
+ const cudnnPoolingDescriptor_t poolingDesc,
+ const void *alpha,
+ const cudnnTensorDescriptor_t srcDesc,
+ const void *srcData,
+ const void *beta,
+ const cudnnTensorDescriptor_t destDesc,
+ void *destData
);
-cudnnStatus_t cudnnPoolingBackward( cudnnHandle_t handle,
- cudnnPoolingDescriptor_t poolingDesc,
- cudnnTensor4dDescriptor_t srcDesc,
- const void *srcData,
- cudnnTensor4dDescriptor_t srcDiffDesc,
- const void *srcDiffData,
- cudnnTensor4dDescriptor_t destDesc,
- const void *destData,
- cudnnTensor4dDescriptor_t destDiffDesc,
- void *destDiffData
+
+/* Function to perform backward pooling */
+cudnnStatus_t cudnnPoolingBackward( cudnnHandle_t handle,
+ const cudnnPoolingDescriptor_t poolingDesc,
+ const void *alpha,
+ const cudnnTensorDescriptor_t srcDesc,
+ const void *srcData,
+ const cudnnTensorDescriptor_t srcDiffDesc,
+ const void *srcDiffData,
+ const cudnnTensorDescriptor_t destDesc,
+ const void *destData,
+ const void *beta,
+ const cudnnTensorDescriptor_t destDiffDesc,
+ void *destDiffData
);
+
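
This is what the commit message means by free zero-padding for max pooling: cudnnSetPoolingNdDescriptor accepts a paddingA array, so padded pooling no longer needs a separate padding layer. A sketch of a 3x3, stride-2, pad-1 max-pooling descriptor and its output shape, under the same illustrative conventions:

local poolDesc = ffi.new('cudnnPoolingDescriptor_t[1]')
errcheck(C.cudnnCreatePoolingDescriptor(poolDesc))
local win     = ffi.new('int[2]', {3, 3})
local poolPad = ffi.new('int[2]', {1, 1})
local strd    = ffi.new('int[2]', {2, 2})
errcheck(C.cudnnSetPoolingNdDescriptor(poolDesc[0], 0, 2, win, poolPad, strd))  -- 0 = CUDNN_POOLING_MAX
local poolOutDim = ffi.new('int[4]')
errcheck(C.cudnnGetPoolingNdForwardOutputDim(poolDesc[0], inDesc[0], 4, poolOutDim))
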
typedef enum
{
CUDNN_ACTIVATION_SIGMOID = 0,
CUDNN_ACTIVATION_RELU = 1,
CUDNN_ACTIVATION_TANH = 2
} cudnnActivationMode_t;
-cudnnStatus_t cudnnActivationForward( cudnnHandle_t handle,
- cudnnActivationMode_t mode,
- cudnnTensor4dDescriptor_t srcDesc,
- const void *srcData,
- cudnnTensor4dDescriptor_t destDesc,
- void *destData
- );
-cudnnStatus_t cudnnActivationBackward( cudnnHandle_t handle,
- cudnnActivationMode_t mode,
- cudnnTensor4dDescriptor_t srcDesc,
- const void *srcData,
- cudnnTensor4dDescriptor_t srcDiffDesc,
- const void *srcDiffData,
- cudnnTensor4dDescriptor_t destDesc,
- const void *destData,
- cudnnTensor4dDescriptor_t destDiffDesc,
- void *destDiffData
- );
-
-
-typedef enum
-{
- CUDNN_SOFTMAX_FAST = 0, CUDNN_SOFTMAX_ACCURATE = 1
-} cudnnSoftmaxAlgorithm_t;
-
-typedef enum
-{
- CUDNN_SOFTMAX_MODE_INSTANCE = 0, CUDNN_SOFTMAX_MODE_CHANNEL = 1
-} cudnnSoftmaxMode_t;
-
-
-cudnnStatus_t cudnnSoftmaxForward( cudnnHandle_t handle,
- cudnnSoftmaxAlgorithm_t algorithm,
- cudnnSoftmaxMode_t mode,
- cudnnTensor4dDescriptor_t srcDesc,
- const void *srcData,
- cudnnTensor4dDescriptor_t destDesc,
- void *destData
- );
-
-cudnnStatus_t cudnnSoftmaxBackward( cudnnHandle_t handle,
- cudnnSoftmaxAlgorithm_t algorithm,
- cudnnSoftmaxMode_t mode,
- cudnnTensor4dDescriptor_t srcDesc,
- const void *srcData,
- cudnnTensor4dDescriptor_t srcDiffDesc,
- const void *srcDiffData,
- cudnnTensor4dDescriptor_t destDiffDesc,
- void *destDiffData
- );
+/* Function to perform forward activation */
+cudnnStatus_t cudnnActivationForward( cudnnHandle_t handle,
+ cudnnActivationMode_t mode,
+ const void *alpha,
+ const cudnnTensorDescriptor_t srcDesc,
+ const void *srcData,
+ const void *beta,
+ const cudnnTensorDescriptor_t destDesc,
+ void *destData
+ );
+/* Function to perform backward activation */
+cudnnStatus_t cudnnActivationBackward( cudnnHandle_t handle,
+ cudnnActivationMode_t mode,
+ const void *alpha,
+ const cudnnTensorDescriptor_t srcDesc,
+ const void *srcData,
+ const cudnnTensorDescriptor_t srcDiffDesc,
+ const void *srcDiffData,
+ const cudnnTensorDescriptor_t destDesc,
+ const void *destData,
+ const void *beta,
+ const cudnnTensorDescriptor_t destDiffDesc,
+ void *destDiffData
+ );
]]
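
Activations keep their R1 shape apart from the new alpha/beta pair; for completeness, a ReLU forward sketch with the same illustrative names:

-- 1 = CUDNN_ACTIVATION_RELU
errcheck(C.cudnnActivationForward(handle[0], 1, one, inDesc[0], inData, zero, outDesc[0], outData))
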
local ok
-ok = pcall(function() cudnn.C = ffi.load('libcudnn') end)
+ok,err = pcall(function() cudnn.C = ffi.load('libcudnn') end)
if not ok then
+ print(err)
error([['libcudnn.so not found in library path.
Please install CuDNN from https://developer.nvidia.com/cuDNN
Then make sure all the files named as libcudnn.so* are placed in your library load path (for example /usr/local/lib , or manually add a path to LD_LIBRARY_PATH)