diff options
author | Soumith Chintala <soumith@gmail.com> | 2014-12-19 02:36:12 +0300 |
---|---|---|
committer | Soumith Chintala <soumith@gmail.com> | 2014-12-19 23:28:13 +0300 |
commit | ad958c0e268d876ee4d713510b8c3ef83b37bca0 (patch) | |
tree | 0defbe1196f778c9fb3f79f5f6e7a1da9ae92cda /ffi.lua | |
parent | d290c4cb9d632120d3fba97caefb3afb961081bf (diff) |
everything works with R2. all unit tests pass. Maxpooling has free zero-padding
Diffstat (limited to 'ffi.lua')
-rw-r--r-- | ffi.lua | 461 |
1 files changed, 296 insertions, 165 deletions
@@ -18,31 +18,40 @@ typedef enum CUDNN_STATUS_LICENSE_ERROR = 10 } cudnnStatus_t; +const char * cudnnGetErrorString(cudnnStatus_t status); + typedef struct CUstream_st *cudaStream_t; cudnnStatus_t cudnnCreate(cudnnHandle_t *handle); cudnnStatus_t cudnnDestroy(cudnnHandle_t handle); -typedef struct cudnnTensor4dStruct* cudnnTensor4dDescriptor_t; +cudnnStatus_t cudnnSetStream(cudnnHandle_t handle, cudaStream_t streamId); +cudnnStatus_t cudnnGetStream(cudnnHandle_t handle, cudaStream_t *streamId); + +typedef struct cudnnTensorStruct* cudnnTensorDescriptor_t; typedef struct cudnnConvolutionStruct* cudnnConvolutionDescriptor_t; typedef struct cudnnPoolingStruct* cudnnPoolingDescriptor_t; typedef struct cudnnFilterStruct* cudnnFilterDescriptor_t; + typedef enum { CUDNN_DATA_FLOAT = 0, CUDNN_DATA_DOUBLE = 1 } cudnnDataType_t; -cudnnStatus_t cudnnCreateTensor4dDescriptor( cudnnTensor4dDescriptor_t *tensorDesc ); -cudnnStatus_t cudnnSetTensor4dDescriptorEx( cudnnTensor4dDescriptor_t tensorDesc, - cudnnDataType_t dataType, // image data type - int n, // number of inputs (batch size) - int c, // number of input feature maps - int h, // height of input section - int w, // width of input section - int nStride, - int cStride, - int hStride, - int wStride - ); -cudnnStatus_t cudnnDestroyTensor4dDescriptor( cudnnTensor4dDescriptor_t tensorDesc ); + +typedef enum +{ + CUDNN_TENSOR_NCHW = 0, /* row major (wStride = 1, hStride = w) */ + CUDNN_TENSOR_NHWC = 1 /* feature maps interleaved ( cStride = 1 )*/ +} cudnnTensorFormat_t; + +cudnnStatus_t cudnnCreateTensorDescriptor( cudnnTensorDescriptor_t *tensorDesc ); +cudnnStatus_t cudnnSetTensorNdDescriptor( cudnnTensorDescriptor_t tensorDesc, + cudnnDataType_t dataType, + int nbDims, + const int dimA[], + const int strideA[] + ); +cudnnStatus_t cudnnDestroyTensorDescriptor( cudnnTensorDescriptor_t tensorDesc ); + typedef enum { CUDNN_ADD_IMAGE = 0, @@ -52,19 +61,37 @@ typedef enum CUDNN_ADD_SAME_C = 2, CUDNN_ADD_FULL_TENSOR = 3 } cudnnAddMode_t; -cudnnStatus_t cudnnAddTensor4d( cudnnHandle_t handle, - cudnnAddMode_t mode, - const void *alpha, - cudnnTensor4dDescriptor_t biasDesc, - const void *biasData, - cudnnTensor4dDescriptor_t srcDestDesc, - void *srcDestData +/* Tensor Bias addition : srcDest = alpha * bias + beta * srcDestDesc */ +cudnnStatus_t cudnnAddTensor( cudnnHandle_t handle, + cudnnAddMode_t mode, + const void *alpha, + const cudnnTensorDescriptor_t biasDesc, + const void *biasData, + const void *beta, + cudnnTensorDescriptor_t srcDestDesc, + void *srcDestData ); + +/* Set all data points of a tensor to a given value : srcDest = value */ +cudnnStatus_t cudnnSetTensor( cudnnHandle_t handle, + const cudnnTensorDescriptor_t srcDestDesc, + void *srcDestData, + const void *value + ); + +/* Set all data points of a tensor to a given value : srcDest = alpha * srcDest */ +cudnnStatus_t cudnnScaleTensor( cudnnHandle_t handle, + const cudnnTensorDescriptor_t srcDestDesc, + void *srcDestData, + const void *alpha + ); + typedef enum { CUDNN_CONVOLUTION = 0, CUDNN_CROSS_CORRELATION = 1 } cudnnConvolutionMode_t; + typedef enum { CUDNN_CONVOLUTION_FWD = 0, /* Tensor Convolution function */ @@ -72,178 +99,282 @@ typedef enum CUDNN_CONVOLUTION_DATA_GRAD = 2 /* Data Gradient update function */ } cudnnConvolutionPath_t; cudnnStatus_t cudnnCreateFilterDescriptor( cudnnFilterDescriptor_t *filterDesc ); -cudnnStatus_t cudnnSetFilterDescriptor( cudnnFilterDescriptor_t filterDesc, +cudnnStatus_t cudnnSetFilterNdDescriptor( cudnnFilterDescriptor_t filterDesc, cudnnDataType_t dataType, // image data type - int k, // number of output feature maps - int c, // number of input feature maps - int h, // height of each input filter - int w // width of each input fitler - ); -cudnnStatus_t cudnnDestroyFilterDescriptor( cudnnFilterDescriptor_t filterDesc ); + int nbDims, + const int filterDimA[] + ); + +cudnnStatus_t cudnnDestroyFilterDescriptor( cudnnFilterDescriptor_t filterDesc ); + cudnnStatus_t cudnnCreateConvolutionDescriptor( cudnnConvolutionDescriptor_t *convDesc ); -cudnnStatus_t cudnnSetConvolutionDescriptor( cudnnConvolutionDescriptor_t convDesc, - cudnnTensor4dDescriptor_t inputTensorDesc, - cudnnFilterDescriptor_t filterDesc, - int pad_h, // zero-padding height - int pad_w, // zero-padding width - int u, // vertical filter stride - int v, // horizontal filter stride - int upscalex, // upscale the input in x-direction - int upscaley, // upscale the input in y-direction - cudnnConvolutionMode_t mode - ); -cudnnStatus_t cudnnGetOutputTensor4dDim( const cudnnConvolutionDescriptor_t convDesc, - cudnnConvolutionPath_t path, - int *n, - int *c, - int *h, - int *w - ); -cudnnStatus_t cudnnDestroyConvolutionDescriptor( cudnnConvolutionDescriptor_t convDesc ); +cudnnStatus_t cudnnSetConvolutionNdDescriptor( cudnnConvolutionDescriptor_t convDesc, + int arrayLength, /* nbDims-2 size */ + const int padA[], + const int filterStrideA[], + const int upscaleA[], + cudnnConvolutionMode_t mode + ); + +cudnnStatus_t cudnnGetConvolutionNdDescriptor( const cudnnConvolutionDescriptor_t convDesc, + int arrayLengthRequested, + int *arrayLength, + int padA[], + int strideA[], + int upscaleA[], + cudnnConvolutionMode_t *mode + ); + + +/* Helper function to return the dimensions of the output tensor given a convolution descriptor */ +cudnnStatus_t cudnnGetConvolutionNdForwardOutputDim( const cudnnConvolutionDescriptor_t convDesc, + const cudnnTensorDescriptor_t inputTensorDesc, + const cudnnFilterDescriptor_t filterDesc, + int nbDims, + int tensorOuputDimA[] + ); + +/* Destroy an instance of convolution descriptor */ +cudnnStatus_t cudnnDestroyConvolutionDescriptor( cudnnConvolutionDescriptor_t convDesc ); + +typedef enum +{ + CUDNN_CONVOLUTION_FWD_NO_WORKSPACE = 0, + CUDNN_CONVOLUTION_FWD_PREFER_FASTEST = 1, + CUDNN_CONVOLUTION_FWD_SPECIFY_WORKSPACE_LIMIT = 2 +} cudnnConvolutionFwdPreference_t; + typedef enum { - CUDNN_RESULT_ACCUMULATE = 0, /* Evaluate O += I * F */ - CUDNN_RESULT_NO_ACCUMULATE = 1 /* Evaluate O = I * F */ -} cudnnAccumulateResult_t; -cudnnStatus_t cudnnConvolutionForward( cudnnHandle_t handle, - cudnnTensor4dDescriptor_t srcDesc, - const void *srcData, - cudnnFilterDescriptor_t filterDesc, - const void *filterData, - cudnnConvolutionDescriptor_t convDesc, - cudnnTensor4dDescriptor_t destDesc, - void *destData, - cudnnAccumulateResult_t accumulate + CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_GEMM = 0, + CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM = 1, + CUDNN_CONVOLUTION_FWD_ALGO_GEMM = 2, + CUDNN_CONVOLUTION_FWD_ALGO_DIRECT = 3 +} cudnnConvolutionFwdAlgo_t; + +cudnnStatus_t cudnnGetConvolutionForwardAlgorithm( cudnnHandle_t handle, + const cudnnTensorDescriptor_t srcDesc, + const cudnnFilterDescriptor_t filterDesc, + const cudnnConvolutionDescriptor_t convDesc, + const cudnnTensorDescriptor_t destDesc, + cudnnConvolutionFwdPreference_t preference, + size_t memoryLimitInbytes, + cudnnConvolutionFwdAlgo_t *algo + ); + +/* + * convolution algorithm (which requires potentially some workspace) + */ + + /* Helper function to return the minimum size of the workspace to be passed to the convolution given an algo*/ +cudnnStatus_t cudnnGetConvolutionForwardWorkspaceSize( cudnnHandle_t handle, + const cudnnTensorDescriptor_t srcDesc, + const cudnnFilterDescriptor_t filterDesc, + const cudnnConvolutionDescriptor_t convDesc, + const cudnnTensorDescriptor_t destDesc, + cudnnConvolutionFwdAlgo_t algo, + size_t *sizeInBytes + ); + + +/* Convolution functions: All of the form "output = alpha * Op(inputs) + beta * output" */ + +/* Function to perform the forward multiconvolution */ +cudnnStatus_t cudnnConvolutionForward( cudnnHandle_t handle, + const void *alpha, + const cudnnTensorDescriptor_t srcDesc, + const void *srcData, + const cudnnFilterDescriptor_t filterDesc, + const void *filterData, + const cudnnConvolutionDescriptor_t convDesc, + cudnnConvolutionFwdAlgo_t algo, + void *workSpace, + size_t workSpaceSizeInBytes, + const void *beta, + const cudnnTensorDescriptor_t destDesc, + void *destData ); -cudnnStatus_t cudnnConvolutionBackwardBias( cudnnHandle_t handle, - cudnnTensor4dDescriptor_t srcDesc, - const void *srcData, - cudnnTensor4dDescriptor_t destDesc, - void *destData, - cudnnAccumulateResult_t accumulate + +/* Functions to perform the backward multiconvolution */ +cudnnStatus_t cudnnConvolutionBackwardBias( cudnnHandle_t handle, + const void *alpha, + const cudnnTensorDescriptor_t srcDesc, + const void *srcData, + const void *beta, + const cudnnTensorDescriptor_t destDesc, + void *destData ); -cudnnStatus_t cudnnConvolutionBackwardFilter( cudnnHandle_t handle, - cudnnTensor4dDescriptor_t srcDesc, - const void *srcData, - cudnnTensor4dDescriptor_t diffDesc, - const void *diffData, - cudnnConvolutionDescriptor_t convDesc, - cudnnFilterDescriptor_t gradDesc, - void *gradData, - cudnnAccumulateResult_t accumulate + + + +cudnnStatus_t cudnnConvolutionBackwardFilter( cudnnHandle_t handle, + const void *alpha, + const cudnnTensorDescriptor_t srcDesc, + const void *srcData, + const cudnnTensorDescriptor_t diffDesc, + const void *diffData, + const cudnnConvolutionDescriptor_t convDesc, + const void *beta, + const cudnnFilterDescriptor_t gradDesc, + void *gradData ); -cudnnStatus_t cudnnConvolutionBackwardData( cudnnHandle_t handle, - cudnnFilterDescriptor_t filterDesc, - const void *filterData, - cudnnTensor4dDescriptor_t diffDesc, - const void *diffData, - cudnnConvolutionDescriptor_t convDesc, - cudnnTensor4dDescriptor_t gradDesc, - void *gradData, - cudnnAccumulateResult_t accumulate - ); + + +cudnnStatus_t cudnnConvolutionBackwardData( cudnnHandle_t handle, + const void *alpha, + const cudnnFilterDescriptor_t filterDesc, + const void *filterData, + const cudnnTensorDescriptor_t diffDesc, + const void *diffData, + const cudnnConvolutionDescriptor_t convDesc, + const void *beta, + const cudnnTensorDescriptor_t gradDesc, + void *gradData + ); + + +/* + * softmax algorithm + */ +typedef enum +{ + CUDNN_SOFTMAX_FAST = 0, /* straightforward implementation */ + CUDNN_SOFTMAX_ACCURATE = 1 /* subtract max from every point to avoid overflow */ +} cudnnSoftmaxAlgorithm_t; + +typedef enum +{ + CUDNN_SOFTMAX_MODE_INSTANCE = 0, /* compute the softmax over all C, H, W for each N */ + CUDNN_SOFTMAX_MODE_CHANNEL = 1 /* compute the softmax over all C for each H, W, N */ +} cudnnSoftmaxMode_t; + +/* Softmax functions: All of the form "output = alpha * Op(inputs) + beta * output" */ + +/* Function to perform forward softmax */ +cudnnStatus_t cudnnSoftmaxForward( cudnnHandle_t handle, + cudnnSoftmaxAlgorithm_t algorithm, + cudnnSoftmaxMode_t mode, + const void *alpha, + const cudnnTensorDescriptor_t srcDesc, + const void *srcData, + const void *beta, + const cudnnTensorDescriptor_t destDesc, + void *destData + ); + +/* Function to perform backward softmax */ +cudnnStatus_t cudnnSoftmaxBackward( cudnnHandle_t handle, + cudnnSoftmaxAlgorithm_t algorithm, + cudnnSoftmaxMode_t mode, + const void *alpha, + const cudnnTensorDescriptor_t srcDesc, + const void *srcData, + const cudnnTensorDescriptor_t srcDiffDesc, + const void *srcDiffData, + const void *beta, + const cudnnTensorDescriptor_t destDiffDesc, + void *destDiffData + ); + + + typedef enum { CUDNN_POOLING_MAX = 0, CUDNN_POOLING_AVERAGE = 1 } cudnnPoolingMode_t; -cudnnStatus_t cudnnCreatePoolingDescriptor( cudnnPoolingDescriptor_t *poolingDesc); -cudnnStatus_t cudnnSetPoolingDescriptor( cudnnPoolingDescriptor_t poolingDesc, - cudnnPoolingMode_t mode, - int windowHeight, - int windowWidth, - int verticalStride, - int horizontalStride + +/* Create an instance of pooling descriptor */ +cudnnStatus_t cudnnCreatePoolingDescriptor( cudnnPoolingDescriptor_t *poolingDesc); +cudnnStatus_t cudnnSetPoolingNdDescriptor( cudnnPoolingDescriptor_t poolingDesc, + const cudnnPoolingMode_t mode, + int nbDims, + const int windowDimA[], + const int paddingA[], + const int strideA[] ); -cudnnStatus_t cudnnGetPoolingDescriptor( const cudnnPoolingDescriptor_t poolingDesc, + +cudnnStatus_t cudnnGetPoolingNdDescriptor( const cudnnPoolingDescriptor_t poolingDesc, + const int nbDimsRequested, cudnnPoolingMode_t *mode, - int *windowHeight, - int *windowWidth, - int *verticalStride, - int *horizontalStride - ); -cudnnStatus_t cudnnDestroyPoolingDescriptor( cudnnPoolingDescriptor_t poolingDesc ); -cudnnStatus_t cudnnPoolingForward( cudnnHandle_t handle, - cudnnPoolingDescriptor_t poolingDesc, - cudnnTensor4dDescriptor_t srcDesc, - const void *srcData, - cudnnTensor4dDescriptor_t destDesc, - void *destData + int *nbDims, + int windowDimA[], + int paddingA[], + int strideA[] + ); + +cudnnStatus_t cudnnGetPoolingNdForwardOutputDim( const cudnnPoolingDescriptor_t poolingDesc, + const cudnnTensorDescriptor_t inputTensorDesc, + int nbDims, + int outputTensorDimA[]); +/* Destroy an instance of pooling descriptor */ +cudnnStatus_t cudnnDestroyPoolingDescriptor( cudnnPoolingDescriptor_t poolingDesc ); +/* Pooling functions: All of the form "output = alpha * Op(inputs) + beta * output" */ + +/* Function to perform forward pooling */ +cudnnStatus_t cudnnPoolingForward( cudnnHandle_t handle, + const cudnnPoolingDescriptor_t poolingDesc, + const void *alpha, + const cudnnTensorDescriptor_t srcDesc, + const void *srcData, + const void *beta, + const cudnnTensorDescriptor_t destDesc, + void *destData ); -cudnnStatus_t cudnnPoolingBackward( cudnnHandle_t handle, - cudnnPoolingDescriptor_t poolingDesc, - cudnnTensor4dDescriptor_t srcDesc, - const void *srcData, - cudnnTensor4dDescriptor_t srcDiffDesc, - const void *srcDiffData, - cudnnTensor4dDescriptor_t destDesc, - const void *destData, - cudnnTensor4dDescriptor_t destDiffDesc, - void *destDiffData + +/* Function to perform backward pooling */ +cudnnStatus_t cudnnPoolingBackward( cudnnHandle_t handle, + const cudnnPoolingDescriptor_t poolingDesc, + const void *alpha, + const cudnnTensorDescriptor_t srcDesc, + const void *srcData, + const cudnnTensorDescriptor_t srcDiffDesc, + const void *srcDiffData, + const cudnnTensorDescriptor_t destDesc, + const void *destData, + const void *beta, + const cudnnTensorDescriptor_t destDiffDesc, + void *destDiffData ); + typedef enum { CUDNN_ACTIVATION_SIGMOID = 0, CUDNN_ACTIVATION_RELU = 1, CUDNN_ACTIVATION_TANH = 2 } cudnnActivationMode_t; -cudnnStatus_t cudnnActivationForward( cudnnHandle_t handle, - cudnnActivationMode_t mode, - cudnnTensor4dDescriptor_t srcDesc, - const void *srcData, - cudnnTensor4dDescriptor_t destDesc, - void *destData - ); -cudnnStatus_t cudnnActivationBackward( cudnnHandle_t handle, - cudnnActivationMode_t mode, - cudnnTensor4dDescriptor_t srcDesc, - const void *srcData, - cudnnTensor4dDescriptor_t srcDiffDesc, - const void *srcDiffData, - cudnnTensor4dDescriptor_t destDesc, - const void *destData, - cudnnTensor4dDescriptor_t destDiffDesc, - void *destDiffData - ); - - -typedef enum -{ - CUDNN_SOFTMAX_FAST = 0, CUDNN_SOFTMAX_ACCURATE = 1 -} cudnnSoftmaxAlgorithm_t; - -typedef enum -{ - CUDNN_SOFTMAX_MODE_INSTANCE = 0, CUDNN_SOFTMAX_MODE_CHANNEL = 1 -} cudnnSoftmaxMode_t; - - -cudnnStatus_t cudnnSoftmaxForward( cudnnHandle_t handle, - cudnnSoftmaxAlgorithm_t algorithm, - cudnnSoftmaxMode_t mode, - cudnnTensor4dDescriptor_t srcDesc, - const void *srcData, - cudnnTensor4dDescriptor_t destDesc, - void *destData - ); - -cudnnStatus_t cudnnSoftmaxBackward( cudnnHandle_t handle, - cudnnSoftmaxAlgorithm_t algorithm, - cudnnSoftmaxMode_t mode, - cudnnTensor4dDescriptor_t srcDesc, - const void *srcData, - cudnnTensor4dDescriptor_t srcDiffDesc, - const void *srcDiffData, - cudnnTensor4dDescriptor_t destDiffDesc, - void *destDiffData - ); +/* Function to perform forward activation */ +cudnnStatus_t cudnnActivationForward( cudnnHandle_t handle, + cudnnActivationMode_t mode, + const void *alpha, + const cudnnTensorDescriptor_t srcDesc, + const void *srcData, + const void *beta, + const cudnnTensorDescriptor_t destDesc, + void *destData + ); +/* Function to perform backward activation */ +cudnnStatus_t cudnnActivationBackward( cudnnHandle_t handle, + cudnnActivationMode_t mode, + const void *alpha, + const cudnnTensorDescriptor_t srcDesc, + const void *srcData, + const cudnnTensorDescriptor_t srcDiffDesc, + const void *srcDiffData, + const cudnnTensorDescriptor_t destDesc, + const void *destData, + const void *beta, + const cudnnTensorDescriptor_t destDiffDesc, + void *destDiffData + ); ]] local ok -ok = pcall(function() cudnn.C = ffi.load('libcudnn') end) +ok,err = pcall(function() cudnn.C = ffi.load('libcudnn') end) if not ok then + print(err) error([['libcudnn.so not found in library path. Please install CuDNN from https://developer.nvidia.com/cuDNN Then make sure all the files named as libcudnn.so* are placed in your library load path (for example /usr/local/lib , or manually add a path to LD_LIBRARY_PATH) |