diff options
-rw-r--r-- | functional.lua | 350 | ||||
-rw-r--r-- | test/test.lua | 57 |
2 files changed, 400 insertions, 7 deletions
diff --git a/functional.lua b/functional.lua index 765f6fd..64c2e1c 100644 --- a/functional.lua +++ b/functional.lua @@ -3,6 +3,7 @@ -- There shouldn't be any reference to "self" in this file. local cudnn = require 'cudnn.env' +local ffi = require 'ffi' local errcheck = cudnn.errcheck cudnn.functional = {} @@ -10,12 +11,18 @@ cudnn.functional = {} local one = torch.FloatTensor({1}); local zero = torch.FloatTensor({0}); +local function Batch2D(t) + return t:view(1, t:size(1), t:size(2), t:size(3)) +end + -- accumulates the bias into output. -- output is assumed to be allocated and given. -cudnn.functional.SpatialBias_updateOutput = function(bias, output) +cudnn.functional.bias2D_updateOutput = function(handle, bias, output) + output = output:dim() == 3 and Batch2D(output) or output + local biasDesc = cudnn.toDescriptor(bias:view(1, bias:nElement(),1,1)) local oDesc = cudnn.toDescriptor(output) - errcheck('cudnnAddTensor', cudnn.getHandle(), + errcheck('cudnnAddTensor', handle, 'CUDNN_ADD_SAME_C', one:data(), biasDesc[0], bias:data(), one:data(), oDesc[0], output:data()) @@ -23,14 +30,349 @@ end -- accumulates the gradients into gradBias. -- gradBias is assumed to be allocated and given. -cudnn.functional.SpatialBias_accGradParameters = function(gradOutput, gradBias, scale) +cudnn.functional.bias2D_accGradParameters = function(handle, gradOutput, gradBias, scale) + gradOutput = gradOutput:dim() == 3 and Batch2D(gradOutput) or gradOutput scale = scale or 1.0 local scaleT = torch.FloatTensor({scale}) local oDesc = cudnn.toDescriptor(gradOutput) local biasDesc = cudnn.toDescriptor(gradBias:view(1, gradBias:nElement(),1,1)) - errcheck('cudnnConvolutionBackwardBias', cudnn.getHandle(), + errcheck('cudnnConvolutionBackwardBias', handle, scaleT:data(), oDesc[0], gradOutput:data(), one:data(), biasDesc[0], gradBias:data()) end + +-- Does a 2D Convolution (updateOutput) on input, weight +-- output is assumed to be allocated and given. +cudnn.functional.Convolution2D_updateOutput = function(handle, input, weight, output, + strideH, strideW, padH, padW, workspace) + input = input:dim() == 3 and Batch2D(input) or input + output = output:dim() == 3 and Batch2D(output) or output + + -- create a weight descriptor + local weightDesc = ffi.new('struct cudnnFilterStruct*[1]') + errcheck('cudnnCreateFilterDescriptor', weightDesc) + local nOutputPlane, nInputPlane, kH, kW + = weight:size(1), weight:size(2), weight:size(3), weight:size(4) + local desc = torch.IntTensor({nOutputPlane, nInputPlane, kH, kW}) + errcheck('cudnnSetFilterNdDescriptor', weightDesc[0], 'CUDNN_DATA_FLOAT', 4, + desc:data()); + local function destroyWDesc(d) + errcheck('cudnnDestroyFilterDescriptor', d[0]); + end + ffi.gc(weightDesc, destroyWDesc) + + -- create a convolution descriptor + local convDesc = ffi.new('struct cudnnConvolutionStruct*[1]') + errcheck('cudnnCreateConvolutionDescriptor', convDesc) + local pad = torch.IntTensor({padH, padW}) + local stride = torch.IntTensor({strideH, strideW}) + local upscale = torch.IntTensor({1,1}) + errcheck('cudnnSetConvolutionNdDescriptor_v3', convDesc[0], + 2, pad:data(), + stride:data(), upscale:data(), 'CUDNN_CROSS_CORRELATION', + 'CUDNN_DATA_FLOAT'); + local function destroyConvDesc(d) + errcheck('cudnnDestroyConvolutionDescriptor', d[0]); + end + ffi.gc(convDesc, destroyConvDesc) + + -- create input descriptor + local iDesc = cudnn.toDescriptor(input) + + -- create output descriptor + local oSize = torch.IntTensor(4) + errcheck('cudnnGetConvolutionNdForwardOutputDim', + convDesc[0], iDesc[0], + weightDesc[0], 4, oSize:data()) + oSize = oSize:long() + assert(output:dim() == 4 and + output:size(1) == oSize[1] and + output:size(2) == oSize[2] and + output:size(3) == oSize[3] and + output:size(4) == oSize[4], + 'Output is of wrong size') + -- create descriptor for output + local oDesc = cudnn.toDescriptor(output) + + -- create forwardAlgorithm descriptors for + local algType = ffi.new("cudnnConvolutionFwdAlgo_t[?]", 1) + local algSearchMode = 'CUDNN_CONVOLUTION_FWD_SPECIFY_WORKSPACE_LIMIT' + local algWorkspaceLimit = 0 + if workspace then + algWorkspaceLimit = workspace:nElement() * 4 -- 4 = sizeof float + end + errcheck('cudnnGetConvolutionForwardAlgorithm', + handle, + iDesc[0], weightDesc[0], + convDesc[0], oDesc[0], + algSearchMode, algWorkspaceLimit, algType) + + -- do convolution + errcheck('cudnnConvolutionForward', handle, + one:data(), + iDesc[0], input:data(), + weightDesc[0], weight:data(), + convDesc[0], algType[0], + workspace and workspace:data() or nil, algWorkspaceLimit, + zero:data(), + oDesc[0], output:data()); +end + +-- Does a 2D Convolution (updateGradInput) on input, weight, output, gradOutput +-- gradInput is assumed to be allocated and given. +cudnn.functional.Convolution2D_updateGradInput = function(handle, input, weight, output, gradOutput, gradInput, + strideH, strideW, padH, padW) + input = input:dim() == 3 and Batch2D(input) or input + output = output:dim() == 3 and Batch2D(output) or output + gradOutput = gradOutput:dim() == 3 and Batch2D(gradOutput) or gradOutput + gradInput = gradInput:dim() == 3 and Batch2D(gradInput) or gradInput + + -- create a weight descriptor + local weightDesc = ffi.new('struct cudnnFilterStruct*[1]') + errcheck('cudnnCreateFilterDescriptor', weightDesc) + local nOutputPlane, nInputPlane, kH, kW + = weight:size(1), weight:size(2), weight:size(3), weight:size(4) + local desc = torch.IntTensor({nOutputPlane, nInputPlane, kH, kW}) + errcheck('cudnnSetFilterNdDescriptor', weightDesc[0], 'CUDNN_DATA_FLOAT', 4, + desc:data()); + local function destroyWDesc(d) + errcheck('cudnnDestroyFilterDescriptor', d[0]); + end + ffi.gc(weightDesc, destroyWDesc) + + -- create a convolution descriptor + local convDesc = ffi.new('struct cudnnConvolutionStruct*[1]') + errcheck('cudnnCreateConvolutionDescriptor', convDesc) + local pad = torch.IntTensor({padH, padW}) + local stride = torch.IntTensor({strideH, strideW}) + local upscale = torch.IntTensor({1,1}) + errcheck('cudnnSetConvolutionNdDescriptor_v3', convDesc[0], + 2, pad:data(), + stride:data(), upscale:data(), 'CUDNN_CROSS_CORRELATION', + 'CUDNN_DATA_FLOAT'); + local function destroyConvDesc(d) + errcheck('cudnnDestroyConvolutionDescriptor', d[0]); + end + ffi.gc(convDesc, destroyConvDesc) + + -- create input, output descriptor + local iDesc = cudnn.toDescriptor(input) + local oDesc = cudnn.toDescriptor(output) + + local algType = ffi.new("cudnnConvolutionBwdDataAlgo_t[?]", 1) + local algSearchMode = 'CUDNN_CONVOLUTION_BWD_DATA_NO_WORKSPACE' + + errcheck('cudnnGetConvolutionBackwardDataAlgorithm', + cudnn.getHandle(), + weightDesc[0], oDesc[0], + convDesc[0], iDesc[0], + algSearchMode, 0, algType) + + -- do convolution + errcheck('cudnnConvolutionBackwardData_v3', handle, + one:data(), + weightDesc[0], weight:data(), + oDesc[0], gradOutput:data(), + convDesc[0], + algType[0], + ffi.C.NULL, 0, + zero:data(), + iDesc[0], gradInput:data()); + + +end + +-- accumulates the gradients into gradWeight. +-- gradWeight is assumed to be allocated and given. +local scaleT = torch.FloatTensor(1):fill(1.0) +cudnn.functional.Convolution2D_accGradParameters = function(handle, input, gradWeight, gradOutput, + strideH, strideW, padH, padW, scale) + input = input:dim() == 3 and Batch2D(input) or input + gradOutput = gradOutput:dim() == 3 and Batch2D(gradOutput) or gradOutput + + scale = scale or 1.0 + scaleT[1] = scale + -- create a weight descriptor + local weightDesc = ffi.new('struct cudnnFilterStruct*[1]') + errcheck('cudnnCreateFilterDescriptor', weightDesc) + local nOutputPlane, nInputPlane, kH, kW + = gradWeight:size(1), gradWeight:size(2), gradWeight:size(3), gradWeight:size(4) + local desc = torch.IntTensor({nOutputPlane, nInputPlane, kH, kW}) + errcheck('cudnnSetFilterNdDescriptor', weightDesc[0], 'CUDNN_DATA_FLOAT', 4, + desc:data()); + local function destroyWDesc(d) + errcheck('cudnnDestroyFilterDescriptor', d[0]); + end + ffi.gc(weightDesc, destroyWDesc) + + -- create a convolution descriptor + local convDesc = ffi.new('struct cudnnConvolutionStruct*[1]') + errcheck('cudnnCreateConvolutionDescriptor', convDesc) + local pad = torch.IntTensor({padH, padW}) + local stride = torch.IntTensor({strideH, strideW}) + local upscale = torch.IntTensor({1,1}) + errcheck('cudnnSetConvolutionNdDescriptor_v3', convDesc[0], + 2, pad:data(), + stride:data(), upscale:data(), 'CUDNN_CROSS_CORRELATION', + 'CUDNN_DATA_FLOAT'); + local function destroyConvDesc(d) + errcheck('cudnnDestroyConvolutionDescriptor', d[0]); + end + ffi.gc(convDesc, destroyConvDesc) + + -- create input, output descriptor + local iDesc = cudnn.toDescriptor(input) + local oDesc = cudnn.toDescriptor(gradOutput) + + local algType = ffi.new("cudnnConvolutionBwdFilterAlgo_t[?]", 1) + local algSearchMode = 'CUDNN_CONVOLUTION_BWD_FILTER_NO_WORKSPACE' + local algWorkspaceLimit = 0 + + errcheck('cudnnGetConvolutionBackwardFilterAlgorithm', + cudnn.getHandle(), + iDesc[0], oDesc[0], + convDesc[0], weightDesc[0], + algSearchMode, algWorkspaceLimit, algType) + + + -- do convolution + errcheck('cudnnConvolutionBackwardFilter_v3', handle, + scaleT:data(), + iDesc[0], input:data(), + oDesc[0], gradOutput:data(), + convDesc[0], + algType[0], + ffi.C.NULL, 0, + one:data(), + weightDesc[0], gradWeight:data()); +end + + + +-- Does a 2D Pooling (updateOutput) on input, weight +-- output is assumed to be allocated and given. +cudnn.functional.Pooling_updateOutput = function(handle, mode, input, output, + kH, kW, dH, dW, padH, padW, ceil_mode) + input = input:dim() == 3 and Batch2D(input) or input + output = output:dim() == 3 and Batch2D(output) or output + + padH = padH or 0 + padW = padW or 0 + ceil_mode = ceil_mode or false + + local oW, oH + if ceil_mode then + oW = math.ceil((input:size(4)+padW*2 - kW)/dW + 1) + oH = math.ceil((input:size(3)+padH*2 - kH)/dH + 1) + else + oW = math.floor((input:size(4)+padW*2 - kW)/dW + 1) + oH = math.floor((input:size(3)+padH*2 - kH)/dH + 1) + end + assert(oH == output:size(3) and oW == output:size(4), + 'size mismatch: ' .. oH .. 'x' .. oW .. ' vs ' .. + output:size(3) .. 'x' .. output:size(4)) + + -- create pooling descriptor + local poolDesc = ffi.new('struct cudnnPoolingStruct*[1]') + errcheck('cudnnCreatePoolingDescriptor', poolDesc) + local ker = torch.IntTensor({kH, kW}) + local str = torch.IntTensor({dH, dW}) + local pad = torch.IntTensor({padH, padW}) + errcheck('cudnnSetPoolingNdDescriptor', poolDesc[0], mode, 2, + ker:data(), pad:data(), str:data()); + local function destroyPoolDesc(d) + errcheck('cudnnDestroyPoolingDescriptor', d[0]); + end + ffi.gc(poolDesc, destroyPoolDesc) + + -- create input, output descriptor + local iDesc = cudnn.toDescriptor(input) + local oDesc = cudnn.toDescriptor(output) + + -- pool + errcheck('cudnnPoolingForward', handle, + poolDesc[0], + one:data(), + iDesc[0], input:data(), + zero:data(), + oDesc[0], output:data()); +end + +cudnn.functional.MaxPooling2D_updateOutput = function(handle, input, output, + kH, kW, dH, dW, padH, padW, ceil_mode) + cudnn.functional.Pooling_updateOutput(handle, 'CUDNN_POOLING_MAX', input, output, + kH, kW, dH, dW, padH, padW, ceil_mode); +end + +cudnn.functional.AveragePooling2D_updateOutput = function(handle, input, output, + kH, kW, dH, dW, padH, padW, ceil_mode) + cudnn.functional.Pooling_updateOutput(handle, 'CUDNN_POOLING_AVERAGE', input, output, + kH, kW, dH, dW, padH, padW, ceil_mode); +end + +-- Does a 2D Pooling (updateGradInput) on input, weight +-- output is assumed to be allocated and given. +cudnn.functional.Pooling_updateGradInput = function(handle, mode, input, output, gradOutput, gradInput, + kH, kW, dH, dW, padH, padW, ceil_mode) + input = input:dim() == 3 and Batch2D(input) or input + output = output:dim() == 3 and Batch2D(output) or output + gradOutput = gradOutput:dim() == 3 and Batch2D(gradOutput) or gradOutput + gradInput = gradInput:dim() == 3 and Batch2D(gradInput) or gradInput + + padH = padH or 0 + padW = padW or 0 + ceil_mode = ceil_mode or false + + local oW, oH + if ceil_mode then + oW = math.ceil((input:size(4)+padW*2 - kW)/dW + 1) + oH = math.ceil((input:size(3)+padH*2 - kH)/dH + 1) + else + oW = math.floor((input:size(4)+padW*2 - kW)/dW + 1) + oH = math.floor((input:size(3)+padH*2 - kH)/dH + 1) + end + assert(oH == output:size(3) and oW == output:size(4), + 'size mismatch: ' .. oH .. 'x' .. oW .. ' vs ' .. + output:size(3) .. 'x' .. output:size(4)) + + -- create pooling descriptor + local poolDesc = ffi.new('struct cudnnPoolingStruct*[1]') + errcheck('cudnnCreatePoolingDescriptor', poolDesc) + local ker = torch.IntTensor({kH, kW}) + local str = torch.IntTensor({dH, dW}) + local pad = torch.IntTensor({padH, padW}) + errcheck('cudnnSetPoolingNdDescriptor', poolDesc[0], mode, 2, + ker:data(), pad:data(), str:data()); + local function destroyPoolDesc(d) + errcheck('cudnnDestroyPoolingDescriptor', d[0]); + end + ffi.gc(poolDesc, destroyPoolDesc) + + -- create input, output descriptor + local iDesc = cudnn.toDescriptor(input) + local oDesc = cudnn.toDescriptor(output) + + -- pool + errcheck('cudnnPoolingBackward', + handle, poolDesc[0], + one:data(), + oDesc[0], output:data(), + oDesc[0], gradOutput:data(), + iDesc[0], input:data(), + zero:data(), + iDesc[0], gradInput:data()); +end + +cudnn.functional.MaxPooling2D_updateGradInput = function(handle, input, output, gradOutput, gradInput, + kH, kW, dH, dW, padH, padW, ceil_mode) + cudnn.functional.Pooling_updateGradInput(handle, 'CUDNN_POOLING_MAX', input, output, gradOutput, gradInput, + kH, kW, dH, dW, padH, padW, ceil_mode); +end + +cudnn.functional.AveragePooling2D_updateGradInput = function(handle, input, output, gradOutput, gradInput, + kH, kW, dH, dW, padH, padW, ceil_mode) + cudnn.functional.Pooling_updateGradInput(handle, 'CUDNN_POOLING_AVERAGE', input, output, gradOutput, gradInput, + kH, kW, dH, dW, padH, padW, ceil_mode); +end diff --git a/test/test.lua b/test/test.lua index c2938de..4062425 100644 --- a/test/test.lua +++ b/test/test.lua @@ -724,7 +724,7 @@ function cudnntest.LogSoftMax_batch() precision_backward, 'error on state (backward) ') end -function cudnntest.functional_SpatialBias() +function cudnntest.functional_bias2D() local bs = math.random(1,32) local from = math.random(1,32) local to = math.random(1,64) @@ -742,7 +742,7 @@ function cudnntest.functional_SpatialBias() mod.weight:zero() local groundtruth = mod:forward(input) local result = groundtruth:clone():zero() - cudnn.functional.SpatialBias_updateOutput(mod.bias, result) + cudnn.functional.bias2D_updateOutput(cudnn.getHandle(), mod.bias, result) local error = result:float() - groundtruth:float() mytester:assertlt(error:abs():max(), precision_forward, 'error on forward ') @@ -752,12 +752,63 @@ function cudnntest.functional_SpatialBias() mod:backward(input, gradOutput, scale) local groundtruth = mod.gradBias local result = groundtruth:clone():zero() - cudnn.functional.SpatialBias_accGradParameters(gradOutput, result, scale) + cudnn.functional.bias2D_accGradParameters(cudnn.getHandle(), gradOutput, result, scale) error = result:float() - groundtruth:float() mytester:assertlt(error:abs():max(), precision_backward, 'error on accGradParameters ') end +function cudnntest.functional_convolution2d() + local a=cudnn.SpatialConvolution(3,16,5,5):cuda() + a.bias:zero(); + local input = torch.randn(10,3,10,10):cuda() + a:zeroGradParameters() + a:forward(input); + local output = a.output:clone():normal() + local gradOutput = a.output:clone():normal() + local gradInput = a:backward(input, gradOutput):clone():normal() + local gradWeight = a.gradWeight:clone():zero() + cudnn.functional.Convolution2D_updateOutput(cudnn.getHandle(), input, + a.weight, output, a.dH, + a.dW, a.padH, a.padW) + mytester:assertlt((output - a.output):abs():max(), + precision_forward, 'error on forward ') + + cudnn.functional.Convolution2D_updateGradInput(cudnn.getHandle(), input, + a.weight, output, gradOutput, + gradInput, + a.dH, a.dW, a.padH, a.padW) + mytester:assertlt((gradInput - a.gradInput):abs():max(), + precision_forward, 'error on updateGradInput ') + + cudnn.functional.Convolution2D_accGradParameters(cudnn.getHandle(), input, + gradWeight, gradOutput, + a.dH, a.dW, a.padH, a.padW) + mytester:assertlt((gradWeight - a.gradWeight):abs():max(), + precision_forward, 'error on accGradParameters ') +end + +function cudnntest.functional_maxpooling2d() + local a=cudnn.SpatialMaxPooling(2,2,2,2):cuda() + local input = torch.randn(10,3,10,10):cuda() + a:forward(input); + local output = a.output:clone():normal() + local gradOutput = a.output:clone():normal() + local gradInput = a:backward(input, gradOutput):clone():normal() + cudnn.functional.MaxPooling2D_updateOutput(cudnn.getHandle(), input, + output, a.kH, a.kW, + a.dH, a.dW, a.padH, a.padW) + mytester:assertlt((output - a.output):abs():max(), + precision_forward, 'error on forward ') + + cudnn.functional.MaxPooling2D_updateGradInput(cudnn.getHandle(), input, + output, gradOutput, gradInput, + a.kH, a.kW, a.dH, a.dW, + a.padH, a.padW) + mytester:assertlt((gradInput - a.gradInput):abs():max(), + precision_forward, 'error on updateGradInput ') +end + torch.setdefaulttensortype('torch.FloatTensor') math.randomseed(os.time()) |