diff options
Diffstat (limited to 'SpatialConvolution.lua')
-rw-r--r-- | SpatialConvolution.lua | 35 |
1 files changed, 23 insertions, 12 deletions
diff --git a/SpatialConvolution.lua b/SpatialConvolution.lua index 8292536..cb5464c 100644 --- a/SpatialConvolution.lua +++ b/SpatialConvolution.lua @@ -1,8 +1,10 @@ -local SpatialConvolution, parent = torch.class('cudnn.SpatialConvolution', 'nn.SpatialConvolution') +local SpatialConvolution, parent = + torch.class('cudnn.SpatialConvolution', 'nn.SpatialConvolution') local ffi = require 'ffi' local errcheck = cudnn.errcheck -function SpatialConvolution:__init(nInputPlane, nOutputPlane, kW, kH, dW, dH, padW, padH) +function SpatialConvolution:__init(nInputPlane, nOutputPlane, + kW, kH, dW, dH, padW, padH) parent.__init(self, nInputPlane, nOutputPlane, kW, kH, dW, dH) self.padW = padW or 0 self.padH = padH or 0 @@ -12,12 +14,15 @@ end -- if you change the configuration of the module manually, call this function SpatialConvolution:resetWeightDescriptors() - assert(torch.typename(self.weight) == 'torch.CudaTensor', 'Only Cuda supported duh!') - assert(torch.typename(self.bias) == 'torch.CudaTensor', 'Only Cuda supported duh!') + assert(torch.typename(self.weight) == 'torch.CudaTensor', + 'Only Cuda supported duh!') + assert(torch.typename(self.bias) == 'torch.CudaTensor', + 'Only Cuda supported duh!') -- create filterDescriptor for weight self.weightDesc = ffi.new('struct cudnnFilterStruct*[1]') errcheck('cudnnCreateFilterDescriptor', self.weightDesc) - local desc = torch.IntTensor({self.nOutputPlane, self.nInputPlane, self.kH, self.kW}) + local desc = torch.IntTensor({self.nOutputPlane, self.nInputPlane, + self.kH, self.kW}) errcheck('cudnnSetFilterNdDescriptor', self.weightDesc[0], 'CUDNN_DATA_FLOAT', 4, desc:data()); @@ -27,7 +32,7 @@ function SpatialConvolution:resetWeightDescriptors() ffi.gc(self.weightDesc, destroyWDesc) -- create descriptor for bias - self.biasDesc = cudnn.toDescriptor(self.bias:view(1, self.nOutputPlane, 1, 1)) + self.biasDesc = cudnn.toDescriptor(self.bias:view(1, self.nOutputPlane,1,1)) end function SpatialConvolution:createIODescriptors(input) @@ -51,7 +56,8 @@ function SpatialConvolution:createIODescriptors(input) local pad = torch.IntTensor({self.padH, self.padW}) local stride = torch.IntTensor({self.dH, self.dW}) local upscale = torch.IntTensor({1,1}) - errcheck('cudnnSetConvolutionNdDescriptor', self.convDesc[0], 2, pad:data(), + errcheck('cudnnSetConvolutionNdDescriptor', self.convDesc[0], + 2, pad:data(), stride:data(), upscale:data(), 'CUDNN_CROSS_CORRELATION'); local function destroyConvDesc(d) errcheck('cudnnDestroyConvolutionDescriptor', d[0]); @@ -61,7 +67,8 @@ function SpatialConvolution:createIODescriptors(input) -- create output descriptor and resize output local oSize = torch.IntTensor(4) local oSizeD = oSize:data() - errcheck('cudnnGetConvolutionNdForwardOutputDim', self.convDesc[0], self.iDesc[0], + errcheck('cudnnGetConvolutionNdForwardOutputDim', + self.convDesc[0], self.iDesc[0], self.weightDesc[0], 4, oSizeD) self.output:resize(oSize:long():storage()) -- create descriptor for output @@ -71,13 +78,15 @@ function SpatialConvolution:createIODescriptors(input) local algType = ffi.new("cudnnConvolutionFwdAlgo_t[?]", 1) errcheck('cudnnGetConvolutionForwardAlgorithm', cudnn.handle[cutorch.getDevice()-1], - self.iDesc[0], self.weightDesc[0], self.convDesc[0], self.oDesc[0], + self.iDesc[0], self.weightDesc[0], + self.convDesc[0], self.oDesc[0], 'CUDNN_CONVOLUTION_FWD_PREFER_FASTEST', -1, algType) self.algType = algType local bufSize = torch.LongTensor(1) errcheck('cudnnGetConvolutionForwardWorkspaceSize', cudnn.handle[cutorch.getDevice()-1], - self.iDesc[0], self.weightDesc[0], self.convDesc[0], self.oDesc[0], + self.iDesc[0], self.weightDesc[0], + self.convDesc[0], self.oDesc[0], algType[0], bufSize:data()) self.extraBuffer = self.extraBuffer or input.new(1) if bufSize[1] ~= 0 then self.extraBuffer:resize(bufSize[1]) end @@ -107,7 +116,8 @@ function SpatialConvolution:updateOutput(input) self.extraBuffer:data(), self.extraBuffer:nElement(), zero:data(), self.oDesc[0], self.output:data()); - errcheck('cudnnAddTensor', cudnn.handle[cutorch.getDevice()-1], 'CUDNN_ADD_SAME_C', + errcheck('cudnnAddTensor', cudnn.handle[cutorch.getDevice()-1], + 'CUDNN_ADD_SAME_C', one:data(), self.biasDesc[0], self.bias:data(), one:data(), self.oDesc[0], self.output:data()); return self.output @@ -131,7 +141,8 @@ end function SpatialConvolution:accGradParameters(input, gradOutput, scale) self.scaleT = self.scaleT or torch.FloatTensor(1):fill(1.0) - self.scaleT = self.scaleT:float() -- this line forces this member to always be on CPU (needed for cudnn) + -- this line forces this member to always be on CPU (needed for cudnn) + self.scaleT = self.scaleT:float() scale = scale or 1.0 self.scaleT[1] = scale assert((gradOutput:dim() == 3 or gradOutput:dim() == 4) |