diff options
author | soumith <soumith@gmail.com> | 2015-11-06 01:44:15 +0300 |
---|---|---|
committer | soumith <soumith@gmail.com> | 2015-11-06 01:44:15 +0300 |
commit | 507c3a35e3bfbba2af102fbead0c2fb41e9db9b0 (patch) | |
tree | add28ed63809ef677e224128e31706bb9df7dc4f | |
parent | 3532bf81ab15df72a6dc900a1788d3e1d3d1fa2e (diff) |
integrating changes from master
-rw-r--r-- | README.md | 5 | ||||
-rw-r--r-- | SpatialAveragePooling.lua | 4 | ||||
-rw-r--r-- | SpatialConvolution.lua | 6 | ||||
-rw-r--r-- | SpatialCrossEntropyCriterion.lua | 78 | ||||
-rw-r--r-- | SpatialMaxPooling.lua | 4 | ||||
-rw-r--r-- | ffi.lua | 23 | ||||
-rw-r--r-- | init.lua | 1 | ||||
-rw-r--r-- | test/test.lua | 67 |
8 files changed, 176 insertions, 12 deletions
@@ -30,6 +30,8 @@ cudnn.LogSoftMax() -- LogSoftMax across each image (just cudnn.SpatialSoftMax(fastMode [= false]) -- SoftMax across feature-maps (per spatial location) cudnn.SpatialLogSoftMax() -- LogSoftMax across feature-maps (per spatial location) +cudnn.SpatialCrossEntropyCriterion() -- A spatial version of LogSoftMax + ClassNLLCriterion in one shot + -- Volumetric inputs (4D or 5D batched mode) cudnn.VolumetricConvolution(nInputPlane, nOutputPlane, kT, kW, kH, dT, dW, dH, padT, padW, padH) cudnn.VolumetricMaxPooling(kT, kW, kH, dT, dW, dH, padT, padW, padH) @@ -68,7 +70,7 @@ R4 Release Notes: - Rather than resolving v3-v4 diffs, I have imported new cudnn.h with its entirety and converted comments and defines. This should be less error-prone. - addTensor_v2 uses changed to new AddTensor API. -R4 TODO: +R4 TODO: per-activation BN code needs to be added (new .lua similar to SpatialBN.lua, as per Andrei: I believe we have at least one thing missing - per-activation BN (Torch implementation in nn.BatchNormalization.lua). What I believe we have now is an integration of implementation for nn.SpatialBatchNormalization.lua @@ -77,4 +79,3 @@ This is very similar to SpatialBatchNormalizaiton.lua but should use a different For Spatial BN normalization is performed over N with 1CHW result and for per-activation it's done over NHW with 1C11 result. Per-activation BN is only used after non-convolutional layers where spatially-invariant behavior is not expected. - diff --git a/SpatialAveragePooling.lua b/SpatialAveragePooling.lua index 51a4119..e94affe 100644 --- a/SpatialAveragePooling.lua +++ b/SpatialAveragePooling.lua @@ -5,3 +5,7 @@ function SpatialAveragePooling:__init(kW, kH, dW, dH, padW, padH) parent.__init(self, kW, kH, dW, dH, padW, padH) self.mode = 'CUDNN_POOLING_AVERAGE_COUNT_INCLUDE_PADDING' end + +function SpatialAveragePooling:__tostring__() + return nn.SpatialAveragePooling.__tostring__(self) +end diff --git a/SpatialConvolution.lua b/SpatialConvolution.lua index bbdffe2..1d6dbf1 100644 --- a/SpatialConvolution.lua +++ b/SpatialConvolution.lua @@ -21,9 +21,11 @@ function SpatialConvolution:__init(nInputPlane, nOutputPlane, 'nInputPlane should be divisible by nGroups') assert(nOutputPlane % self.groups == 0, 'nOutputPlane should be divisible by nGroups') - self.weight = torch.Tensor(nOutputPlane, nInputPlane/self.groups, kW, kH) - self.gradWeight = torch.Tensor(nOutputPlane, nInputPlane/self.groups, kW, kH) + self.weight = torch.Tensor(nOutputPlane, nInputPlane/self.groups, kH, kW) + self.gradWeight = torch.Tensor(nOutputPlane, nInputPlane/self.groups, kH, kW) self:reset() + -- should nil for serialization, the reset will still work + self.reset = nil self.iSize = torch.LongStorage(4):fill(0) end diff --git a/SpatialCrossEntropyCriterion.lua b/SpatialCrossEntropyCriterion.lua new file mode 100644 index 0000000..0c5da87 --- /dev/null +++ b/SpatialCrossEntropyCriterion.lua @@ -0,0 +1,78 @@ +require 'nn' + +local SpatialCrossEntropyCriterion, parent = torch.class('cudnn.SpatialCrossEntropyCriterion', 'nn.Criterion') + +--[[ + This criterion does the SpatialCrossEntropyCriterion across + the feature dimension for a N-channel image of HxW in size. + + It only supports mini-batches (4D input, 3D target) + + It does a LogSoftMax on the input (over the channel dimension), + so no LogSoftMax is needed in the network at the end + + input = batchSize x nClasses x H x W + target = batchSize x H x W +]]-- +function SpatialCrossEntropyCriterion:__init() + parent.__init(self) + self.slsm = cudnn.SpatialLogSoftMax() + self.nll = nn.ClassNLLCriterion() + self.nll.sizeAverage = false + self.sizeAverage = true +end + +local transpose = function(input) + input = input:transpose(2,4):transpose(2,3):contiguous() -- bdhw -> bwhd -> bhwd + input = input:view(input:size(1)*input:size(2)*input:size(3), input:size(4)) + return input +end + +local transposeBack = function(input, originalInput) + input = input:view(originalInput:size(1), originalInput:size(3), + originalInput:size(4), originalInput:size(2)) + input = input:transpose(2,4):transpose(3,4):contiguous() -- bhwd -> bdwh -> bdhw + return input +end + +function SpatialCrossEntropyCriterion:updateOutput(input, target) + assert(input:dim() == 4, 'mini-batch supported only') + assert(target:dim() == 3, 'mini-batch supported only') + assert(input:size(1) == target:size(1), 'input and target should be of same size') + assert(input:size(3) == target:size(2), 'input and target should be of same size') + assert(input:size(4) == target:size(3), 'input and target should be of same size') + -- apply SpatialLogSoftMax to input + self.slsm:updateOutput(input) + + -- fold the height and width dims into the mini-batch dim. + self.nll:updateOutput(transpose(self.slsm.output), target:view(-1)) + self.output = self.nll.output + return self.output +end + +function SpatialCrossEntropyCriterion:updateGradInput(input, target) + assert(input:dim() == 4, 'mini-batch supported only') + assert(target:dim() == 3, 'mini-batch supported only') + assert(input:size(1) == target:size(1), 'input and target should be of same size') + assert(input:size(3) == target:size(2), 'input and target should be of same size') + assert(input:size(4) == target:size(3), 'input and target should be of same size') + + self.nll:updateGradInput(transpose(self.slsm.output), target:view(-1)) + + -- unfold the height and width dims back + self.slsm:updateGradInput(input, transposeBack(self.nll.gradInput, input)) + self.gradInput = self.slsm.gradInput + if self.sizeAverage then + self.gradInput:div(input:size(1)) + end + return self.gradInput +end + +function SpatialCrossEntropyCriterion:type(type) + if type then + self.nll:type(type) + self.slsm:type(type) + end + parent.type(self, type) + return self +end diff --git a/SpatialMaxPooling.lua b/SpatialMaxPooling.lua index e9a7b89..bd6bdbc 100644 --- a/SpatialMaxPooling.lua +++ b/SpatialMaxPooling.lua @@ -4,3 +4,7 @@ function SpatialMaxPooling:__init(kW, kH, dW, dH, padW, padH) parent.__init(self, kW, kH, dW, dH, padW, padH) self.mode = 'CUDNN_POOLING_MAX' end + +function SpatialMaxPooling:__tostring__() + return nn.SpatialMaxPooling.__tostring__(self) +end @@ -567,7 +567,7 @@ cudnnStatus_t cudnnConvolutionBackwardFilter_v3( const void *beta, const cudnnFilterDescriptor_t dwDesc, void *dw ); - + /*********************************************************/ /* helper function to provide the convolution algo that fit best the requirement */ typedef enum @@ -937,7 +937,7 @@ cudnnStatus_t cudnnCreateLRNDescriptor( typedef enum { CUDNN_LRN_MIN_N = 1, /* minimum allowed lrnN */ CUDNN_LRN_MAX_N = 16 } /* maximum allowed lrnN */ LRN_MinMaxFakeEnum; - + /* define CUDNN_LRN_MIN_K 1e-5 -- minimum allowed lrnK */ /* define CUDNN_LRN_MIN_BETA 0.01 -- minimum allowed lrnBeta */ @@ -1228,7 +1228,7 @@ cudnnStatus_t cudnnGetConvolutionNdDescriptor_v2( int strideA[], int upscaleA[], cudnnConvolutionMode_t *mode ); - + cudnnStatus_t cudnnAddTensor_v2( cudnnHandle_t handle, cudnnAddMode_t mode, @@ -1238,7 +1238,7 @@ cudnnStatus_t cudnnAddTensor_v2( const void *beta, cudnnTensorDescriptor_t yDesc, void *y ); - + cudnnStatus_t cudnnConvolutionBackwardFilter_v2( cudnnHandle_t handle, const void *alpha, @@ -1250,7 +1250,7 @@ cudnnStatus_t cudnnConvolutionBackwardFilter_v2( const void *beta, const cudnnFilterDescriptor_t dxDesc, void *dx ); - + cudnnStatus_t cudnnConvolutionBackwardData_v2( cudnnHandle_t handle, const void *alpha, @@ -1264,12 +1264,19 @@ cudnnStatus_t cudnnConvolutionBackwardData_v2( void *dx ); ]] -local ok,err = pcall(function() cudnn.C = ffi.load('libcudnn') end) +local libnames = {'libcudnn.so.4', 'libcudnn.4.dylib'} + +local ok = false +for i=1,#libnames do + ok = pcall(function () cudnn.C = ffi.load(libnames[i]) end) + if ok then break; end +end + if not ok then print(err) - error([['libcudnn not found in library path. + error([['libcudnn (R4) not found in library path. Please install CuDNN from https://developer.nvidia.com/cuDNN -Then make sure all the files named as libcudnn.so* are placed in your library load path (for example /usr/local/lib , or manually add a path to LD_LIBRARY_PATH) +Then make sure files named as libcudnn.so.4 or libcudnn.4.dylib are placed in your library load path (for example /usr/local/lib , or manually add a path to LD_LIBRARY_PATH) ]]) end @@ -111,6 +111,7 @@ include 'SoftMax.lua' include 'LogSoftMax.lua' include 'SpatialCrossMapLRN.lua' include 'SpatialBatchNormalization.lua' +include 'SpatialCrossEntropyCriterion.lua' include 'functional.lua' diff --git a/test/test.lua b/test/test.lua index 0a2fb01..6dbd0d8 100644 --- a/test/test.lua +++ b/test/test.lua @@ -730,6 +730,73 @@ function cudnntest.LogSoftMax_batch() precision_backward, 'error on state (backward) ') end +function cudnntest.SpatialLogSoftMax() + -- batch + local numLabels = math.random(5,10) + local h = math.random(5,10) + local w = math.random(5,10) + local bsz = math.random(3, 7) + local input = torch.zeros(bsz, numLabels, h, w):normal():cuda() + local target = torch.zeros(bsz, numLabels, h, w):normal():cuda() + + local cri = cudnn.SpatialLogSoftMax():cuda() + local gcri = nn.LogSoftMax():cuda() + + local op = cri:forward(input, target) + local gi = cri:backward(input, target) + + local gop = op:clone():zero() + local ggi = gi:clone():zero() + + for i=1,h do + for j=1,w do + local i1 = input[{{}, {}, {i}, {j}}]:contiguous():squeeze() + local t1 = target[{{}, {}, {i}, {j}}]:contiguous():squeeze() + local gop1 = gcri:forward(i1, t1) + local ggi1 = gcri:backward(i1, t1) + gop[{{}, {}, {i}, {j}}]:copy(gop1) + ggi[{{}, {}, {i}, {j}}]:copy(ggi1) + end + end + local err = (gi - ggi):abs():max() + mytester:assertlt(err, precision_backward, 'error in difference between central difference and :backward') + local err = (op - gop):abs():max() + mytester:assertlt(err, precision_backward, 'error in difference between central difference and :backward') +end + +function cudnntest.SpatialCrossEntropyCriterion() + -- batch + local numLabels = math.random(5,10) + local h = math.random(5,10) + local w = math.random(5,10) + local bsz = math.random(3, 7) + local input = torch.zeros(bsz, numLabels, h, w):normal():cuda() + local target = torch.Tensor(bsz, h, w):random(1, numLabels):cuda() + + local cri = cudnn.SpatialCrossEntropyCriterion():cuda() + + local gcri = nn.CrossEntropyCriterion():cuda() + + local op = cri:forward(input, target) + local gi = cri:backward(input, target) + + local ggi = gi:clone():zero() + + for i=1,h do + for j=1,w do + local i1 = input[{{}, {}, {i}, {j}}]:contiguous():squeeze() + local t1 = target[{{}, {i}, {j}}]:contiguous():squeeze() + local gop1 = gcri:forward(i1, t1) + local ggi1 = gcri:backward(i1, t1) + ggi[{{}, {}, {i}, {j}}]:copy(ggi1) + end + end + local err = (gi - ggi):abs():max() + mytester:assertlt(err, precision_backward, 'error in difference between central difference and :backward') + +end + + function cudnntest.functional_bias2D() local bs = math.random(1,32) local from = math.random(1,32) |