integrating changes from master

author: soumith <soumith@gmail.com> 2015-11-06 01:44:15 +0300
committer: soumith <soumith@gmail.com> 2015-11-06 01:44:15 +0300
commit: 507c3a35e3bfbba2af102fbead0c2fb41e9db9b0 (patch)
tree: add28ed63809ef677e224128e31706bb9df7dc4f
parent: 3532bf81ab15df72a6dc900a1788d3e1d3d1fa2e (diff)
8 files changed, 176 insertions, 12 deletions
diff --git a/README.md b/README.md
index 3c4b79f..94278b5 100644
--- a/README.md
+++ b/README.md
@@ -30,6 +30,8 @@ cudnn.LogSoftMax()                         -- LogSoftMax across each image (just
 cudnn.SpatialSoftMax(fastMode [= false])   -- SoftMax across feature-maps (per spatial location)
 cudnn.SpatialLogSoftMax()                  -- LogSoftMax across feature-maps (per spatial location)
 
+cudnn.SpatialCrossEntropyCriterion()       -- A spatial version of LogSoftMax + ClassNLLCriterion in one shot
+
 -- Volumetric inputs (4D or 5D batched mode)
 cudnn.VolumetricConvolution(nInputPlane, nOutputPlane, kT, kW, kH, dT, dW, dH, padT, padW, padH)
 cudnn.VolumetricMaxPooling(kT, kW, kH, dT, dW, dH, padT, padW, padH)
@@ -68,7 +70,7 @@ R4 Release Notes:
 - Rather than resolving v3-v4 diffs, I have imported new cudnn.h with its entirety and converted comments and defines. This should be less error-prone.
 - addTensor_v2 uses changed to new AddTensor API.
 
-R4 TODO: 
+R4 TODO:
 per-activation BN code needs to be added (new .lua similar to SpatialBN.lua, as per Andrei:
 I believe we have at least one thing missing - per-activation BN (Torch implementation in nn.BatchNormalization.lua).
 What I believe we have now is an integration of implementation for nn.SpatialBatchNormalization.lua
@@ -77,4 +79,3 @@ This is very similar to SpatialBatchNormalizaiton.lua but should use a different
 For Spatial BN normalization is performed over N with 1CHW result and for per-activation it's done over NHW with 1C11 result.
 
 Per-activation BN is only used after non-convolutional layers where spatially-invariant behavior is not expected.
-
diff --git a/SpatialAveragePooling.lua b/SpatialAveragePooling.lua
index 51a4119..e94affe 100644
--- a/SpatialAveragePooling.lua
+++ b/SpatialAveragePooling.lua
@@ -5,3 +5,7 @@ function SpatialAveragePooling:__init(kW, kH, dW, dH, padW, padH)
    parent.__init(self, kW, kH, dW, dH, padW, padH)
    self.mode = 'CUDNN_POOLING_AVERAGE_COUNT_INCLUDE_PADDING'
 end
+
+function SpatialAveragePooling:__tostring__()
+   return nn.SpatialAveragePooling.__tostring__(self) -- reuse nn's formatter with this module as the receiver
+end
diff --git a/SpatialConvolution.lua b/SpatialConvolution.lua
index bbdffe2..1d6dbf1 100644
--- a/SpatialConvolution.lua
+++ b/SpatialConvolution.lua
@@ -21,9 +21,11 @@ function SpatialConvolution:__init(nInputPlane, nOutputPlane,
            'nInputPlane should be divisible by nGroups')
     assert(nOutputPlane % self.groups == 0,
            'nOutputPlane should be divisible by nGroups')
-    self.weight = torch.Tensor(nOutputPlane, nInputPlane/self.groups, kW, kH)
-    self.gradWeight = torch.Tensor(nOutputPlane, nInputPlane/self.groups, kW, kH)
+    self.weight = torch.Tensor(nOutputPlane, nInputPlane/self.groups, kH, kW)
+    self.gradWeight = torch.Tensor(nOutputPlane, nInputPlane/self.groups, kH, kW)
     self:reset()
+    -- should be nil'ed for serialization; reset() will still work
+    self.reset = nil
     self.iSize = torch.LongStorage(4):fill(0)
 end
 
diff --git a/SpatialCrossEntropyCriterion.lua b/SpatialCrossEntropyCriterion.lua
new file mode 100644
index 0000000..0c5da87
--- /dev/null
+++ b/SpatialCrossEntropyCriterion.lua
@@ -0,0 +1,97 @@
+require 'nn'
+
+local SpatialCrossEntropyCriterion, parent = torch.class('cudnn.SpatialCrossEntropyCriterion', 'nn.Criterion')
+
+--[[
+   Cross-entropy criterion applied independently at every spatial location
+   of an N-channel image of size HxW; the classes live in the channel
+   (feature) dimension.
+
+   It only supports mini-batches (4D input, 3D target)
+
+   It does a LogSoftMax on the input (over the channel dimension),
+   so no LogSoftMax is needed in the network at the end
+
+   input = batchSize x nClasses x H x W
+   target = batchSize x H x W
+
+   The per-location losses are summed; when self.sizeAverage is true
+   (the default) both the loss and its gradient are divided by batchSize.
+]]--
+function SpatialCrossEntropyCriterion:__init()
+   parent.__init(self)
+   self.slsm = cudnn.SpatialLogSoftMax()
+   self.nll = nn.ClassNLLCriterion()
+   -- the inner criterion only sums; batch averaging is applied by this class
+   self.nll.sizeAverage = false
+   self.sizeAverage = true
+end
+
+-- Shared argument validation for updateOutput / updateGradInput.
+local checkSizes = function(input, target)
+   assert(input:dim() == 4, 'mini-batch supported only')
+   assert(target:dim() == 3, 'mini-batch supported only')
+   assert(input:size(1) == target:size(1), 'input and target should be of same size')
+   assert(input:size(3) == target:size(2), 'input and target should be of same size')
+   assert(input:size(4) == target:size(3), 'input and target should be of same size')
+end
+
+-- Folds a (b, d, h, w) tensor into a contiguous (b*h*w, d) matrix so that
+-- every spatial location becomes one row.
+local transpose = function(input)
+   input = input:transpose(2,4):transpose(2,3):contiguous() -- bdhw -> bwhd -> bhwd
+   input = input:view(input:size(1)*input:size(2)*input:size(3), input:size(4))
+   return input
+end
+
+-- Inverse of transpose(): reshapes a (b*h*w, d) matrix back to the
+-- (b, d, h, w) layout of originalInput.
+local transposeBack = function(input, originalInput)
+   input = input:view(originalInput:size(1), originalInput:size(3),
+                      originalInput:size(4), originalInput:size(2))
+   input = input:transpose(2,4):transpose(3,4):contiguous()  -- bhwd -> bdwh -> bdhw
+   return input
+end
+
+-- Returns the loss for input (b x d x h x w) against target (b x h x w).
+function SpatialCrossEntropyCriterion:updateOutput(input, target)
+   checkSizes(input, target)
+   -- apply SpatialLogSoftMax to input
+   self.slsm:updateOutput(input)
+
+   -- fold the height and width dims into the mini-batch dim.
+   self.nll:updateOutput(transpose(self.slsm.output), target:view(-1))
+   self.output = self.nll.output
+   if self.sizeAverage then
+      -- same scaling as updateGradInput, so that gradInput is the
+      -- gradient of the value returned here
+      self.output = self.output / input:size(1)
+   end
+   return self.output
+end
+
+-- Returns the gradient of the loss w.r.t. input. Reuses self.slsm.output,
+-- so updateOutput must have been called on the same input beforehand.
+function SpatialCrossEntropyCriterion:updateGradInput(input, target)
+   checkSizes(input, target)
+
+   self.nll:updateGradInput(transpose(self.slsm.output), target:view(-1))
+
+   -- unfold the height and width dims back
+   self.slsm:updateGradInput(input, transposeBack(self.nll.gradInput, input))
+   self.gradInput = self.slsm.gradInput
+   if self.sizeAverage then
+      self.gradInput:div(input:size(1))
+   end
+   return self.gradInput
+end
+
+-- Converts the wrapped modules as well as the criterion itself.
+function SpatialCrossEntropyCriterion:type(type)
+   if type then
+      self.nll:type(type)
+      self.slsm:type(type)
+   end
+   parent.type(self, type)
+   return self
+end
diff --git a/SpatialMaxPooling.lua b/SpatialMaxPooling.lua
index e9a7b89..bd6bdbc 100644
--- a/SpatialMaxPooling.lua
+++ b/SpatialMaxPooling.lua
@@ -4,3 +4,7 @@ function SpatialMaxPooling:__init(kW, kH, dW, dH, padW, padH)
    parent.__init(self, kW, kH, dW, dH, padW, padH)
    self.mode = 'CUDNN_POOLING_MAX'
 end
+
+function SpatialMaxPooling:__tostring__()
+   return nn.SpatialMaxPooling.__tostring__(self) -- reuse nn's formatter with this module as the receiver
+end
diff --git a/ffi.lua b/ffi.lua
index c8ee963..e74c5cb 100644
--- a/ffi.lua
+++ b/ffi.lua
@@ -567,7 +567,7 @@ cudnnStatus_t             cudnnConvolutionBackwardFilter_v3(
                                 const void                         *beta,
                                 const cudnnFilterDescriptor_t       dwDesc,
                                 void                               *dw );
-                                                           
+
 /*********************************************************/
 /* helper function to provide the convolution algo that fit best the requirement */
 typedef enum
@@ -937,7 +937,7 @@ cudnnStatus_t             cudnnCreateLRNDescriptor(
 typedef enum { CUDNN_LRN_MIN_N     = 1,       /*  minimum allowed lrnN */
                CUDNN_LRN_MAX_N     = 16 }      /*  maximum allowed lrnN */
              LRN_MinMaxFakeEnum;
- 
+
 /*  define CUDNN_LRN_MIN_K     1e-5    -- minimum allowed lrnK */
 /*  define CUDNN_LRN_MIN_BETA  0.01    -- minimum allowed lrnBeta */
 
@@ -1228,7 +1228,7 @@ cudnnStatus_t             cudnnGetConvolutionNdDescriptor_v2(
                                 int                                 strideA[],
                                 int                                 upscaleA[],
                                 cudnnConvolutionMode_t             *mode );
-                                                         
+
 cudnnStatus_t             cudnnAddTensor_v2(
                                 cudnnHandle_t                       handle,
                                 cudnnAddMode_t                      mode,
@@ -1238,7 +1238,7 @@ cudnnStatus_t             cudnnAddTensor_v2(
                                 const void                         *beta,
                                 cudnnTensorDescriptor_t             yDesc,
                                 void                               *y );
-                                            
+
 cudnnStatus_t             cudnnConvolutionBackwardFilter_v2(
                                 cudnnHandle_t                       handle,
                                 const void                         *alpha,
@@ -1250,7 +1250,7 @@ cudnnStatus_t             cudnnConvolutionBackwardFilter_v2(
                                 const void                         *beta,
                                 const cudnnFilterDescriptor_t       dxDesc,
                                 void                               *dx );
-                                                           
+
 cudnnStatus_t             cudnnConvolutionBackwardData_v2(
                                 cudnnHandle_t                       handle,
                                 const void                         *alpha,
@@ -1264,12 +1264,19 @@ cudnnStatus_t             cudnnConvolutionBackwardData_v2(
                                 void                               *dx );
 ]]
 
-local ok,err = pcall(function() cudnn.C = ffi.load('libcudnn') end)
+local libnames = {'libcudnn.so.4', 'libcudnn.4.dylib'}
+
+local ok, err = false, nil -- err keeps the last load failure for the report below
+for i=1,#libnames do
+   ok, err = pcall(function () cudnn.C = ffi.load(libnames[i]) end)
+   if ok then break; end
+end
+
 if not ok then
    print(err)
-   error([['libcudnn not found in library path.
+   error([['libcudnn (R4) not found in library path.
 Please install CuDNN from https://developer.nvidia.com/cuDNN
-Then make sure all the files named as libcudnn.so* are placed in your library load path (for example /usr/local/lib , or manually add a path to LD_LIBRARY_PATH)
+Then make sure files named as libcudnn.so.4 or libcudnn.4.dylib are placed in your library load path (for example /usr/local/lib , or manually add a path to LD_LIBRARY_PATH)
 ]])
 end
 
diff --git a/init.lua b/init.lua
index 105f9d7..dea1d3d 100644
--- a/init.lua
+++ b/init.lua
@@ -111,6 +111,7 @@ include 'SoftMax.lua'
 include 'LogSoftMax.lua'
 include 'SpatialCrossMapLRN.lua'
 include 'SpatialBatchNormalization.lua'
+include 'SpatialCrossEntropyCriterion.lua'
 
 include 'functional.lua'
 
diff --git a/test/test.lua b/test/test.lua
index 0a2fb01..6dbd0d8 100644
--- a/test/test.lua
+++ b/test/test.lua
@@ -730,6 +730,73 @@ function cudnntest.LogSoftMax_batch()
                      precision_backward, 'error on state (backward) ')
 end
 
+function cudnntest.SpatialLogSoftMax()
+   -- batch mode: compare against nn.LogSoftMax applied at every spatial location
+   local numLabels = math.random(5,10)
+   local h = math.random(5,10)
+   local w = math.random(5,10)
+   local bsz = math.random(3, 7)
+   local input = torch.zeros(bsz, numLabels, h, w):normal():cuda()
+   local target = torch.zeros(bsz, numLabels, h, w):normal():cuda()
+
+   local cri = cudnn.SpatialLogSoftMax():cuda()
+   local gcri = nn.LogSoftMax():cuda()
+
+   local op = cri:forward(input, target)
+   local gi = cri:backward(input, target)
+
+   local gop = op:clone():zero()
+   local ggi = gi:clone():zero()
+
+   for i=1,h do
+      for j=1,w do
+         local i1 = input[{{}, {}, {i}, {j}}]:contiguous():squeeze()
+         local t1 = target[{{}, {}, {i}, {j}}]:contiguous():squeeze()
+         local gop1 = gcri:forward(i1, t1)
+         local ggi1 = gcri:backward(i1, t1)
+         gop[{{}, {}, {i}, {j}}]:copy(gop1)
+         ggi[{{}, {}, {i}, {j}}]:copy(ggi1)
+      end
+   end
+   local err = (gi - ggi):abs():max()
+   mytester:assertlt(err, precision_backward, 'error on state (backward) against per-location nn.LogSoftMax')
+   local errForward = (op - gop):abs():max()
+   mytester:assertlt(errForward, precision_backward, 'error on state (forward) against per-location nn.LogSoftMax')
+end
+
+function cudnntest.SpatialCrossEntropyCriterion()
+   -- batch mode: compare gradients against nn.CrossEntropyCriterion applied at every spatial location
+   local numLabels = math.random(5,10)
+   local h = math.random(5,10)
+   local w = math.random(5,10)
+   local bsz = math.random(3, 7)
+   local input = torch.zeros(bsz, numLabels, h, w):normal():cuda()
+   local target = torch.Tensor(bsz, h, w):random(1, numLabels):cuda()
+
+   local cri = cudnn.SpatialCrossEntropyCriterion():cuda()
+
+   local gcri = nn.CrossEntropyCriterion():cuda()
+
+   local op = cri:forward(input, target)
+   local gi = cri:backward(input, target)
+
+   local ggi = gi:clone():zero()
+
+   for i=1,h do
+      for j=1,w do
+         local i1 = input[{{}, {}, {i}, {j}}]:contiguous():squeeze()
+         local t1 = target[{{}, {i}, {j}}]:contiguous():squeeze()
+         gcri:forward(i1, t1)
+         local ggi1 = gcri:backward(i1, t1)
+         ggi[{{}, {}, {i}, {j}}]:copy(ggi1)
+      end
+   end
+   local err = (gi - ggi):abs():max()
+   mytester:assertlt(err, precision_backward, 'error on state (backward) against per-location nn.CrossEntropyCriterion')
+
+end
+
+
 function cudnntest.functional_bias2D()
    local bs = math.random(1,32)
    local from = math.random(1,32)
author	soumith <soumith@gmail.com>	2015-11-06 01:44:15 +0300
committer	soumith <soumith@gmail.com>	2015-11-06 01:44:15 +0300
commit	507c3a35e3bfbba2af102fbead0c2fb41e9db9b0 (patch)
tree	add28ed63809ef677e224128e31706bb9df7dc4f
parent	3532bf81ab15df72a6dc900a1788d3e1d3d1fa2e (diff)