github.com/soumith/cudnn.torch.git
author    soumith <soumith@gmail.com>  2015-11-06 01:44:15 +0300
committer soumith <soumith@gmail.com>  2015-11-06 01:44:15 +0300
commit    507c3a35e3bfbba2af102fbead0c2fb41e9db9b0 (patch)
tree      add28ed63809ef677e224128e31706bb9df7dc4f
parent    3532bf81ab15df72a6dc900a1788d3e1d3d1fa2e (diff)

integrating changes from master
 README.md                        |  5
 SpatialAveragePooling.lua        |  4
 SpatialConvolution.lua           |  6
 SpatialCrossEntropyCriterion.lua | 78
 SpatialMaxPooling.lua            |  4
 ffi.lua                          | 23
 init.lua                         |  1
 test/test.lua                    | 67
 8 files changed, 176 insertions(+), 12 deletions(-)
diff --git a/README.md b/README.md
index 3c4b79f..94278b5 100644
--- a/README.md
+++ b/README.md
@@ -30,6 +30,8 @@ cudnn.LogSoftMax() -- LogSoftMax across each image (just
cudnn.SpatialSoftMax(fastMode [= false]) -- SoftMax across feature-maps (per spatial location)
cudnn.SpatialLogSoftMax() -- LogSoftMax across feature-maps (per spatial location)
+cudnn.SpatialCrossEntropyCriterion() -- A spatial version of LogSoftMax + ClassNLLCriterion in one shot
+
-- Volumetric inputs (4D or 5D batched mode)
cudnn.VolumetricConvolution(nInputPlane, nOutputPlane, kT, kW, kH, dT, dW, dH, padT, padW, padH)
cudnn.VolumetricMaxPooling(kT, kW, kH, dT, dW, dH, padT, padW, padH)
@@ -68,7 +70,7 @@ R4 Release Notes:
- Rather than resolving v3-v4 diffs, I have imported the new cudnn.h in its entirety and converted comments and defines. This should be less error-prone.
- addTensor_v2 usages changed to the new AddTensor API.
-R4 TODO:
+R4 TODO:
per-activation BN code needs to be added (a new .lua similar to SpatialBN.lua), as per Andrei:
I believe we have at least one thing missing - per-activation BN (Torch implementation in nn.BatchNormalization.lua).
What I believe we have now is an integration of the implementation for nn.SpatialBatchNormalization.lua.
@@ -77,4 +79,3 @@ This is very similar to SpatialBatchNormalization.lua but should use a different
For Spatial BN, normalization is performed over NHW with a 1C11 result; for per-activation BN it's done over N with a 1CHW result.
Per-activation BN is only used after non-convolutional layers, where spatially-invariant behavior is not expected.
-
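For the distinction above, a minimal sketch of the two nn counterparts (shapes taken from the Torch docs, not from this commit):

    require 'nn'
    -- Spatial BN: statistics over N, H and W; one mean/var per channel (the 1C11 case)
    local sbn = nn.SpatialBatchNormalization(16)   -- expects N x 16 x H x W input
    -- Per-activation BN: statistics over N only; one mean/var per activation
    local bn  = nn.BatchNormalization(128)         -- expects N x 128 input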
diff --git a/SpatialAveragePooling.lua b/SpatialAveragePooling.lua
index 51a4119..e94affe 100644
--- a/SpatialAveragePooling.lua
+++ b/SpatialAveragePooling.lua
@@ -5,3 +5,7 @@ function SpatialAveragePooling:__init(kW, kH, dW, dH, padW, padH)
parent.__init(self, kW, kH, dW, dH, padW, padH)
self.mode = 'CUDNN_POOLING_AVERAGE_COUNT_INCLUDE_PADDING'
end
+
+function SpatialAveragePooling:__tostring__()
+ return nn.SpatialAveragePooling.__tostring__(self)
+end
diff --git a/SpatialConvolution.lua b/SpatialConvolution.lua
index bbdffe2..1d6dbf1 100644
--- a/SpatialConvolution.lua
+++ b/SpatialConvolution.lua
@@ -21,9 +21,11 @@ function SpatialConvolution:__init(nInputPlane, nOutputPlane,
'nInputPlane should be divisible by nGroups')
assert(nOutputPlane % self.groups == 0,
'nOutputPlane should be divisible by nGroups')
- self.weight = torch.Tensor(nOutputPlane, nInputPlane/self.groups, kW, kH)
- self.gradWeight = torch.Tensor(nOutputPlane, nInputPlane/self.groups, kW, kH)
+ self.weight = torch.Tensor(nOutputPlane, nInputPlane/self.groups, kH, kW)
+ self.gradWeight = torch.Tensor(nOutputPlane, nInputPlane/self.groups, kH, kW)
self:reset()
+   -- nil this out so it isn't serialized; reset still works via the metatable
+ self.reset = nil
self.iSize = torch.LongStorage(4):fill(0)
end
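The hunk above swaps the last two weight dimensions to kH x kW; a quick sanity check of the resulting layout (assuming a cudnn.torch build with this patch applied):

    require 'cudnn'
    local conv = cudnn.SpatialConvolution(3, 16, 5, 3)  -- nIn=3, nOut=16, kW=5, kH=3
    print(conv.weight:size())  -- 16 x 3 x 3 x 5, i.e. nOutputPlane x nInputPlane x kH x kW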
diff --git a/SpatialCrossEntropyCriterion.lua b/SpatialCrossEntropyCriterion.lua
new file mode 100644
index 0000000..0c5da87
--- /dev/null
+++ b/SpatialCrossEntropyCriterion.lua
@@ -0,0 +1,78 @@
+require 'nn'
+
+local SpatialCrossEntropyCriterion, parent = torch.class('cudnn.SpatialCrossEntropyCriterion', 'nn.Criterion')
+
+--[[
+    This criterion computes cross-entropy across the feature (class)
+    dimension for an N-channel image of size HxW.
+
+ It only supports mini-batches (4D input, 3D target)
+
+ It does a LogSoftMax on the input (over the channel dimension),
+ so no LogSoftMax is needed in the network at the end
+
+ input = batchSize x nClasses x H x W
+ target = batchSize x H x W
+]]--
+function SpatialCrossEntropyCriterion:__init()
+ parent.__init(self)
+ self.slsm = cudnn.SpatialLogSoftMax()
+ self.nll = nn.ClassNLLCriterion()
+ self.nll.sizeAverage = false
+ self.sizeAverage = true
+end
+
+local transpose = function(input)
+ input = input:transpose(2,4):transpose(2,3):contiguous() -- bdhw -> bwhd -> bhwd
+ input = input:view(input:size(1)*input:size(2)*input:size(3), input:size(4))
+ return input
+end
+
+local transposeBack = function(input, originalInput)
+ input = input:view(originalInput:size(1), originalInput:size(3),
+ originalInput:size(4), originalInput:size(2))
+ input = input:transpose(2,4):transpose(3,4):contiguous() -- bhwd -> bdwh -> bdhw
+ return input
+end
+
+function SpatialCrossEntropyCriterion:updateOutput(input, target)
+ assert(input:dim() == 4, 'mini-batch supported only')
+ assert(target:dim() == 3, 'mini-batch supported only')
+ assert(input:size(1) == target:size(1), 'input and target should be of same size')
+ assert(input:size(3) == target:size(2), 'input and target should be of same size')
+ assert(input:size(4) == target:size(3), 'input and target should be of same size')
+ -- apply SpatialLogSoftMax to input
+ self.slsm:updateOutput(input)
+
+ -- fold the height and width dims into the mini-batch dim.
+ self.nll:updateOutput(transpose(self.slsm.output), target:view(-1))
+ self.output = self.nll.output
+ return self.output
+end
+
+function SpatialCrossEntropyCriterion:updateGradInput(input, target)
+ assert(input:dim() == 4, 'mini-batch supported only')
+ assert(target:dim() == 3, 'mini-batch supported only')
+ assert(input:size(1) == target:size(1), 'input and target should be of same size')
+ assert(input:size(3) == target:size(2), 'input and target should be of same size')
+ assert(input:size(4) == target:size(3), 'input and target should be of same size')
+
+ self.nll:updateGradInput(transpose(self.slsm.output), target:view(-1))
+
+ -- unfold the height and width dims back
+ self.slsm:updateGradInput(input, transposeBack(self.nll.gradInput, input))
+ self.gradInput = self.slsm.gradInput
+ if self.sizeAverage then
+ self.gradInput:div(input:size(1))
+ end
+ return self.gradInput
+end
+
+function SpatialCrossEntropyCriterion:type(type)
+ if type then
+ self.nll:type(type)
+ self.slsm:type(type)
+ end
+ parent.type(self, type)
+ return self
+end
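A minimal usage sketch of the new criterion, mirroring the shapes from the docstring above (assumes a CUDA-enabled Torch install with this patch):

    require 'cudnn'
    local bsz, nClasses, h, w = 4, 10, 8, 8
    local input  = torch.zeros(bsz, nClasses, h, w):normal():cuda()
    local target = torch.Tensor(bsz, h, w):random(1, nClasses):cuda()

    local crit = cudnn.SpatialCrossEntropyCriterion():cuda()
    local loss      = crit:forward(input, target)   -- scalar loss
    local gradInput = crit:backward(input, target)  -- bsz x nClasses x h x w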
diff --git a/SpatialMaxPooling.lua b/SpatialMaxPooling.lua
index e9a7b89..bd6bdbc 100644
--- a/SpatialMaxPooling.lua
+++ b/SpatialMaxPooling.lua
@@ -4,3 +4,7 @@ function SpatialMaxPooling:__init(kW, kH, dW, dH, padW, padH)
parent.__init(self, kW, kH, dW, dH, padW, padH)
self.mode = 'CUDNN_POOLING_MAX'
end
+
+function SpatialMaxPooling:__tostring__()
+ return nn.SpatialMaxPooling.__tostring__(self)
+end
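The __tostring__ delegation here (and in SpatialAveragePooling above) makes the cudnn modules print like their nn counterparts; since nn formats the name via torch.type(self), the output should read something like:

    print(tostring(cudnn.SpatialMaxPooling(2, 2, 2, 2)))
    -- cudnn.SpatialMaxPooling(2x2, 2,2)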
diff --git a/ffi.lua b/ffi.lua
index c8ee963..e74c5cb 100644
--- a/ffi.lua
+++ b/ffi.lua
@@ -567,7 +567,7 @@ cudnnStatus_t cudnnConvolutionBackwardFilter_v3(
const void *beta,
const cudnnFilterDescriptor_t dwDesc,
void *dw );
-
+
/*********************************************************/
/* helper function to provide the convolution algo that fit best the requirement */
typedef enum
@@ -937,7 +937,7 @@ cudnnStatus_t cudnnCreateLRNDescriptor(
typedef enum { CUDNN_LRN_MIN_N = 1, /* minimum allowed lrnN */
CUDNN_LRN_MAX_N = 16 } /* maximum allowed lrnN */
LRN_MinMaxFakeEnum;
-
+
/* define CUDNN_LRN_MIN_K 1e-5 -- minimum allowed lrnK */
/* define CUDNN_LRN_MIN_BETA 0.01 -- minimum allowed lrnBeta */
@@ -1228,7 +1228,7 @@ cudnnStatus_t cudnnGetConvolutionNdDescriptor_v2(
int strideA[],
int upscaleA[],
cudnnConvolutionMode_t *mode );
-
+
cudnnStatus_t cudnnAddTensor_v2(
cudnnHandle_t handle,
cudnnAddMode_t mode,
@@ -1238,7 +1238,7 @@ cudnnStatus_t cudnnAddTensor_v2(
const void *beta,
cudnnTensorDescriptor_t yDesc,
void *y );
-
+
cudnnStatus_t cudnnConvolutionBackwardFilter_v2(
cudnnHandle_t handle,
const void *alpha,
@@ -1250,7 +1250,7 @@ cudnnStatus_t cudnnConvolutionBackwardFilter_v2(
const void *beta,
const cudnnFilterDescriptor_t dxDesc,
void *dx );
-
+
cudnnStatus_t cudnnConvolutionBackwardData_v2(
cudnnHandle_t handle,
const void *alpha,
@@ -1264,12 +1264,19 @@ cudnnStatus_t cudnnConvolutionBackwardData_v2(
void *dx );
]]
-local ok,err = pcall(function() cudnn.C = ffi.load('libcudnn') end)
+local libnames = {'libcudnn.so.4', 'libcudnn.4.dylib'}
+
+local ok, err = false, nil
+for i=1,#libnames do
+   ok, err = pcall(function () cudnn.C = ffi.load(libnames[i]) end)
+   if ok then break; end  -- err from the last failed pcall is kept for the message below
+end
+
if not ok then
print(err)
- error([['libcudnn not found in library path.
+ error([['libcudnn (R4) not found in library path.
Please install CuDNN from https://developer.nvidia.com/cuDNN
-Then make sure all the files named as libcudnn.so* are placed in your library load path (for example /usr/local/lib , or manually add a path to LD_LIBRARY_PATH)
+Then make sure files named as libcudnn.so.4 or libcudnn.4.dylib are placed in your library load path (for example /usr/local/lib , or manually add a path to LD_LIBRARY_PATH)
]])
end
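Since the loader now walks a list of candidate names, an extra install location can be supported by extending the table; a hedged sketch (the absolute-path entry is illustrative, not part of this commit):

    local libnames = {'libcudnn.so.4', 'libcudnn.4.dylib',
                      '/usr/local/cuda/lib64/libcudnn.so.4'}  -- hypothetical extra path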
diff --git a/init.lua b/init.lua
index 105f9d7..dea1d3d 100644
--- a/init.lua
+++ b/init.lua
@@ -111,6 +111,7 @@ include 'SoftMax.lua'
include 'LogSoftMax.lua'
include 'SpatialCrossMapLRN.lua'
include 'SpatialBatchNormalization.lua'
+include 'SpatialCrossEntropyCriterion.lua'
include 'functional.lua'
diff --git a/test/test.lua b/test/test.lua
index 0a2fb01..6dbd0d8 100644
--- a/test/test.lua
+++ b/test/test.lua
@@ -730,6 +730,73 @@ function cudnntest.LogSoftMax_batch()
precision_backward, 'error on state (backward) ')
end
+function cudnntest.SpatialLogSoftMax()
+ -- batch
+ local numLabels = math.random(5,10)
+ local h = math.random(5,10)
+ local w = math.random(5,10)
+ local bsz = math.random(3, 7)
+ local input = torch.zeros(bsz, numLabels, h, w):normal():cuda()
+ local target = torch.zeros(bsz, numLabels, h, w):normal():cuda()
+
+ local cri = cudnn.SpatialLogSoftMax():cuda()
+ local gcri = nn.LogSoftMax():cuda()
+
+ local op = cri:forward(input, target)
+ local gi = cri:backward(input, target)
+
+ local gop = op:clone():zero()
+ local ggi = gi:clone():zero()
+
+ for i=1,h do
+ for j=1,w do
+ local i1 = input[{{}, {}, {i}, {j}}]:contiguous():squeeze()
+ local t1 = target[{{}, {}, {i}, {j}}]:contiguous():squeeze()
+ local gop1 = gcri:forward(i1, t1)
+ local ggi1 = gcri:backward(i1, t1)
+ gop[{{}, {}, {i}, {j}}]:copy(gop1)
+ ggi[{{}, {}, {i}, {j}}]:copy(ggi1)
+ end
+ end
+ local err = (gi - ggi):abs():max()
+ mytester:assertlt(err, precision_backward, 'error on state (backward)')
+ err = (op - gop):abs():max()
+ mytester:assertlt(err, precision_forward, 'error on state (forward)')
+end
+
+function cudnntest.SpatialCrossEntropyCriterion()
+ -- batch
+ local numLabels = math.random(5,10)
+ local h = math.random(5,10)
+ local w = math.random(5,10)
+ local bsz = math.random(3, 7)
+ local input = torch.zeros(bsz, numLabels, h, w):normal():cuda()
+ local target = torch.Tensor(bsz, h, w):random(1, numLabels):cuda()
+
+ local cri = cudnn.SpatialCrossEntropyCriterion():cuda()
+
+ local gcri = nn.CrossEntropyCriterion():cuda()
+
+ local op = cri:forward(input, target)
+ local gi = cri:backward(input, target)
+
+ local ggi = gi:clone():zero()
+
+ for i=1,h do
+ for j=1,w do
+ local i1 = input[{{}, {}, {i}, {j}}]:contiguous():squeeze()
+ local t1 = target[{{}, {i}, {j}}]:contiguous():squeeze()
+ local gop1 = gcri:forward(i1, t1)
+ local ggi1 = gcri:backward(i1, t1)
+ ggi[{{}, {}, {i}, {j}}]:copy(ggi1)
+ end
+ end
+ local err = (gi - ggi):abs():max()
+ mytester:assertlt(err, precision_backward, 'error on state (backward)')
+
+end
+
+
function cudnntest.functional_bias2D()
local bs = math.random(1,32)
local from = math.random(1,32)