author     Ronan Collobert <ronan@collobert.com>    2015-03-13 21:56:48 +0300
committer  Ronan Collobert <ronan@collobert.com>    2015-03-13 21:56:48 +0300
commit     aec8e83dc8e7183008b6f989adeb27c8ef31e67d (patch)
tree       b3970fb968e8d142b163d6c0a85c1cb9b83424ac
parent     cef0cb0cba92f88d8bf076e6fb4088503a2b7845 (diff)
added doc + test case for CrossEntropyCriterion
-rw-r--r--  CrossEntropyCriterion.lua |  4
-rwxr-xr-x  doc/criterion.md          | 58
-rw-r--r--  test.lua                  | 37
3 files changed, 81 insertions, 18 deletions
diff --git a/CrossEntropyCriterion.lua b/CrossEntropyCriterion.lua
index 1350afc..2b3c78c 100644
--- a/CrossEntropyCriterion.lua
+++ b/CrossEntropyCriterion.lua
@@ -2,8 +2,8 @@ local CrossEntropyCriterion, Criterion = torch.class('nn.CrossEntropyCriterion',
 
 function CrossEntropyCriterion:__init(weights)
    Criterion.__init(self)
-   self.nll = nn.ClassNLLCriterion(weights)
    self.lsm = nn.LogSoftMax()
+   self.nll = nn.ClassNLLCriterion(weights)
 end
 
 function CrossEntropyCriterion:updateOutput(input, target)
@@ -21,7 +21,7 @@ function CrossEntropyCriterion:updateGradInput(input, target)
    target = type(target) == 'number' and target or target:squeeze()
    self.nll:updateGradInput(self.lsm.output, target)
    self.lsm:updateGradInput(input, self.nll.gradInput)
-   self.gradInput:view(self.nll.gradInput, size)
+   self.gradInput:view(self.lsm.gradInput, size)
    return self.gradInput
 end
 
diff --git a/doc/criterion.md b/doc/criterion.md
index 457fd05..eade7f7 100755
--- a/doc/criterion.md
+++ b/doc/criterion.md
@@ -5,8 +5,9 @@ Criterions are helpful to train a neural network. Given an input and a
 target, they compute a gradient according to a given loss function.
 
 [AbsCriterion](#nn.AbsCriterion) and [MSECriterion](#nn.MSECriterion) are
 perfect for regression problems, while
-[ClassNLLCriterion](#nn.ClassNLLCriterion) is the criterion of choice when
-dealing with classification.
+[ClassNLLCriterion](#nn.ClassNLLCriterion) or
+[CrossEntropyCriterion](#nn.CrossEntropyCriterion) are the criteria of
+choice when dealing with classification.
 
 Criterions are [serializable](https://github.com/torch/torch7/blob/master/doc/file.md#serialization-methods).
@@ -79,15 +80,17 @@ criterion = nn.ClassNLLCriterion(weights)
 ```
 
 The negative log likelihood criterion. It is useful to train a classication
-problem with `n` classes.
-If provided, the optional argument `weights` should be a 1D Tensor assigning weight to each of the classes. This is particularly useful when you have an unbalanced training set.
-
-The `input` given through a `forward()` is
-expected to contain _log-probabilities_ of each class: `input` has to be a
-1D tensor of size `n`.
-Obtaining log-probabilities in a neural network is
-easily achieved by adding a [LogSoftMax](#nn.LogSoftMax) layer in the last
-layer of your neural network.
+problem with `n` classes. If provided, the optional argument `weights`
+should be a 1D Tensor assigning weight to each of the classes. This is
+particularly useful when you have an unbalanced training set.
+
+The `input` given through a `forward()` is expected to contain
+_log-probabilities_ of each class: `input` has to be a 1D tensor of size
+`n`. Obtaining log-probabilities in a neural network is easily achieved by
+adding a [LogSoftMax](#nn.LogSoftMax) layer in the last layer of your
+neural network. You may use
+[CrossEntropyCriterion](#nn.CrossEntropyCriterion) instead, if you prefer
+not to add an extra layer to your network.
 This criterion expect a class index (1 to the number of class) as `target`
 when calling [forward(input, target)](#nn.CriterionForward) and
 [backward(input, target)](#nn.CriterionBackward).
@@ -119,6 +122,39 @@ function gradUpdate(mlp,x,y,learningRate)
 end
 ```
 
+<a name="nn.CrossEntropyCriterion"/>
+## CrossEntropyCriterion ##
+
+```lua
+criterion = nn.CrossEntropyCriterion(weights)
+```
+
+This criterion combines [LogSoftMax](#nn.LogSoftMax) and
+[CrossEntropyCriterion](#nn.CrossEntropyCriterion) in one single class.
+
+It is useful to train a classication problem with `n` classes. If
+provided, the optional argument `weights` should be a 1D Tensor assigning
+weight to each of the classes. This is particularly useful when you have an
+unbalanced training set.
+
+The `input` given through a `forward()` is expected to contain scores for
+each class: `input` has to be a 1D tensor of size `n`. This criterion
+expect a class index (1 to the number of class) as `target` when calling
+[forward(input, target)](#nn.CriterionForward) and
+[backward(input, target)](#nn.CriterionBackward).
+
+The loss can be described as:
+
+```lua
+loss(x, class) = forward(x, class) = -log( e^x[class] / (\sum_j e^x[j]) )
+                                   = -x[class] + log( \sum_j e^x[j] )
+```
+or in the case of the `weights` argument being specified:
+
+```lua
+loss(x, class) = forward(x, class) = weights[class]*( -x[class] + log( \sum_j e^x[j] ) )
+```
+
 <a name="nn.DistKLDivCriterion"/>
 ## DistKLDivCriterion ##
 
diff --git a/test.lua b/test.lua
--- a/test.lua
+++ b/test.lua
@@ -723,19 +723,21 @@ local function criterionJacobianTest1D(cri, input, target)
    local dfdx = cri:backward(input, target)
    -- for each input perturbation, do central difference
    local centraldiff_dfdx = torch.Tensor():resizeAs(dfdx)
-   for i=1,input:size(1) do
+   local input_s = input:storage()
+   local centraldiff_dfdx_s = centraldiff_dfdx:storage()
+   for i=1,input:nElement() do
       -- f(xi + h)
-      input[i] = input[i] + eps
+      input_s[i] = input_s[i] + eps
       local fx1 = cri:forward(input, target)
       -- f(xi - h)
-      input[i] = input[i] - 2*eps
+      input_s[i] = input_s[i] - 2*eps
       local fx2 = cri:forward(input, target)
       -- f'(xi) = (f(xi + h) - f(xi - h)) / 2h
       local cdfx = (fx1 - fx2) / (2*eps)
       -- store f' in appropriate place
-      centraldiff_dfdx[i] = cdfx
+      centraldiff_dfdx_s[i] = cdfx
       -- reset input[i]
-      input[i] = input[i] + eps
+      input_s[i] = input_s[i] + eps
    end
 
    -- compare centraldiff_dfdx with :backward()
@@ -804,6 +806,31 @@ function nntest.ClassNLLCriterion()
    criterionJacobianTest1D(cri, input, target)
 end
 
+function nntest.CrossEntropyCriterion()
+   -- stochastic
+   local numLabels = math.random(5, 10)
+   local input = torch.zeros(numLabels)
+   local target = torch.random(1, numLabels)
+
+   local cri = nn.CrossEntropyCriterion()
+   criterionJacobianTest1D(cri, input, target)
+
+   -- batch
+   local numLabels = math.random(5,10)
+   local bsz = math.random(3, 7)
+   local input = torch.zeros(bsz, numLabels)
+   local target = torch.Tensor(bsz):random(1, numLabels)
+
+   local cri = nn.CrossEntropyCriterion()
+   criterionJacobianTest1D(cri, input, target)
+
+   -- with weights
+   local weights = torch.rand(numLabels)
+   weights = weights / weights:sum()
+   cri = nn.CrossEntropyCriterion(weights)
+   criterionJacobianTest1D(cri, input, target)
+end
+
 function nntest.LogSigmoid()
    local ini = math.random(3,5)
    local inj = math.random(3,5)
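For reference, a minimal usage sketch of the criterion documented above (not part of this commit; the 4-class input and the target index 3 are arbitrary). It feeds raw scores to `nn.CrossEntropyCriterion` and compares against the equivalent `nn.LogSoftMax` + `nn.ClassNLLCriterion` pipeline:

```lua
require 'nn'

-- raw (unnormalized) scores for 4 classes, and a target class index
local input  = torch.randn(4)
local target = 3

-- combined criterion: consumes scores directly
local ce = nn.CrossEntropyCriterion()
local loss = ce:forward(input, target)
local gradInput = ce:backward(input, target)

-- equivalent two-step pipeline: LogSoftMax layer + ClassNLLCriterion
local lsm = nn.LogSoftMax()
local nll = nn.ClassNLLCriterion()
local loss2 = nll:forward(lsm:forward(input), target)

print(loss, loss2)  -- the two losses should agree up to numerical precision
```

In batch mode the same calls accept a `bsz x n` score matrix and a target tensor of `bsz` class indices, which is what the new batch test above exercises.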
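The updated `criterionJacobianTest1D` above validates `backward()` against a central-difference estimate taken over the input's underlying storage, so the same loop also covers batch (multi-dimensional) inputs. A standalone sketch of that check, using only standard Torch calls; the function name, epsilon, and the example sizes are illustrative, not part of the commit:

```lua
require 'nn'

-- Central-difference check of criterion:backward() against a numerical
-- estimate of d(loss)/d(input).
local function checkCriterionGradient(criterion, input, target, eps)
   eps = eps or 1e-6
   criterion:forward(input, target)
   local analytic = criterion:backward(input, target):clone()
   local numeric = torch.Tensor():resizeAs(analytic)
   -- iterate over the underlying storage so every element of a
   -- multi-dimensional (batch) input gets perturbed as well
   local input_s = input:storage()
   local numeric_s = numeric:storage()
   for i = 1, input:nElement() do
      input_s[i] = input_s[i] + eps
      local fplus = criterion:forward(input, target)
      input_s[i] = input_s[i] - 2 * eps
      local fminus = criterion:forward(input, target)
      numeric_s[i] = (fplus - fminus) / (2 * eps)
      input_s[i] = input_s[i] + eps   -- restore the original value
   end
   return (analytic - numeric):abs():max()
end

local err = checkCriterionGradient(nn.CrossEntropyCriterion(),
                                   torch.randn(7), torch.random(1, 7))
print(err)  -- should be close to zero
```

Iterating over `input:storage()` rather than indexing `input[i]` is the same change the commit makes to the test: it is what lets one loop handle both 1D and batch inputs.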