Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/torch/nn.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRonan Collobert <ronan@collobert.com>2015-03-13 21:56:48 +0300
committerRonan Collobert <ronan@collobert.com>2015-03-13 21:56:48 +0300
commitaec8e83dc8e7183008b6f989adeb27c8ef31e67d (patch)
treeb3970fb968e8d142b163d6c0a85c1cb9b83424ac
parentcef0cb0cba92f88d8bf076e6fb4088503a2b7845 (diff)
added doc + test case for CrossEntropyCriterion
-rw-r--r--CrossEntropyCriterion.lua4
-rwxr-xr-xdoc/criterion.md58
-rw-r--r--test.lua37
3 files changed, 81 insertions, 18 deletions
diff --git a/CrossEntropyCriterion.lua b/CrossEntropyCriterion.lua
index 1350afc..2b3c78c 100644
--- a/CrossEntropyCriterion.lua
+++ b/CrossEntropyCriterion.lua
@@ -2,8 +2,8 @@ local CrossEntropyCriterion, Criterion = torch.class('nn.CrossEntropyCriterion',
function CrossEntropyCriterion:__init(weights)
Criterion.__init(self)
- self.nll = nn.ClassNLLCriterion(weights)
self.lsm = nn.LogSoftMax()
+ self.nll = nn.ClassNLLCriterion(weights)
end
function CrossEntropyCriterion:updateOutput(input, target)
@@ -21,7 +21,7 @@ function CrossEntropyCriterion:updateGradInput(input, target)
target = type(target) == 'number' and target or target:squeeze()
self.nll:updateGradInput(self.lsm.output, target)
self.lsm:updateGradInput(input, self.nll.gradInput)
- self.gradInput:view(self.nll.gradInput, size)
+ self.gradInput:view(self.lsm.gradInput, size)
return self.gradInput
end
diff --git a/doc/criterion.md b/doc/criterion.md
index 457fd05..eade7f7 100755
--- a/doc/criterion.md
+++ b/doc/criterion.md
@@ -5,8 +5,9 @@ Criterions are helpful to train a neural network. Given an input and a
target, they compute a gradient according to a given loss
function. [AbsCriterion](#nn.AbsCriterion) and
[MSECriterion](#nn.MSECriterion) are perfect for regression problems, while
-[ClassNLLCriterion](#nn.ClassNLLCriterion) is the criterion of choice when
-dealing with classification.
+[ClassNLLCriterion](#nn.ClassNLLCriterion) or
+[CrossEntropyCriterion](#nn.CrossEntropyCriterion) are the criteria of
+choice when dealing with classification.
Criterions are [serializable](https://github.com/torch/torch7/blob/master/doc/file.md#serialization-methods).
@@ -79,15 +80,17 @@ criterion = nn.ClassNLLCriterion(weights)
```
The negative log likelihood criterion. It is useful to train a classification
-problem with `n` classes.
-If provided, the optional argument `weights` should be a 1D Tensor assigning weight to each of the classes. This is particularly useful when you have an unbalanced training set.
-
-The `input` given through a `forward()` is
-expected to contain _log-probabilities_ of each class: `input` has to be a
-1D tensor of size `n`.
-Obtaining log-probabilities in a neural network is
-easily achieved by adding a [LogSoftMax](#nn.LogSoftMax) layer in the last
-layer of your neural network.
+problem with `n` classes. If provided, the optional argument `weights`
+should be a 1D Tensor assigning weight to each of the classes. This is
+particularly useful when you have an unbalanced training set.
+
+The `input` given through a `forward()` is expected to contain
+_log-probabilities_ of each class: `input` has to be a 1D tensor of size
+`n`. Obtaining log-probabilities in a neural network is easily achieved by
+adding a [LogSoftMax](#nn.LogSoftMax) layer in the last layer of your
+neural network. You may use
+[CrossEntropyCriterion](#nn.CrossEntropyCriterion) instead, if you prefer
+not to add an extra layer to your network.
This criterion expect a class index (1 to the number of class) as `target`
when calling [forward(input, target)](#nn.CriterionForward) and
[backward(input, target)](#nn.CriterionBackward).
@@ -119,6 +122,39 @@ function gradUpdate(mlp,x,y,learningRate)
end
```
+<a name="nn.CrossEntropyCriterion"/>
+## CrossEntropyCriterion ##
+
+```lua
+criterion = nn.CrossEntropyCriterion(weights)
+```
+
+This criterion combines [LogSoftMax](#nn.LogSoftMax) and
+[ClassNLLCriterion](#nn.ClassNLLCriterion) in one single class.
+
+It is useful to train a classification problem with `n` classes. If
+provided, the optional argument `weights` should be a 1D Tensor assigning
+weight to each of the classes. This is particularly useful when you have an
+unbalanced training set.
+
+The `input` given through a `forward()` is expected to contain scores for
+each class: `input` has to be a 1D tensor of size `n`. This criterion
+expects a class index (1 to the number of classes) as `target` when calling
+[forward(input, target)](#nn.CriterionForward) and
+[backward(input, target)](#nn.CriterionBackward).
+
+The loss can be described as:
+
+```lua
+loss(x, class) = forward(x, class) = -log( e^x[class] / (\sum_j e^x[j]) )
+ = -x[class] + log( \sum_j e^x[j] )
+```
+or in the case of the `weights` argument being specified:
+
+```lua
+loss(x, class) = forward(x, class) = weights[class]*( -x[class] + log( \sum_j e^x[j] ) )
+```
+
<a name="nn.DistKLDivCriterion"/>
## DistKLDivCriterion ##
diff --git a/test.lua b/test.lua
index 6174db2..3661d03 100644
--- a/test.lua
+++ b/test.lua
@@ -723,19 +723,21 @@ local function criterionJacobianTest1D(cri, input, target)
local dfdx = cri:backward(input, target)
-- for each input perturbation, do central difference
local centraldiff_dfdx = torch.Tensor():resizeAs(dfdx)
- for i=1,input:size(1) do
+ local input_s = input:storage()
+ local centraldiff_dfdx_s = centraldiff_dfdx:storage()
+ for i=1,input:nElement() do
-- f(xi + h)
- input[i] = input[i] + eps
+ input_s[i] = input_s[i] + eps
local fx1 = cri:forward(input, target)
-- f(xi - h)
- input[i] = input[i] - 2*eps
+ input_s[i] = input_s[i] - 2*eps
local fx2 = cri:forward(input, target)
-- f'(xi) = (f(xi + h) - f(xi - h)) / 2h
local cdfx = (fx1 - fx2) / (2*eps)
-- store f' in appropriate place
- centraldiff_dfdx[i] = cdfx
+ centraldiff_dfdx_s[i] = cdfx
-- reset input[i]
- input[i] = input[i] + eps
+ input_s[i] = input_s[i] + eps
end
-- compare centraldiff_dfdx with :backward()
@@ -804,6 +806,31 @@ function nntest.ClassNLLCriterion()
criterionJacobianTest1D(cri, input, target)
end
+function nntest.CrossEntropyCriterion()
+ -- stochastic
+ local numLabels = math.random(5, 10)
+ local input = torch.zeros(numLabels)
+ local target = torch.random(1, numLabels)
+
+ local cri = nn.CrossEntropyCriterion()
+ criterionJacobianTest1D(cri, input, target)
+
+ -- batch
+ local numLabels = math.random(5,10)
+ local bsz = math.random(3, 7)
+ local input = torch.zeros(bsz, numLabels)
+ local target = torch.Tensor(bsz):random(1, numLabels)
+
+ local cri = nn.CrossEntropyCriterion()
+ criterionJacobianTest1D(cri, input, target)
+
+ -- with weights
+ local weights = torch.rand(numLabels)
+ weights = weights / weights:sum()
+ cri = nn.CrossEntropyCriterion(weights)
+ criterionJacobianTest1D(cri, input, target)
+end
+
function nntest.LogSigmoid()
local ini = math.random(3,5)
local inj = math.random(3,5)