author     Ronan Collobert <ronan@collobert.com>    2015-03-13 21:56:48 +0300
committer  Ronan Collobert <ronan@collobert.com>    2015-03-13 21:56:48 +0300
commit     aec8e83dc8e7183008b6f989adeb27c8ef31e67d (patch)
tree       b3970fb968e8d142b163d6c0a85c1cb9b83424ac
parent     cef0cb0cba92f88d8bf076e6fb4088503a2b7845 (diff)
added doc + test case for CrossEntropyCriterion
-rw-r--r--  CrossEntropyCriterion.lua |  4
-rwxr-xr-x  doc/criterion.md          | 58
-rw-r--r--  test.lua                  | 37
3 files changed, 81 insertions, 18 deletions
diff --git a/CrossEntropyCriterion.lua b/CrossEntropyCriterion.lua
index 1350afc..2b3c78c 100644
--- a/CrossEntropyCriterion.lua
+++ b/CrossEntropyCriterion.lua
@@ -2,8 +2,8 @@ local CrossEntropyCriterion, Criterion = torch.class('nn.CrossEntropyCriterion',
 
 function CrossEntropyCriterion:__init(weights)
    Criterion.__init(self)
-   self.nll = nn.ClassNLLCriterion(weights)
    self.lsm = nn.LogSoftMax()
+   self.nll = nn.ClassNLLCriterion(weights)
 end
 
 function CrossEntropyCriterion:updateOutput(input, target)
@@ -21,7 +21,7 @@ function CrossEntropyCriterion:updateGradInput(input, target)
    target = type(target) == 'number' and target or target:squeeze()
    self.nll:updateGradInput(self.lsm.output, target)
    self.lsm:updateGradInput(input, self.nll.gradInput)
-   self.gradInput:view(self.nll.gradInput, size)
+   self.gradInput:view(self.lsm.gradInput, size)
    return self.gradInput
 end
 
diff --git a/doc/criterion.md b/doc/criterion.md
index 457fd05..eade7f7 100755
--- a/doc/criterion.md
+++ b/doc/criterion.md
@@ -5,8 +5,9 @@ Criterions are helpful to train a neural network. Given an input and a
 target, they compute a gradient according to a given loss function.
 
 [AbsCriterion](#nn.AbsCriterion) and [MSECriterion](#nn.MSECriterion) are
 perfect for regression problems, while
-[ClassNLLCriterion](#nn.ClassNLLCriterion) is the criterion of choice when
-dealing with classification.
+[ClassNLLCriterion](#nn.ClassNLLCriterion) or
+[CrossEntropyCriterion](#nn.CrossEntropyCriterion) are the criteria of
+choice when dealing with classification.
 
 Criterions are [serializable](https://github.com/torch/torch7/blob/master/doc/file.md#serialization-methods).
@@ -79,15 +80,17 @@ criterion = nn.ClassNLLCriterion(weights)
 ```
 
 The negative log likelihood criterion. It is useful to train a classication
-problem with `n` classes.
-If provided, the optional argument `weights` should be a 1D Tensor assigning weight to each of the classes. This is particularly useful when you have an unbalanced training set.
-
-The `input` given through a `forward()` is
-expected to contain _log-probabilities_ of each class: `input` has to be a
-1D tensor of size `n`.
-Obtaining log-probabilities in a neural network is
-easily achieved by adding a [LogSoftMax](#nn.LogSoftMax) layer in the last
-layer of your neural network.
+problem with `n` classes. If provided, the optional argument `weights`
+should be a 1D Tensor assigning weight to each of the classes. This is
+particularly useful when you have an unbalanced training set.
+
+The `input` given through a `forward()` is expected to contain
+_log-probabilities_ of each class: `input` has to be a 1D tensor of size
+`n`. Obtaining log-probabilities in a neural network is easily achieved by
+adding a [LogSoftMax](#nn.LogSoftMax) layer in the last layer of your
+neural network. You may use
+[CrossEntropyCriterion](#nn.CrossEntropyCriterion) instead, if you prefer
+not to add an extra layer to your network.
 This criterion expect a class index (1 to the number of class) as `target`
 when calling [forward(input, target)](#nn.CriterionForward) and
 [backward(input, target)](#nn.CriterionBackward).
@@ -119,6 +122,39 @@ function gradUpdate(mlp,x,y,learningRate)
 end
 ```
 
+<a name="nn.CrossEntropyCriterion"/>
+## CrossEntropyCriterion ##
+
+```lua
+criterion = nn.CrossEntropyCriterion(weights)
+```
+
+This criterion combines [LogSoftMax](#nn.LogSoftMax) and
+[CrossEntropyCriterion](#nn.CrossEntropyCriterion) in one single class.
+
+It is useful to train a classication problem with `n` classes. If
+provided, the optional argument `weights` should be a 1D Tensor assigning
+weight to each of the classes. This is particularly useful when you have an
+unbalanced training set.
+
+The `input` given through a `forward()` is expected to contain scores for
+each class: `input` has to be a 1D tensor of size `n`. This criterion
+expect a class index (1 to the number of class) as `target` when calling
+[forward(input, target)](#nn.CriterionForward) and
+[backward(input, target)](#nn.CriterionBackward).
+
+The loss can be described as:
+
+```lua
+loss(x, class) = forward(x, class) = -log( e^x[class] / (\sum_j e^x[j]) )
+                                   = -x[class] + log( \sum_j e^x[j] )
+```
+or in the case of the `weights` argument being specified:
+
+```lua
+loss(x, class) = forward(x, class) = weights[class]*( -x[class] + log( \sum_j e^x[j] ) )
+```
+
 <a name="nn.DistKLDivCriterion"/>
 ## DistKLDivCriterion ##
 
diff --git a/test.lua b/test.lua
--- a/test.lua
+++ b/test.lua
@@ -723,19 +723,21 @@ local function criterionJacobianTest1D(cri, input, target)
    local dfdx = cri:backward(input, target)
    -- for each input perturbation, do central difference
    local centraldiff_dfdx = torch.Tensor():resizeAs(dfdx)
-   for i=1,input:size(1) do
+   local input_s = input:storage()
+   local centraldiff_dfdx_s = centraldiff_dfdx:storage()
+   for i=1,input:nElement() do
       -- f(xi + h)
-      input[i] = input[i] + eps
+      input_s[i] = input_s[i] + eps
       local fx1 = cri:forward(input, target)
       -- f(xi - h)
-      input[i] = input[i] - 2*eps
+      input_s[i] = input_s[i] - 2*eps
       local fx2 = cri:forward(input, target)
       -- f'(xi) = (f(xi + h) - f(xi - h)) / 2h
       local cdfx = (fx1 - fx2) / (2*eps)
       -- store f' in appropriate place
-      centraldiff_dfdx[i] = cdfx
+      centraldiff_dfdx_s[i] = cdfx
       -- reset input[i]
-      input[i] = input[i] + eps
+      input_s[i] = input_s[i] + eps
    end
 
    -- compare centraldiff_dfdx with :backward()
@@ -804,6 +806,31 @@ function nntest.ClassNLLCriterion()
    criterionJacobianTest1D(cri, input, target)
 end
 
+function nntest.CrossEntropyCriterion()
+   -- stochastic
+   local numLabels = math.random(5, 10)
+   local input = torch.zeros(numLabels)
+   local target = torch.random(1, numLabels)
+
+   local cri = nn.CrossEntropyCriterion()
+   criterionJacobianTest1D(cri, input, target)
+
+   -- batch
+   local numLabels = math.random(5,10)
+   local bsz = math.random(3, 7)
+   local input = torch.zeros(bsz, numLabels)
+   local target = torch.Tensor(bsz):random(1, numLabels)
+
+   local cri = nn.CrossEntropyCriterion()
+   criterionJacobianTest1D(cri, input, target)
+
+   -- with weights
+   local weights = torch.rand(numLabels)
+   weights = weights / weights:sum()
+   cri = nn.CrossEntropyCriterion(weights)
+   criterionJacobianTest1D(cri, input, target)
+end
+
 function nntest.LogSigmoid()
    local ini = math.random(3,5)
    local inj = math.random(3,5)
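For reference, a minimal usage sketch of the criterion documented above (not part of this commit; the 4-class input and the target index 3 are arbitrary). It feeds raw scores to `nn.CrossEntropyCriterion` and compares against the equivalent `nn.LogSoftMax` + `nn.ClassNLLCriterion` pipeline:

```lua
require 'nn'

-- raw (unnormalized) scores for 4 classes, and a target class index
local input  = torch.randn(4)
local target = 3

-- combined criterion: consumes scores directly
local ce = nn.CrossEntropyCriterion()
local loss = ce:forward(input, target)
local gradInput = ce:backward(input, target)

-- equivalent two-step pipeline: LogSoftMax layer + ClassNLLCriterion
local lsm = nn.LogSoftMax()
local nll = nn.ClassNLLCriterion()
local loss2 = nll:forward(lsm:forward(input), target)

print(loss, loss2)  -- the two losses should agree up to numerical precision
```

In batch mode the same calls accept a `bsz x n` score matrix and a target tensor of `bsz` class indices, which is what the new batch test above exercises.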
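The updated `criterionJacobianTest1D` above validates `backward()` against a central-difference estimate taken over the input's underlying storage, so the same loop also covers batch (multi-dimensional) inputs. A standalone sketch of that check, using only standard Torch calls; the function name, epsilon, and the example sizes are illustrative, not part of the commit:

```lua
require 'nn'

-- Central-difference check of criterion:backward() against a numerical
-- estimate of d(loss)/d(input).
local function checkCriterionGradient(criterion, input, target, eps)
   eps = eps or 1e-6
   criterion:forward(input, target)
   local analytic = criterion:backward(input, target):clone()
   local numeric = torch.Tensor():resizeAs(analytic)
   -- iterate over the underlying storage so every element of a
   -- multi-dimensional (batch) input gets perturbed as well
   local input_s = input:storage()
   local numeric_s = numeric:storage()
   for i = 1, input:nElement() do
      input_s[i] = input_s[i] + eps
      local fplus = criterion:forward(input, target)
      input_s[i] = input_s[i] - 2 * eps
      local fminus = criterion:forward(input, target)
      numeric_s[i] = (fplus - fminus) / (2 * eps)
      input_s[i] = input_s[i] + eps   -- restore the original value
   end
   return (analytic - numeric):abs():max()
end

local err = checkCriterionGradient(nn.CrossEntropyCriterion(),
                                   torch.randn(7), torch.random(1, 7))
print(err)  -- should be close to zero
```

Iterating over `input:storage()` rather than indexing `input[i]` is the same change the commit makes to the test: it is what lets one loop handle both 1D and batch inputs.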