diff options
author | Sagar M. Waghmare <SW@discoverelement.com> | 2016-11-21 02:05:10 +0300 |
---|---|---|
committer | Soumith Chintala <soumith@fb.com> | 2016-12-30 19:26:46 +0300 |
commit | 5fb3129a92cf84dedc547629079f04b904380d56 (patch) | |
tree | d1ec72e4f30e768b63edb6182e6a2752328d9754 | |
parent | 883571767f06a1ffaa4afdc9b6bc503c09168d7b (diff) |
Initial Commit for DistanceRatioCriterion for triplet siamese model training.
-rw-r--r-- | DistanceRatioCriterion.lua | 142 | ||||
-rw-r--r-- | doc/criterion.md | 56 | ||||
-rw-r--r-- | init.lua | 1 | ||||
-rw-r--r-- | test.lua | 22 |
4 files changed, 221 insertions, 0 deletions
--[[
   Probabilistic criterion for a triplet Siamese model used to learn
   embeddings.
   Ref: https://arxiv.org/pdf/1610.00243.pdf

   loss = -log( exp(-X) / ( exp(-X) + exp(-Y) ) )
   where
      X : distance between similar samples
      Y : distance between dissimilar samples

   The loss can be broken down with the following log expansion:

   loss = -log( exp(-X) ) - (-log( exp(-X) + exp(-Y) ))
        = -log( exp(-X) ) + log( exp(-X) + exp(-Y) )
        = -(-X) + log( exp(-X) + exp(-Y) )
        = X + log( exp(-X) + exp(-Y) )

   Gradients:
      dLoss/dX = 1 + 1 / (exp(-X) + exp(-Y)) * -1 * exp(-X)
               = 1 - exp(-X) / (exp(-X) + exp(-Y))

      dLoss/dY = 0 + 1 / (exp(-X) + exp(-Y)) * -1 * exp(-Y)
               = -exp(-Y) / (exp(-X) + exp(-Y))
--]]

local DistanceRatioCriterion, parent = torch.class('nn.DistanceRatioCriterion',
                                                   'nn.Criterion')

-- sizeAverage (boolean, default true): when true, the loss and the gradients
-- are averaged over the batch dimension; otherwise they are summed.
-- Explicit nil check so that an explicit `false` is honored (a plain
-- `sizeAverage or true` would always yield true).
function DistanceRatioCriterion:__init(sizeAverage)
   parent.__init(self)
   if sizeAverage ~= nil then
      self.sizeAverage = sizeAverage
   else
      self.sizeAverage = true
   end
end

-- Forward pass.
-- input: table {X, Y} where X holds distances between similar samples and
--        Y distances between dissimilar samples; both must have the same
--        number of elements and the same batch size (size(1)).
-- Returns the scalar loss sum_i ( X_i + log(exp(-X_i) + exp(-Y_i)) ),
-- divided by the batch size when sizeAverage is true.
function DistanceRatioCriterion:updateOutput(input)
   assert(#input == 2, "Invalid number of inputs")

   local X = input[1]
   local Y = input[2]

   assert(X:nElement() == Y:nElement(), "Number of distances don't match.")
   assert(X:size(1) == Y:size(1), "Invalid distances' size.")

   -- Reusable buffers for exp(-X) and exp(-Y)
   self._expMinusX = self._expMinusX or X.new()
   self._expMinusY = self._expMinusY or Y.new()
   self._expMinusX:resizeAs(X):copy(X):mul(-1):exp()
   self._expMinusY:resizeAs(Y):copy(Y):mul(-1):exp()

   -- exp(-X) + exp(-Y)
   -- BUGFIX: the original read `self.sumExpMinusExp`, a field that is never
   -- written, so a fresh tensor was allocated on every forward call instead
   -- of reusing the `self._sumExpMinusXY` buffer.
   self._sumExpMinusXY = self._sumExpMinusXY or X.new()
   self._sumExpMinusXY:resizeAs(self._expMinusX):copy(self._expMinusX)
                      :add(self._expMinusY)

   -- log( exp(-X) + exp(-Y) )
   self._logSumExpMinusXY = self._logSumExpMinusXY or self._sumExpMinusXY.new()
   self._logSumExpMinusXY:resizeAs(self._sumExpMinusXY)
                         :copy(self._sumExpMinusXY):log()

   -- X + log( exp(-X) + exp(-Y) )
   self.loss = self.loss or self._logSumExpMinusXY.new()
   self.loss:resizeAs(X):copy(X):add(self._logSumExpMinusXY)

   if self.sizeAverage then
      return self.loss:sum() / X:size(1)
   else
      return self.loss:sum()
   end
end

-- Backward pass. Relies on the buffers computed in updateOutput, so forward
-- must have been called first with the same input.
-- Returns {dLoss/dX, dLoss/dY}, averaged over the batch when sizeAverage.
function DistanceRatioCriterion:updateGradInput(input)
   assert(#input == 2, "Invalid number of inputs")
   local X = input[1]
   local Y = input[2]
   assert(X:nElement() == Y:nElement(), "Number of distances don't match.")
   assert(X:size(1) == Y:size(1), "Invalid distances' size.")

   -- dLoss/dX = 1 - exp(-X) / (exp(-X) + exp(-Y))
   self.dX = self.dX or X.new()
   self.dX:resizeAs(self._expMinusX):copy(self._expMinusX):mul(-1)
   self.dX:cdiv(self._sumExpMinusXY)
   self.dX:add(1)

   -- dLoss/dY = -exp(-Y) / (exp(-X) + exp(-Y))
   self.dY = self.dY or Y.new()
   self.dY:resizeAs(self._expMinusY):copy(self._expMinusY):mul(-1)
   self.dY:cdiv(self._sumExpMinusXY)

   if self.sizeAverage then
      self.dX:div(X:size(1))
      self.dY:div(X:size(1))
   end

   return {self.dX, self.dY}
end

-- Drop cached buffers before a type conversion so they are re-allocated with
-- the new tensor type on the next forward/backward call.
function DistanceRatioCriterion:type(type, tensorCache)
   if type then
      self._expMinusX = nil
      self._expMinusY = nil
      self._sumExpMinusXY = nil
      self._logSumExpMinusXY = nil
      self.loss = nil
      self.dX = nil
      self.dY = nil
   end
   return parent.type(self, type, tensorCache)
end
--- Unit test for nn.DistanceRatioCriterion with sizeAverage enabled.
-- With every distance fixed to 1 (X = Y = 1 over a batch of 32), each
-- per-sample loss is 1 + log(exp(-1) + exp(-1)) = log(2), and the averaged
-- gradient tensors sum to +0.5 (w.r.t. X) and -0.5 (w.r.t. Y).
function nntest.DistanceRatioCriterion()
   local batchSize = 32
   local criterion = nn.DistanceRatioCriterion(true)
   local simDist = torch.rand(batchSize, 1):fill(1)
   local disDist = torch.rand(batchSize, 1):fill(1)

   -- Exercise updateOutput against the closed-form expected loss.
   local output = criterion:forward({simDist, disDist})
   local expected = 1 + math.log(math.exp(-1) + math.exp(-1))
   assert(math.abs(output - expected) < 0.000001,
          "DistanceRatioCriterion forward incorrect output")

   -- Exercise updateGradInput against the closed-form gradient sums.
   local grads = criterion:backward({simDist, disDist})
   local gradSim = grads[1]
   local gradDis = grads[2]
   assert(math.abs(gradSim:sum() - 0.5) < 0.000001,
          "DistanceRatioCriterion backward (dx) incorrect output")
   assert(math.abs(gradDis:sum() + 0.5) < 0.000001,
          "DistanceRatioCriterion backward (dy) incorrect output")
end