diff options
author | Sagar M. Waghmare <SW@discoverelement.com> | 2016-11-21 02:05:10 +0300 |
---|---|---|
committer | Soumith Chintala <soumith@fb.com> | 2016-12-30 19:26:46 +0300 |
commit | 5fb3129a92cf84dedc547629079f04b904380d56 (patch) | |
tree | d1ec72e4f30e768b63edb6182e6a2752328d9754 | |
parent | 883571767f06a1ffaa4afdc9b6bc503c09168d7b (diff) |
Initial Commit for DistanceRatioCriterion for triplet siamese model training.
-rw-r--r-- | DistanceRatioCriterion.lua | 142 | ||||
-rw-r--r-- | doc/criterion.md | 56 | ||||
-rw-r--r-- | init.lua | 1 | ||||
-rw-r--r-- | test.lua | 22 |
4 files changed, 221 insertions, 0 deletions
--[[
   Probabilistic criterion for a triplet Siamese model used to learn
   embeddings.
   Ref: https://arxiv.org/pdf/1610.00243.pdf

   loss = -log( exp(-X) / ( exp(-X) + exp(-Y) ) )
   where
      X : distance between similar samples
      Y : distance between dissimilar samples

   The loss can be broken down with the following log expansion:

   loss = -log( exp(-X) ) - (-log( exp(-X) + exp(-Y) ))
        = -log( exp(-X) ) + log( exp(-X) + exp(-Y) )
        = -(-X) + log( exp(-X) + exp(-Y) )
        = X + log( exp(-X) + exp(-Y) )

   Gradients:
      dLoss/dX = 1 + 1 / (exp(-X) + exp(-Y)) * -1 * exp(-X)
               = 1 - exp(-X) / (exp(-X) + exp(-Y))

      dLoss/dY = 0 + 1 / (exp(-X) + exp(-Y)) * -1 * exp(-Y)
               = -exp(-Y) / (exp(-X) + exp(-Y))
--]]

local DistanceRatioCriterion, parent = torch.class('nn.DistanceRatioCriterion',
                                                   'nn.Criterion')

-- sizeAverage (boolean, default true): when true, the loss and the gradients
-- are averaged over the batch dimension; otherwise they are summed.
-- Explicit nil check so that an explicit `false` is honored (a plain
-- `sizeAverage or true` would always yield true).
function DistanceRatioCriterion:__init(sizeAverage)
   parent.__init(self)
   if sizeAverage ~= nil then
      self.sizeAverage = sizeAverage
   else
      self.sizeAverage = true
   end
end

-- Forward pass.
-- input: table {X, Y} where X holds distances between similar samples and
--        Y distances between dissimilar samples; both must have the same
--        number of elements and the same batch size (size(1)).
-- Returns the scalar loss sum_i ( X_i + log(exp(-X_i) + exp(-Y_i)) ),
-- divided by the batch size when sizeAverage is true.
function DistanceRatioCriterion:updateOutput(input)
   assert(#input == 2, "Invalid number of inputs")

   local X = input[1]
   local Y = input[2]

   assert(X:nElement() == Y:nElement(), "Number of distances don't match.")
   assert(X:size(1) == Y:size(1), "Invalid distances' size.")

   -- Reusable buffers for exp(-X) and exp(-Y)
   self._expMinusX = self._expMinusX or X.new()
   self._expMinusY = self._expMinusY or Y.new()
   self._expMinusX:resizeAs(X):copy(X):mul(-1):exp()
   self._expMinusY:resizeAs(Y):copy(Y):mul(-1):exp()

   -- exp(-X) + exp(-Y)
   -- BUGFIX: the original read `self.sumExpMinusExp`, a field that is never
   -- written, so a fresh tensor was allocated on every forward call instead
   -- of reusing the `self._sumExpMinusXY` buffer.
   self._sumExpMinusXY = self._sumExpMinusXY or X.new()
   self._sumExpMinusXY:resizeAs(self._expMinusX):copy(self._expMinusX)
                      :add(self._expMinusY)

   -- log( exp(-X) + exp(-Y) )
   self._logSumExpMinusXY = self._logSumExpMinusXY or self._sumExpMinusXY.new()
   self._logSumExpMinusXY:resizeAs(self._sumExpMinusXY)
                         :copy(self._sumExpMinusXY):log()

   -- X + log( exp(-X) + exp(-Y) )
   self.loss = self.loss or self._logSumExpMinusXY.new()
   self.loss:resizeAs(X):copy(X):add(self._logSumExpMinusXY)

   if self.sizeAverage then
      return self.loss:sum() / X:size(1)
   else
      return self.loss:sum()
   end
end

-- Backward pass. Relies on the buffers computed in updateOutput, so forward
-- must have been called first with the same input.
-- Returns {dLoss/dX, dLoss/dY}, averaged over the batch when sizeAverage.
function DistanceRatioCriterion:updateGradInput(input)
   assert(#input == 2, "Invalid number of inputs")
   local X = input[1]
   local Y = input[2]
   assert(X:nElement() == Y:nElement(), "Number of distances don't match.")
   assert(X:size(1) == Y:size(1), "Invalid distances' size.")

   -- dLoss/dX = 1 - exp(-X) / (exp(-X) + exp(-Y))
   self.dX = self.dX or X.new()
   self.dX:resizeAs(self._expMinusX):copy(self._expMinusX):mul(-1)
   self.dX:cdiv(self._sumExpMinusXY)
   self.dX:add(1)

   -- dLoss/dY = -exp(-Y) / (exp(-X) + exp(-Y))
   self.dY = self.dY or Y.new()
   self.dY:resizeAs(self._expMinusY):copy(self._expMinusY):mul(-1)
   self.dY:cdiv(self._sumExpMinusXY)

   if self.sizeAverage then
      self.dX:div(X:size(1))
      self.dY:div(X:size(1))
   end

   return {self.dX, self.dY}
end

-- Drop cached buffers before a type conversion so they are re-allocated with
-- the new tensor type on the next forward/backward call.
function DistanceRatioCriterion:type(type, tensorCache)
   if type then
      self._expMinusX = nil
      self._expMinusY = nil
      self._sumExpMinusXY = nil
      self._logSumExpMinusXY = nil
      self.loss = nil
      self.dX = nil
      self.dY = nil
   end
   return parent.type(self, type, tensorCache)
end
--- Unit test for nn.DistanceRatioCriterion with sizeAverage enabled.
-- With every distance fixed to 1 (X = Y = 1 over a batch of 32), each
-- per-sample loss is 1 + log(exp(-1) + exp(-1)) = log(2), and the averaged
-- gradient tensors sum to +0.5 (w.r.t. X) and -0.5 (w.r.t. Y).
function nntest.DistanceRatioCriterion()
   local batchSize = 32
   local criterion = nn.DistanceRatioCriterion(true)
   local simDist = torch.rand(batchSize, 1):fill(1)
   local disDist = torch.rand(batchSize, 1):fill(1)

   -- Exercise updateOutput against the closed-form expected loss.
   local output = criterion:forward({simDist, disDist})
   local expected = 1 + math.log(math.exp(-1) + math.exp(-1))
   assert(math.abs(output - expected) < 0.000001,
          "DistanceRatioCriterion forward incorrect output")

   -- Exercise updateGradInput against the closed-form gradient sums.
   local grads = criterion:backward({simDist, disDist})
   local gradSim = grads[1]
   local gradDis = grads[2]
   assert(math.abs(gradSim:sum() - 0.5) < 0.000001,
          "DistanceRatioCriterion backward (dx) incorrect output")
   assert(math.abs(gradDis:sum() + 0.5) < 0.000001,
          "DistanceRatioCriterion backward (dy) incorrect output")
end