github.com/torch/nn.git
author    Sagar M. Waghmare <SW@discoverelement.com>  2016-11-21 02:05:10 +0300
committer Soumith Chintala <soumith@fb.com>  2016-12-30 19:26:46 +0300
commit    5fb3129a92cf84dedc547629079f04b904380d56 (patch)
tree      d1ec72e4f30e768b63edb6182e6a2752328d9754
parent    883571767f06a1ffaa4afdc9b6bc503c09168d7b (diff)
Initial Commit for DistanceRatioCriterion for triplet siamese model training.
-rw-r--r--  DistanceRatioCriterion.lua  142
-rw-r--r--  doc/criterion.md             56
-rw-r--r--  init.lua                      1
-rw-r--r--  test.lua                     22
4 files changed, 221 insertions, 0 deletions
diff --git a/DistanceRatioCriterion.lua b/DistanceRatioCriterion.lua
new file mode 100644
index 0000000..271d374
--- /dev/null
+++ b/DistanceRatioCriterion.lua
@@ -0,0 +1,142 @@
+--[[
+ Probabilistic Criterion for Triplet Siamese Model for learning embedding.
+ Ref: https://arxiv.org/pdf/1610.00243.pdf
+
+ loss = -log( exp(-X) / ( exp(-X) + exp(-Y) ) )
+ where
+ X : Distance between similar samples
+ Y : Distance between dissimilar samples
+
+ The loss can be broken down into the following log expansion
+
+ loss = -log( exp(-X) ) - (-log( exp(-X) + exp(-Y) ))
+ = -log( exp(-X) ) + log( exp(-X) + exp(-Y) )
+ = -(-X) + log( exp(-X) + exp(-Y) )
+ = X + log( exp(-X) + exp(-Y) )
+
+ Gradients:
+ dLoss/dX = 1 + 1 / (exp(-X) + exp(-Y)) * -1 * exp(-X)
+ = 1 - exp(-X) / (exp(-X) + exp(-Y))
+
+ dLoss/dY = 0 + 1 / (exp(-X) + exp(-Y)) * -1 * exp(-Y)
+ = -exp(-Y) / (exp(-X) + exp(-Y))
+
+--]]
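+
+-- Worked example (illustrative): for X = Y = 1,
+--    loss     = 1 + log(exp(-1) + exp(-1)) = log(2), about 0.6931
+--    dLoss/dX = 1 - exp(-1)/(2*exp(-1))    =  0.5
+--    dLoss/dY =     -exp(-1)/(2*exp(-1))   = -0.5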
+
+local DistanceRatioCriterion, parent = torch.class('nn.DistanceRatioCriterion',
+                                                   'nn.Criterion')
+
+function DistanceRatioCriterion:__init(sizeAverage)
+   parent.__init(self)
+   if sizeAverage ~= nil then
+      self.sizeAverage = sizeAverage
+   else
+      self.sizeAverage = true
+   end
+end
+
+-- Forward
+--[[
+-- X : Distance between similar samples
+-- Y : Distance between dissimilar samples
+ loss = -log( exp(-X) ) - (-log( exp(-X) + exp(-Y) ))
+ = -log( exp(-X) ) + log( exp(-X) + exp(-Y) )
+ = -(-X) + log( exp(-X) + exp(-Y) )
+ = X + log( exp(-X) + exp(-Y) )
+--]]
+function DistanceRatioCriterion:updateOutput(input)
+   assert(#input == 2, "Invalid number of inputs")
+
+   local X = input[1]
+   local Y = input[2]
+
+   assert(X:nElement() == Y:nElement(), "Number of distances don't match.")
+   assert(X:size(1) == Y:size(1), "Invalid distances' size.")
+
+   -- Compute exp(-X) and exp(-Y)
+   self._expMinusX = self._expMinusX or X.new()
+   self._expMinusY = self._expMinusY or Y.new()
+   self._expMinusX:resizeAs(X):copy(X):mul(-1):exp()
+   self._expMinusY:resizeAs(Y):copy(Y):mul(-1):exp()
+
+   -- Compute ( exp(-X) + exp(-Y) )
+   self._sumExpMinusXY = self._sumExpMinusXY or X.new()
+   self._sumExpMinusXY:resizeAs(self._expMinusX):copy(self._expMinusX)
+                      :add(self._expMinusY)
+
+   -- Compute log( exp(-X) + exp(-Y) )
+   self._logSumExpMinusXY = self._logSumExpMinusXY or self._sumExpMinusXY.new()
+   self._logSumExpMinusXY:resizeAs(self._sumExpMinusXY)
+                         :copy(self._sumExpMinusXY):log()
+
+   -- Compute X + log( exp(-X) + exp(-Y) )
+   self.loss = self.loss or self._logSumExpMinusXY.new()
+   self.loss:resizeAs(X):copy(X):add(self._logSumExpMinusXY)
+
+   if self.sizeAverage then
+      self.output = self.loss:sum() / X:size(1)
+   else
+      self.output = self.loss:sum()
+   end
+   return self.output
+end
+
+-- Backward
+--[[
+-- X : Distance between similar samples
+-- Y : Distance between dissimilar samples
+
+ Gradients:
+ dLoss/dX = 1 + 1 / (exp(-X) + exp(-Y)) * -1 * exp(-X)
+ = 1 - exp(-X) / (exp(-X) + exp(-Y))
+
+ dLoss/dY = 0 + 1 / (exp(-X) + exp(-Y)) * -1 * exp(-Y)
+ = -exp(-Y) / (exp(-X) + exp(-Y))
+
+--]]
+function DistanceRatioCriterion:updateGradInput(input)
+   assert(#input == 2, "Invalid number of inputs")
+   local X = input[1]
+   local Y = input[2]
+   assert(X:nElement() == Y:nElement(), "Number of distances don't match.")
+   assert(X:size(1) == Y:size(1), "Invalid distances' size.")
+
+   -- dLoss/dX
+   -- -exp(-X)
+   self.dX = self.dX or X.new()
+   self.dX:resizeAs(self._expMinusX):copy(self._expMinusX):mul(-1)
+
+   -- -exp(-X) / (exp(-X) + exp(-Y))
+   self.dX:cdiv(self._sumExpMinusXY)
+
+   -- 1 - exp(-X) / (exp(-X) + exp(-Y))
+   self.dX:add(1)
+
+   -- dLoss/dY
+   -- -exp(-Y)
+   self.dY = self.dY or Y.new()
+   self.dY:resizeAs(self._expMinusY):copy(self._expMinusY):mul(-1)
+
+   -- -exp(-Y) / (exp(-X) + exp(-Y))
+   self.dY:cdiv(self._sumExpMinusXY)
+
+   if self.sizeAverage then
+      self.dX:div(X:size(1))
+      self.dY:div(X:size(1))
+   end
+
+   self.gradInput = {self.dX, self.dY}
+   return self.gradInput
+end
+
+function DistanceRatioCriterion:type(type, tensorCache)
+   if type then
+      -- clear cached buffers so they are re-allocated with the new type
+      self._expMinusX = nil
+      self._expMinusY = nil
+      self._sumExpMinusXY = nil
+      self._logSumExpMinusXY = nil
+      self.loss = nil
+      self.dX = nil
+      self.dY = nil
+   end
+   return parent.type(self, type, tensorCache)
+end
diff --git a/doc/criterion.md b/doc/criterion.md
index 337d873..fb32232 100644
--- a/doc/criterion.md
+++ b/doc/criterion.md
@@ -23,6 +23,7 @@ target, they compute a gradient according to a given loss function.
* [`HingeEmbeddingCriterion`](#nn.HingeEmbeddingCriterion): takes a distance as input;
* [`L1HingeEmbeddingCriterion`](#nn.L1HingeEmbeddingCriterion): L1 distance between two inputs;
* [`CosineEmbeddingCriterion`](#nn.CosineEmbeddingCriterion): cosine distance between two inputs;
+ * [`DistanceRatioCriterion`](#nn.DistanceRatioCriterion): probabilistic criterion for training a siamese model with triplets;
* Miscellaneous criterions:
* [`MultiCriterion`](#nn.MultiCriterion) : a weighted sum of other criterions each applied to the same input and target;
* [`ParallelCriterion`](#nn.ParallelCriterion) : a weighted sum of other criterions each applied to a different input and target;
@@ -709,6 +710,61 @@ For batched inputs, if the internal variable `sizeAverage` is equal to `true`, t
By default, the losses are averaged over observations for each minibatch. However, if the field `sizeAverage` is set to `false`, the losses are instead summed.
+<a name="nn.DistanceRatioCriterion"></a>
+## DistanceRatioCriterion ##
+Reference: [Unsupervised Learning through Spatial Contrasting](https://arxiv.org/pdf/1610.00243.pdf)
+
+```lua
+criterion = nn.DistanceRatioCriterion(sizeAverage)
+```
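+
+If `sizeAverage` is `true` (the default), the loss is averaged over the minibatch; if `false`, the per-sample losses are summed.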
+
+This criterion is a probabilistic treatment of the margin cost. The model is trained on sample triplets `{Xs, Xa, Xd}`, where `Xa` is an anchor sample, `Xs` is a sample similar to the anchor, and `Xd` is a sample dissimilar to the anchor. Let `Ds` be the distance between the embeddings of `{Xs, Xa}` and `Dd` the distance between the embeddings of `{Xa, Xd}`; the loss is then defined as
+
+```lua
+ loss = -log( exp(-Ds) / ( exp(-Ds) + exp(-Dd) ) )
+```
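+
+Expanding the logarithm, this simplifies to
+
+```lua
+ loss = Ds + log( exp(-Ds) + exp(-Dd) )
+```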
+
+Example:
+```lua
+ torch.setdefaulttensortype("torch.FloatTensor")
+
+ require 'nn'
+
+ -- triplet : with batchSize of 32 and dimensionality 512
+ sample = {torch.rand(32, 512), torch.rand(32, 512), torch.rand(32, 512)}
+
+ embeddingModel = nn.Sequential()
+ embeddingModel:add(nn.Linear(512, 96)):add(nn.ReLU())
+
+ tripleModel = nn.ParallelTable()
+ tripleModel:add(embeddingModel)
+ tripleModel:add(embeddingModel:clone('weight', 'bias',
+                                      'gradWeight', 'gradBias'))
+ tripleModel:add(embeddingModel:clone('weight', 'bias',
+                                      'gradWeight', 'gradBias'))
+
+ -- Similar sample distance w.r.t. anchor sample
+ posDistModel = nn.Sequential()
+ posDistModel:add(nn.NarrowTable(1, 2)):add(nn.PairwiseDistance(2))
+
+ -- Dissimilar sample distance w.r.t. anchor sample
+ negDistModel = nn.Sequential()
+ negDistModel:add(nn.NarrowTable(2, 2)):add(nn.PairwiseDistance(2))
+
+ distanceModel = nn.ConcatTable():add(posDistModel):add(negDistModel)
+
+ -- Complete model
+ model = nn.Sequential():add(tripleModel):add(distanceModel)
+
+ -- DistanceRatioCriterion
+ criterion = nn.DistanceRatioCriterion(true)
+
+ -- Forward & Backward
+ output = model:forward(sample)
+ loss = criterion:forward(output)
+ dLoss = criterion:backward(output)
+ model:backward(sample, dLoss)
+```
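+
+To train the model, wrap the forward/backward pass above in a parameter update. Below is a minimal sketch, assuming the `optim` package is installed; the learning rate is purely illustrative:
+
+```lua
+ require 'optim'
+
+ params, gradParams = model:getParameters()
+ optimState = {learningRate = 0.01}
+
+ -- optim-style closure: returns the loss and the gradient w.r.t. parameters
+ function feval()
+    gradParams:zero()
+    local output = model:forward(sample)
+    local loss = criterion:forward(output)
+    model:backward(sample, criterion:backward(output))
+    return loss, gradParams
+ end
+
+ optim.sgd(feval, params, optimState)
+```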
<a name="nn.MarginRankingCriterion"></a>
## MarginRankingCriterion ##
diff --git a/init.lua b/init.lua
index 1e3924b..d675773 100644
--- a/init.lua
+++ b/init.lua
@@ -174,6 +174,7 @@ require('nn.WeightedMSECriterion')
require('nn.BCECriterion')
require('nn.CrossEntropyCriterion')
require('nn.ParallelCriterion')
+require('nn.DistanceRatioCriterion')
require('nn.PixelShuffle')
diff --git a/test.lua b/test.lua
index 774fba1..201f280 100644
--- a/test.lua
+++ b/test.lua
@@ -7190,6 +7190,28 @@ function nntest.Cosine()
mytester:assertTensorEq(cosine.gradWeight, cosine2.gradWeight, 0.000001, "Cosine gradWeight 2D err")
end
+function nntest.DistanceRatioCriterion()
+   local sizeAverage = true
+   local crit = nn.DistanceRatioCriterion(sizeAverage)
+   local X = torch.ones(32, 1)
+   local Y = torch.ones(32, 1)
+
+   -- Unit test for updateOutput
+   local loss = crit:forward({X, Y})
+   local trueLoss = 1 + math.log(math.exp(-1) + math.exp(-1))
+   assert(math.abs(loss - trueLoss) < 0.000001,
+          "DistanceRatioCriterion forward incorrect output")
+
+   -- Unit test for updateGradInput
+   local dxdy = crit:backward({X, Y})
+   local dx = dxdy[1]
+   local dy = dxdy[2]
+   assert(math.abs(dx:sum() - 0.5) < 0.000001,
+          "DistanceRatioCriterion backward (dx) incorrect output")
+   assert(math.abs(dy:sum() + 0.5) < 0.000001,
+          "DistanceRatioCriterion backward (dy) incorrect output")
+end
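+
+-- Illustrative finite-difference check of the analytic gradients (a sketch;
+-- the test name below is hypothetical, not part of the standard suite).
+function nntest.DistanceRatioCriterionFiniteDiff()
+   local crit = nn.DistanceRatioCriterion(false) -- summed loss
+   local X, Y = torch.rand(8, 1), torch.rand(8, 1)
+   local eps = 1e-4
+   crit:forward({X, Y})
+   local dX = crit:backward({X, Y})[1]:clone()
+   for i = 1, X:size(1) do
+      local orig = X[i][1]
+      X[i][1] = orig + eps
+      local lossPlus = crit:forward({X, Y})
+      X[i][1] = orig - eps
+      local lossMinus = crit:forward({X, Y})
+      X[i][1] = orig
+      local numeric = (lossPlus - lossMinus) / (2 * eps)
+      mytester:assertlt(math.abs(numeric - dX[i][1]), 1e-3,
+                        "DistanceRatioCriterion finite difference mismatch")
+   end
+end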
+
function nntest.ErrorHandling()
local l = nn.Linear(1, 1)
local p = nn.Parallel(1, 1):add(l)