diff options
author | Soumith Chintala <soumith@gmail.com> | 2016-04-28 07:32:20 +0300 |
---|---|---|
committer | Soumith Chintala <soumith@gmail.com> | 2016-04-28 07:32:20 +0300 |
commit | 09b172a83a589ebecf40c0f4429c352d400865bf (patch) | |
tree | 7bbd1296c96d3ab1bfa24738fa6656cd107c24e5 | |
parent | 6283ef3d9fed244422788d0329998a485e4f2a75 (diff) | |
parent | 3b669b13d31cae16ea7a61d7eb1d1e7b8fb35e1c (diff) |
Merge pull request #58 from gmarceaucaron/master
Adding QDRiemaNNLinear, a module for Riemannian gradient descent with the Outer Product metric
-rw-r--r-- | QDRiemaNNLinear.lua | 59 | ||||
-rw-r--r-- | README.md | 16 | ||||
-rw-r--r-- | init.lua | 1 |
3 files changed, 75 insertions, 1 deletions
diff --git a/QDRiemaNNLinear.lua b/QDRiemaNNLinear.lua new file mode 100644 index 0000000..961a467 --- /dev/null +++ b/QDRiemaNNLinear.lua @@ -0,0 +1,59 @@ +-- +-- Author: Gaetan Marceau Caron (gaetan.marceau-caron@inria.fr) and Yann Ollivier +-- Description: Implementation of the quasi-diagonal reduction +-- based on the Practical Riemannian Neural Networks paper (http://arxiv.org/abs/1602.08007) +-- +local QDRiemaNNLinear, parent = torch.class('nnx.QDRiemaNNLinear', 'nn.Linear') + +function QDRiemaNNLinear:__init(inputSize, outputSize, gamma, qdFlag) + parent.__init(self,inputSize, outputSize) + if qdFlag == nil then -- Flag for choosing between diagonal or quasi-diagonal reductions + self.qdFlag = true + else + self.qdFlag = qdFlag + end + self.gamma = gamma or 0.01 -- update rate of the metric + self.matReg = 1e-12 -- numerical regularization + self.initMetric = true -- flag for first update + self.Mii = torch.Tensor(outputSize, inputSize) + if self.qdFlag then self.M0i = torch.Tensor(outputSize, inputSize) end + self.M00 = torch.Tensor(outputSize) +end + +function QDRiemaNNLinear:accGradParameters(input, gradOutput) + parent.accGradParameters(self,input,gradOutput) + + local gradOutputSqT = torch.pow(gradOutput,2):t() + + if self.initMetric then + self.Mii:mm(gradOutputSqT,torch.pow(input,2)) + self.M00:mv(gradOutputSqT,self.addBuffer) + if self.qdFlag then self.M0i:mm(gradOutputSqT,input) end + self.initMetric = false + else + self.Mii:mul(1.-self.gamma):addmm(self.gamma,gradOutputSqT,torch.pow(input,2)) + if self.qdFlag then self.M0i:mul(1.-self.gamma):addmm(self.gamma,gradOutputSqT,input) end + self.M00:mul(1.-self.gamma):addmv(self.gamma,gradOutputSqT,self.addBuffer) + end + + if self.qdFlag then + local numerator = torch.add(torch.cmul(self.gradWeight,self.M00:view(-1,1):expandAs(self.gradWeight)), -1.0, torch.cmul(self.M0i,self.gradBias:view(-1,1):expandAs(self.M0i))) + local denominator = 
torch.add(torch.cmul(self.Mii,self.M00:view(-1,1):expandAs(self.Mii)),-1.0,torch.pow(self.M0i,2)):clamp(self.matReg,1e25) + self.gradWeight:copy(numerator:cdiv(denominator)) + + local temp = torch.cmul(self.M0i,self.gradWeight):sum(2) + self.gradBias:add(-1.,temp):cdiv(torch.add(self.M00,self.matReg)) + + else + self.gradWeight:cdiv(self.Mii:add(self.matReg)) + self.gradBias:cdiv(self.M00:add(self.matReg)) + end +end + +function QDRiemaNNLinear:reset() + self.initMetric = true + stdv = 1./math.sqrt(self.weight:size(2)) + self.weight:normal(0, stdv) + self.bias:zero() + return self +end @@ -14,8 +14,9 @@ This section includes documentation for the following objects: * [PushTable (and PullTable)](#nnx.PushTable) : extracts a table element and inserts it later in the network; * [MultiSoftMax](#nnx.MultiSoftMax) : performs a softmax over the last dimension of a 2D or 3D input; * [SpatialReSampling](#nnx.SpatialReSampling) : performs bilinear resampling of a 3D or 4D input image; + * [QDRiemaNNLinear](#nnx.QDRiemaNNLinear) : quasi-diagonal reduction for Riemannian gradient descent * [Recurrent](#nnx.Recurrent) : a generalized recurrent neural network container; - + <a name='nnx.SoftMaxTree'/> ### SoftMaxTree ### A hierarchy of parameterized log-softmaxes. Used for computing the likelihood of a leaf class. @@ -224,6 +225,19 @@ The re-sampled output: ![Lenna re-sampled](doc/image/Lenna-150x150-bilinear.png) +<a name='nnx.QDRiemaNNLinear'/> +### QDRiemaNNLinear ### +The Quasi-Diagonal Riemannian Neural Network Linear (QDRiemaNNLinear) module is an implementation +of the quasi-diagonal reduction of metrics, used for Riemannian gradient descent. +The algorithm is defined in Riemannian metrics for neural networks I: feedforward networks by Yann Ollivier (http://arxiv.org/abs/1303.0818) and an efficient implementation is described in Practical Riemannian Neural Networks by Yann Ollivier and Gaetan Marceau-Caron (http://arxiv.org/abs/1602.08007). 
+To use this module, simply replace `nn.Linear(ninput,noutput)` with `nnx.QDRiemaNNLinear(ninput,noutput)`. +As always, the step-size must be chosen accordingly. +Two additional arguments are also possible: +* gamma (default=0.01): determines the update rate of the metric for a minibatch setting, i.e., (1-gamma) * oldMetric + gamma * newMetric. Smaller minibatches require a smaller gamma. A default value depending on the size of the minibatches is `gamma = 1. - torch.pow(1.-1./nTraining,miniBatchSize)` where `nTraining` is the number of training examples of the dataset and `miniBatchSize` is the number of training examples per minibatch. +* qdFlag (default=true): whether to use the quasi-diagonal reduction (true) or only the diagonal (false). The former should be better. + +This module is a straightforward implementation of the outer product gradient descent. + ## Requirements * Torch7 (www.torch.ch) @@ -74,6 +74,7 @@ require('nnx.Balance') require('nnx.PushTable') require('nnx.PullTable') require('nnx.ZeroGrad') +require('nnx.QDRiemaNNLinear') -- criterions: require('nnx.SuperCriterion') |