From 1415827005e085fc475e88733538f42ed4270e71 Mon Sep 17 00:00:00 2001
From: GaetanMarceauCaron
Date: Wed, 13 Apr 2016 15:39:08 +0200
Subject: Adding the description of the QDRiemaNNLinear module

---
 README.md | 17 ++++++++++++++++-
 1 file changed, 16 insertions(+), 1 deletion(-)

diff --git a/README.md b/README.md
index d28d8b4..5a7fa4e 100644
--- a/README.md
+++ b/README.md
@@ -14,8 +14,9 @@ This section includes documentation for the following objects:
  * [PushTable (and PullTable)](#nnx.PushTable) : extracts a table element and inserts it later in the network;
  * [MultiSoftMax](#nnx.MultiSoftMax) : performs a softmax over the last dimension of a 2D or 3D input;
  * [SpatialReSampling](#nnx.SpatialReSampling) : performs bilinear resampling of a 3D or 4D input image;
+ * [QDRiemaNNLinear](#nnx.QDRiemaNNLinear) : quasi-diagonal reduction for Riemannian gradient descent;
  * [Recurrent](#nnx.Recurrent) : a generalized recurrent neural network container;
-
+
 ### SoftMaxTree ###
 A hierarchy of parameterized log-softmaxes. Used for computing the likelihood of a leaf class.
@@ -224,6 +225,20 @@ The re-sampled output:
 
 ![Lenna re-sampled](doc/image/Lenna-150x150-bilinear.png)
 
+
+### QDRiemaNNLinear ###
+The Quasi-Diagonal Riemannian Neural Network Linear (QDRiemaNNLinear) module is an implementation
+of the quasi-diagonal reduction of metrics, used for Riemannian gradient descent.
+The algorithm is defined in http://arxiv.org/abs/1303.0818 and an efficient implementation is described in http://arxiv.org/abs/1602.08007.
+To use this module, simply replace nn.Linear(ninput,noutput) with nnx.QDRiemaNNLinear(ninput,noutput).
+As always, the step-size must be chosen accordingly.
+Two other arguments are also possible:
+gamma (default=0.01): determines the update rate of the metric for a minibatch setting, i.e., (1-gamma) * oldMetric + gamma * newMetric. Should be set to 1/#minibatch
+qdFlag (default=true): Whether to use the quasi-diagonal reduction (true) or only the diagonal (false). The former should be better.
+
+To implement a natural gradient descent, one should also use a module for generating the pseudo-labels.
+
+
 ## Requirements
  * Torch7 (www.torch.ch)
--
cgit v1.2.3
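The substitution this commit documents is a one-line change in model construction. A minimal sketch, with illustrative layer sizes (784/100/10 are not values from the patch):

```lua
require 'nnx'

-- a small MLP in which each nn.Linear is swapped for the Riemannian version
local mlp = nn.Sequential()
mlp:add(nnx.QDRiemaNNLinear(784, 100))
mlp:add(nn.Tanh())
mlp:add(nnx.QDRiemaNNLinear(100, 10))
mlp:add(nn.LogSoftMax())
```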
From 01a4689e0232ccbb6b579dbfd5599598b9c781f5 Mon Sep 17 00:00:00 2001
From: GaetanMarceauCaron
Date: Wed, 13 Apr 2016 15:39:38 +0200
Subject: Adding the QDRiemaNNLinear module to the nnx package

---
 QDRiemaNNLinear.lua | 55 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 55 insertions(+)
 create mode 100644 QDRiemaNNLinear.lua

diff --git a/QDRiemaNNLinear.lua b/QDRiemaNNLinear.lua
new file mode 100644
index 0000000..76befe6
--- /dev/null
+++ b/QDRiemaNNLinear.lua
@@ -0,0 +1,55 @@
+--
+-- Author: Gaetan Marceau Caron (gaetan.marceau-caron@inria.fr)
+-- Description: Implementation of the quasi-diagonal reduction
+-- based on the Practical Riemannian Neural Networks (Yann Ollivier and Gaetan Marceau Caron) paper (http://arxiv.org/abs/1602.08007)
+--
+local QDRiemaNNLinear, parent = torch.class('nnx.QDRiemaNNLinear', 'nn.Linear')
+
+function QDRiemaNNLinear:__init(inputSize, outputSize, gamma, qdFlag)
+   parent.__init(self,inputSize, outputSize)
+   self.qdFlag = qdFlag or true -- Flag for choosing between diagonal or quasi-diagonal reductions
+   self.gamma = gamma or 0.01 -- update rate of the metric
+   self.matReg = 1e-12 -- numerical regularization
+   self.initMetric = true -- flag for first update
+   self.Mii = torch.Tensor(outputSize, inputSize)
+   if self.qdFlag then self.M0i = torch.Tensor(outputSize, inputSize) end
+   self.M00 = torch.Tensor(outputSize)
+end
+
+function QDRiemaNNLinear:accGradParameters(input, gradOutput)
+   parent.accGradParameters(self,input,gradOutput)
+
+   gradOutputSqT = torch.pow(gradOutput,2):t()
+
+   if self.initMetric then
+      self.Mii:mm(gradOutputSqT,torch.pow(input,2))
+      self.M00:mv(gradOutputSqT,self.addBuffer)
+      if self.qdFlag then self.M0i:mm(gradOutputSqT,input) end
+      self.initMetric = false
+   else
+      self.Mii:mul(1.-self.gamma):addmm(self.gamma,gradOutputSqT,torch.pow(input,2))
+      if self.qdFlag then self.M0i:mul(1.-self.gamma):addmm(self.gamma,gradOutputSqT,input) end
+      self.M00:mul(1.-self.gamma):addmv(self.gamma,gradOutputSqT,self.addBuffer)
+   end
+
+   if self.qdFlag then
+      local numerator = torch.add(torch.cmul(self.gradWeight,self.M00:view(-1,1):expandAs(self.gradWeight)), -1.0, torch.cmul(self.M0i,self.gradBias:view(-1,1):expandAs(self.M0i)))
+      local denominator = torch.add(torch.cmul(self.Mii,self.M00:view(-1,1):expandAs(self.Mii)),-1.0,torch.pow(self.M0i,2)):clamp(self.matReg,1e25)
+      self.gradWeight:copy(torch.cdiv(numerator,denominator))
+
+      local temp = torch.cmul(torch.cdiv(self.M0i,self.M00:view(-1,1):expandAs(self.M0i)),self.gradWeight)
+      self.gradBias:copy(torch.add(torch.cdiv(self.gradBias,self.M00),-1.0,torch.sum(temp,2)))
+
+   else
+      self.gradWeight:cdiv(self.Mii:add(self.matReg))
+      self.gradBias:cdiv(self.M00:add(self.matReg))
+   end
+end
+
+function QDRiemaNNLinear:reset()
+   self.initMetric = true
+   local stdv = 1./math.sqrt(self.weight:size(2))
+   self.weight:normal(0, stdv)
+   self.bias:zero()
+   return self
+end
--
cgit v1.2.3

From dad5a49aafccc7d0ca7f65c40b87b90f7c6e1330 Mon Sep 17 00:00:00 2001
From: GaetanMarceauCaron
Date: Wed, 13 Apr 2016 15:40:08 +0200
Subject: Adding the QDRiemaNNLinear module

---
 init.lua | 1 +
 1 file changed, 1 insertion(+)

diff --git a/init.lua b/init.lua
index b1e874b..4abe66a 100644
--- a/init.lua
+++ b/init.lua
@@ -74,6 +74,7 @@ require('nnx.Balance')
 require('nnx.PushTable')
 require('nnx.PullTable')
 require('nnx.ZeroGrad')
+require('nnx.QDRiemaNNLinear')
 
 -- criterions:
 require('nnx.SuperCriterion')
--
cgit v1.2.3
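Reading off accGradParameters above, with g = gradOutput and x = input on a minibatch: the module keeps gamma-discounted moving averages of three per-layer statistics and uses them to precondition the accumulated gradient (∇W, ∇b). A transcription of the code into formulas (the notation is mine, not taken from the patch):

```latex
% metric statistics (sums run over the minibatch index b)
M_{ii}(i,j) \leftarrow (1-\gamma)\,M_{ii}(i,j) + \gamma \sum_b g_{bi}^2\, x_{bj}^2
M_{0i}(i,j) \leftarrow (1-\gamma)\,M_{0i}(i,j) + \gamma \sum_b g_{bi}^2\, x_{bj}
M_{00}(i)   \leftarrow (1-\gamma)\,M_{00}(i)   + \gamma \sum_b g_{bi}^2

% quasi-diagonal preconditioning (the qdFlag branch; the code clamps the
% denominator below by matReg)
\delta W_{ij} = \frac{M_{00}(i)\,\nabla W_{ij} - M_{0i}(i,j)\,\nabla b_i}
                     {M_{ii}(i,j)\,M_{00}(i) - M_{0i}(i,j)^2}
\qquad
\delta b_i = \frac{\nabla b_i}{M_{00}(i)} - \sum_j \frac{M_{0i}(i,j)}{M_{00}(i)}\,\delta W_{ij}
```

With qdFlag=false only the diagonal survives: δW_ij = ∇W_ij / M_ii(i,j) and δb_i = ∇b_i / M_00(i).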
From 9727ba29aa3e04c2e39c264fa93e45fb703f4021 Mon Sep 17 00:00:00 2001
From: GaetanMarceauCaron
Date: Wed, 13 Apr 2016 15:46:54 +0200
Subject: reformatting

---
 README.md | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/README.md b/README.md
index 5a7fa4e..8c17f5d 100644
--- a/README.md
+++ b/README.md
@@ -229,12 +229,12 @@ The re-sampled output:
 ### QDRiemaNNLinear ###
 The Quasi-Diagonal Riemannian Neural Network Linear (QDRiemaNNLinear) module is an implementation
 of the quasi-diagonal reduction of metrics, used for Riemannian gradient descent.
-The algorithm is defined in http://arxiv.org/abs/1303.0818 and an efficient implementation is described in http://arxiv.org/abs/1602.08007.
+The algorithm is defined in Riemannian metrics for neural networks I: feedforward networks by Yann Ollivier (http://arxiv.org/abs/1303.0818) and an efficient implementation is described in Practical Riemannian Neural Networks by Yann Ollivier and Gaetan Marceau-Caron (http://arxiv.org/abs/1602.08007).
 To use this module, simply replace nn.Linear(ninput,noutput) with nnx.QDRiemaNNLinear(ninput,noutput).
 As always, the step-size must be chosen accordingly.
-Two other arguments are also possible:
-gamma (default=0.01): determines the update rate of the metric for a minibatch setting, i.e., (1-gamma) * oldMetric + gamma * newMetric. Should be set to 1/#minibatch
-qdFlag (default=true): Whether to use the quasi-diagonal reduction (true) or only the diagonal (false). The former should be better.
+Two additional arguments are also possible:
+* gamma (default=0.01): determines the update rate of the metric for a minibatch setting, i.e., (1-gamma) * oldMetric + gamma * newMetric. Should be set to 1/#minibatch
+* qdFlag (default=true): Whether to use the quasi-diagonal reduction (true) or only the diagonal (false). The former should be better.
 
 To implement a natural gradient descent, one should also use a module for generating the pseudo-labels.
--
cgit v1.2.3

From f8c6839417cc50c866fc13c951105661941faa55 Mon Sep 17 00:00:00 2001
From: GaetanMarceauCaron
Date: Wed, 13 Apr 2016 15:50:41 +0200
Subject: reformatting

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 8c17f5d..96947fe 100644
--- a/README.md
+++ b/README.md
@@ -230,7 +230,7 @@ The re-sampled output:
 The Quasi-Diagonal Riemannian Neural Network Linear (QDRiemaNNLinear) module is an implementation
 of the quasi-diagonal reduction of metrics, used for Riemannian gradient descent.
 The algorithm is defined in Riemannian metrics for neural networks I: feedforward networks by Yann Ollivier (http://arxiv.org/abs/1303.0818) and an efficient implementation is described in Practical Riemannian Neural Networks by Yann Ollivier and Gaetan Marceau-Caron (http://arxiv.org/abs/1602.08007).
-To use this module, simply replace nn.Linear(ninput,noutput) with nnx.QDRiemaNNLinear(ninput,noutput).
+To use this module, simply replace `nn.Linear(ninput,noutput)` with `nnx.QDRiemaNNLinear(ninput,noutput)`.
 As always, the step-size must be chosen accordingly.
 Two additional arguments are also possible:
 * gamma (default=0.01): determines the update rate of the metric for a minibatch setting, i.e., (1-gamma) * oldMetric + gamma * newMetric. Should be set to 1/#minibatch
--
cgit v1.2.3
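Both bullets map onto the last two constructor arguments of the module file added earlier in the series; a sketch with illustrative sizes and update rate (note that, until the "Code optimization and verification" commit further down, the `qdFlag or true` idiom in `__init` silently turned an explicit `false` back into `true`):

```lua
local qd   = nnx.QDRiemaNNLinear(100, 50, 0.05)         -- quasi-diagonal reduction (default)
local diag = nnx.QDRiemaNNLinear(100, 50, 0.05, false)  -- diagonal-only variant
```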
From dad5a49aafccc7d0ca7f65c40b87b90f7c6e1330 Mon Sep 17 00:00:00 2001
From: GaetanMarceauCaron
Date: Thu, 14 Apr 2016 10:52:33 +0200
Subject: Changing the default value for gamma

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 96947fe..dffeb17 100644
--- a/README.md
+++ b/README.md
@@ -233,7 +233,7 @@ The algorithm is defined in Riemannian metrics for neural networks I: feedforwar
 To use this module, simply replace `nn.Linear(ninput,noutput)` with `nnx.QDRiemaNNLinear(ninput,noutput)`.
 As always, the step-size must be chosen accordingly.
 Two additional arguments are also possible:
-* gamma (default=0.01): determines the update rate of the metric for a minibatch setting, i.e., (1-gamma) * oldMetric + gamma * newMetric. Should be set to 1/#minibatch
+* gamma (default=0.01): determines the update rate of the metric for a minibatch setting, i.e., (1-gamma) * oldMetric + gamma * newMetric. Smaller minibatches require a smaller gamma.
 * qdFlag (default=true): Whether to use the quasi-diagonal reduction (true) or only the diagonal (false). The former should be better.
 
 To implement a natural gradient descent, one should also use a module for generating the pseudo-labels.
--
cgit v1.2.3

From 0575d389496c71350ca304bd80e0b97153ad8fdb Mon Sep 17 00:00:00 2001
From: GaetanMarceauCaron
Date: Fri, 15 Apr 2016 16:18:17 +0200
Subject: Code optimization and verification

---
 QDRiemaNNLinear.lua | 69 ++++++++++++++++++++++++++++++++---------------------
 1 file changed, 42 insertions(+), 27 deletions(-)

diff --git a/QDRiemaNNLinear.lua b/QDRiemaNNLinear.lua
index 76befe6..8dc4793 100644
--- a/QDRiemaNNLinear.lua
+++ b/QDRiemaNNLinear.lua
@@ -1,48 +1,63 @@
 --
--- Author: Gaetan Marceau Caron (gaetan.marceau-caron@inria.fr)
+-- Author: Gaetan Marceau Caron (gaetan.marceau-caron@inria.fr) and Yann Ollivier
 -- Description: Implementation of the quasi-diagonal reduction
--- based on the Practical Riemannian Neural Networks (Yann Ollivier and Gaetan Marceau Caron) paper (http://arxiv.org/abs/1602.08007)
+-- based on the Practical Riemannian Neural Networks paper (http://arxiv.org/abs/1602.08007)
 --
 local QDRiemaNNLinear, parent = torch.class('nnx.QDRiemaNNLinear', 'nn.Linear')
 
 function QDRiemaNNLinear:__init(inputSize, outputSize, gamma, qdFlag)
    parent.__init(self,inputSize, outputSize)
-   self.qdFlag = qdFlag or true -- Flag for choosing between diagonal or quasi-diagonal reductions
+   if qdFlag == nil then -- Flag for choosing between diagonal or quasi-diagonal reductions
+      self.qdFlag = true
+   else
+      self.qdFlag = qdFlag
+   end
    self.gamma = gamma or 0.01 -- update rate of the metric
    self.matReg = 1e-12 -- numerical regularization
    self.initMetric = true -- flag for first update
    self.Mii = torch.Tensor(outputSize, inputSize)
    if self.qdFlag then self.M0i = torch.Tensor(outputSize, inputSize) end
    self.M00 = torch.Tensor(outputSize)
+   self.accGradientFlag = true
+   self.accMetricFlag = true
+end
+
+function QDRiemaNNLinear:setAccFlag(accGradientFlag,accMetricFlag)
+   self.accGradientFlag = accGradientFlag
+   self.accMetricFlag = accMetricFlag
 end
 
 function QDRiemaNNLinear:accGradParameters(input, gradOutput)
-   parent.accGradParameters(self,input,gradOutput)
+   if self.accGradientFlag then
+      parent.accGradParameters(self,input,gradOutput)
+   end
 
-   gradOutputSqT = torch.pow(gradOutput,2):t()
+   if self.accMetricFlag then
+      local gradOutputSqT = torch.pow(gradOutput,2):t()
 
-   if self.initMetric then
-      self.Mii:mm(gradOutputSqT,torch.pow(input,2))
-      self.M00:mv(gradOutputSqT,self.addBuffer)
-      if self.qdFlag then self.M0i:mm(gradOutputSqT,input) end
-      self.initMetric = false
-   else
-      self.Mii:mul(1.-self.gamma):addmm(self.gamma,gradOutputSqT,torch.pow(input,2))
-      if self.qdFlag then self.M0i:mul(1.-self.gamma):addmm(self.gamma,gradOutputSqT,input) end
-      self.M00:mul(1.-self.gamma):addmv(self.gamma,gradOutputSqT,self.addBuffer)
-   end
-
-   if self.qdFlag then
-      local numerator = torch.add(torch.cmul(self.gradWeight,self.M00:view(-1,1):expandAs(self.gradWeight)), -1.0, torch.cmul(self.M0i,self.gradBias:view(-1,1):expandAs(self.M0i)))
-      local denominator = torch.add(torch.cmul(self.Mii,self.M00:view(-1,1):expandAs(self.Mii)),-1.0,torch.pow(self.M0i,2)):clamp(self.matReg,1e25)
-      self.gradWeight:copy(torch.cdiv(numerator,denominator))
-
-      local temp = torch.cmul(torch.cdiv(self.M0i,self.M00:view(-1,1):expandAs(self.M0i)),self.gradWeight)
-      self.gradBias:copy(torch.add(torch.cdiv(self.gradBias,self.M00),-1.0,torch.sum(temp,2)))
-
-   else
-      self.gradWeight:cdiv(self.Mii:add(self.matReg))
-      self.gradBias:cdiv(self.M00:add(self.matReg))
+      if self.initMetric then
+         self.Mii:mm(gradOutputSqT,torch.pow(input,2))
+         self.M00:mv(gradOutputSqT,self.addBuffer)
+         if self.qdFlag then self.M0i:mm(gradOutputSqT,input) end
+         self.initMetric = false
+      else
+         self.Mii:mul(1.-self.gamma):addmm(self.gamma,gradOutputSqT,torch.pow(input,2))
+         if self.qdFlag then self.M0i:mul(1.-self.gamma):addmm(self.gamma,gradOutputSqT,input) end
+         self.M00:mul(1.-self.gamma):addmv(self.gamma,gradOutputSqT,self.addBuffer)
+      end
+
+      if self.qdFlag then
+         local numerator = torch.add(torch.cmul(self.gradWeight,self.M00:view(-1,1):expandAs(self.gradWeight)), -1.0, torch.cmul(self.M0i,self.gradBias:view(-1,1):expandAs(self.M0i)))
+         local denominator = torch.add(torch.cmul(self.Mii,self.M00:view(-1,1):expandAs(self.Mii)),-1.0,torch.pow(self.M0i,2)):clamp(self.matReg,1e25)
+         self.gradWeight:copy(numerator:cdiv(denominator))
+
+         local temp = torch.cmul(self.M0i,self.gradWeight):sum(2)
+         self.gradBias:add(-1.,temp):cdiv(torch.add(self.M00,self.matReg))
+
+      else
+         self.gradWeight:cdiv(self.Mii:add(self.matReg))
+         self.gradBias:cdiv(self.M00:add(self.matReg))
+      end
   end
 end
--
cgit v1.2.3
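The two flags added by this commit decouple the two things accGradParameters does, accumulating the ordinary gradient and accumulating/applying the metric, which is the separation a pseudo-label pass for natural gradient needs. What each setting means, as a reading of the code above (`qd` is a hypothetical instance; the last commit in this series removes these flags again):

```lua
local qd = nnx.QDRiemaNNLinear(100, 50)  -- sizes illustrative

qd:setAccFlag(true, true)   -- default: accumulate gradient, update metric, precondition
qd:setAccFlag(false, true)  -- metric-only backward pass (e.g. on sampled pseudo-labels)
qd:setAccFlag(true, false)  -- gradient-only pass; the preconditioning step lives inside
                            -- the metric branch, so this leaves the gradient raw
```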
From 4a72160aade7d3c286643dfe270b75810c775ab8 Mon Sep 17 00:00:00 2001
From: GaetanMarceauCaron
Date: Fri, 15 Apr 2016 16:18:46 +0200
Subject: Adding a default value for gamma depending on the minibatch size

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index dffeb17..c613822 100644
--- a/README.md
+++ b/README.md
@@ -233,7 +233,7 @@ The algorithm is defined in Riemannian metrics for neural networks I: feedforwar
 To use this module, simply replace `nn.Linear(ninput,noutput)` with `nnx.QDRiemaNNLinear(ninput,noutput)`.
 As always, the step-size must be chosen accordingly.
 Two additional arguments are also possible:
-* gamma (default=0.01): determines the update rate of the metric for a minibatch setting, i.e., (1-gamma) * oldMetric + gamma * newMetric. Smaller minibatches require a smaller gamma.
+* gamma (default=0.01): determines the update rate of the metric for a minibatch setting, i.e., (1-gamma) * oldMetric + gamma * newMetric. Smaller minibatches require a smaller gamma. A default value depending on the size of the minibatches is `gamma = 1. - torch.pow(1.-1./nTraining,miniBatchSize)` where `nTraining` is the number of training examples of the dataset and `miniBatchSize` is the number of training examples per minibatch.
 * qdFlag (default=true): Whether to use the quasi-diagonal reduction (true) or only the diagonal (false). The former should be better.
 
 To implement a natural gradient descent, one should also use a module for generating the pseudo-labels.
--
cgit v1.2.3
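Plugging the suggested default into the constructor (dataset and batch sizes are illustrative): with this choice the moving average forgets old minibatches on the scale of roughly one epoch, since (1-1/nTraining)^nTraining ≈ 1/e.

```lua
local nTraining, miniBatchSize = 60000, 128   -- illustrative sizes
-- the README formula, written with Lua's exponent operator
local gamma = 1. - (1. - 1./nTraining)^miniBatchSize
local layer = nnx.QDRiemaNNLinear(784, 100, gamma)  -- layer sizes illustrative
```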
From d0befcaecd0bccd38863d7dc29dff919478e5d00 Mon Sep 17 00:00:00 2001
From: GaetanMarceauCaron
Date: Fri, 15 Apr 2016 16:25:02 +0200
Subject: minor modif

---
 README.md | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/README.md b/README.md
index c613822..9c6e86d 100644
--- a/README.md
+++ b/README.md
@@ -236,8 +236,7 @@ Two additional arguments are also possible:
 * gamma (default=0.01): determines the update rate of the metric for a minibatch setting, i.e., (1-gamma) * oldMetric + gamma * newMetric. Smaller minibatches require a smaller gamma. A default value depending on the size of the minibatches is `gamma = 1. - torch.pow(1.-1./nTraining,miniBatchSize)` where `nTraining` is the number of training examples of the dataset and `miniBatchSize` is the number of training examples per minibatch.
 * qdFlag (default=true): Whether to use the quasi-diagonal reduction (true) or only the diagonal (false). The former should be better.
 
-To implement a natural gradient descent, one should also use a module for generating the pseudo-labels.
-
+Replacing Linear by QDRiemaNNLinear is a straightforward implementation of the outer product gradient descent.
 
 ## Requirements
--
cgit v1.2.3

From 3eb226834d822191027b914c366757fa81c8fcbe Mon Sep 17 00:00:00 2001
From: GaetanMarceauCaron
Date: Fri, 15 Apr 2016 16:27:55 +0200
Subject: small modif

---
 README.md | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/README.md b/README.md
index 9c6e86d..1dfa31d 100644
--- a/README.md
+++ b/README.md
@@ -235,8 +235,7 @@ As always, the step-size must be chosen accordingly.
 Two additional arguments are also possible:
 * gamma (default=0.01): determines the update rate of the metric for a minibatch setting, i.e., (1-gamma) * oldMetric + gamma * newMetric. Smaller minibatches require a smaller gamma. A default value depending on the size of the minibatches is `gamma = 1. - torch.pow(1.-1./nTraining,miniBatchSize)` where `nTraining` is the number of training examples of the dataset and `miniBatchSize` is the number of training examples per minibatch.
 * qdFlag (default=true): Whether to use the quasi-diagonal reduction (true) or only the diagonal (false). The former should be better.
-
-Replacing Linear by QDRiemaNNLinear is a straightforward implementation of the outer product gradient descent.
+This module is a straightforward implementation of the outer product gradient descent.
 
 ## Requirements
--
cgit v1.2.3
From f9cd545edb06bede2a3f5b98987a65fcad777c81 Mon Sep 17 00:00:00 2001
From: GaetanMarceauCaron
Date: Fri, 15 Apr 2016 16:28:29 +0200
Subject: small modif

---
 README.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/README.md b/README.md
index 1dfa31d..422a758 100644
--- a/README.md
+++ b/README.md
@@ -235,6 +235,7 @@ As always, the step-size must be chosen accordingly.
 Two additional arguments are also possible:
 * gamma (default=0.01): determines the update rate of the metric for a minibatch setting, i.e., (1-gamma) * oldMetric + gamma * newMetric. Smaller minibatches require a smaller gamma. A default value depending on the size of the minibatches is `gamma = 1. - torch.pow(1.-1./nTraining,miniBatchSize)` where `nTraining` is the number of training examples of the dataset and `miniBatchSize` is the number of training examples per minibatch.
 * qdFlag (default=true): Whether to use the quasi-diagonal reduction (true) or only the diagonal (false). The former should be better.
+
 This module is a straightforward implementation of the outer product gradient descent.
 
 ## Requirements
--
cgit v1.2.3

From 3b669b13d31cae16ea7a61d7eb1d1e7b8fb35e1c Mon Sep 17 00:00:00 2001
From: GaetanMarceauCaron
Date: Fri, 15 Apr 2016 16:37:19 +0200
Subject: Removing useless flags for OP metric

---
 QDRiemaNNLinear.lua | 57 +++++++++++++++++++++--------------------------------
 1 file changed, 23 insertions(+), 34 deletions(-)

diff --git a/QDRiemaNNLinear.lua b/QDRiemaNNLinear.lua
index 8dc4793..961a467 100644
--- a/QDRiemaNNLinear.lua
+++ b/QDRiemaNNLinear.lua
@@ -18,46 +18,35 @@ function QDRiemaNNLinear:__init(inputSize, outputSize, gamma, qdFlag)
    self.Mii = torch.Tensor(outputSize, inputSize)
    if self.qdFlag then self.M0i = torch.Tensor(outputSize, inputSize) end
    self.M00 = torch.Tensor(outputSize)
-   self.accGradientFlag = true
-   self.accMetricFlag = true
-end
-
-function QDRiemaNNLinear:setAccFlag(accGradientFlag,accMetricFlag)
-   self.accGradientFlag = accGradientFlag
-   self.accMetricFlag = accMetricFlag
 end
 
 function QDRiemaNNLinear:accGradParameters(input, gradOutput)
-   if self.accGradientFlag then
-      parent.accGradParameters(self,input,gradOutput)
-   end
+   parent.accGradParameters(self,input,gradOutput)
 
-   if self.accMetricFlag then
-      local gradOutputSqT = torch.pow(gradOutput,2):t()
+   local gradOutputSqT = torch.pow(gradOutput,2):t()
 
-      if self.initMetric then
-         self.Mii:mm(gradOutputSqT,torch.pow(input,2))
-         self.M00:mv(gradOutputSqT,self.addBuffer)
-         if self.qdFlag then self.M0i:mm(gradOutputSqT,input) end
-         self.initMetric = false
-      else
-         self.Mii:mul(1.-self.gamma):addmm(self.gamma,gradOutputSqT,torch.pow(input,2))
-         if self.qdFlag then self.M0i:mul(1.-self.gamma):addmm(self.gamma,gradOutputSqT,input) end
-         self.M00:mul(1.-self.gamma):addmv(self.gamma,gradOutputSqT,self.addBuffer)
-      end
+   if self.initMetric then
+      self.Mii:mm(gradOutputSqT,torch.pow(input,2))
+      self.M00:mv(gradOutputSqT,self.addBuffer)
+      if self.qdFlag then self.M0i:mm(gradOutputSqT,input) end
+      self.initMetric = false
+   else
+      self.Mii:mul(1.-self.gamma):addmm(self.gamma,gradOutputSqT,torch.pow(input,2))
+      if self.qdFlag then self.M0i:mul(1.-self.gamma):addmm(self.gamma,gradOutputSqT,input) end
+      self.M00:mul(1.-self.gamma):addmv(self.gamma,gradOutputSqT,self.addBuffer)
+   end
 
-      if self.qdFlag then
-         local numerator = torch.add(torch.cmul(self.gradWeight,self.M00:view(-1,1):expandAs(self.gradWeight)), -1.0, torch.cmul(self.M0i,self.gradBias:view(-1,1):expandAs(self.M0i)))
-         local denominator = torch.add(torch.cmul(self.Mii,self.M00:view(-1,1):expandAs(self.Mii)),-1.0,torch.pow(self.M0i,2)):clamp(self.matReg,1e25)
-         self.gradWeight:copy(numerator:cdiv(denominator))
-
-         local temp = torch.cmul(self.M0i,self.gradWeight):sum(2)
-         self.gradBias:add(-1.,temp):cdiv(torch.add(self.M00,self.matReg))
-
-      else
-         self.gradWeight:cdiv(self.Mii:add(self.matReg))
-         self.gradBias:cdiv(self.M00:add(self.matReg))
-      end
+   if self.qdFlag then
+      local numerator = torch.add(torch.cmul(self.gradWeight,self.M00:view(-1,1):expandAs(self.gradWeight)), -1.0, torch.cmul(self.M0i,self.gradBias:view(-1,1):expandAs(self.M0i)))
+      local denominator = torch.add(torch.cmul(self.Mii,self.M00:view(-1,1):expandAs(self.Mii)),-1.0,torch.pow(self.M0i,2)):clamp(self.matReg,1e25)
+      self.gradWeight:copy(numerator:cdiv(denominator))
+
+      local temp = torch.cmul(self.M0i,self.gradWeight):sum(2)
+      self.gradBias:add(-1.,temp):cdiv(torch.add(self.M00,self.matReg))
+
+   else
+      self.gradWeight:cdiv(self.Mii:add(self.matReg))
+      self.gradBias:cdiv(self.M00:add(self.matReg))
    end
 end
--
cgit v1.2.3
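With the final version of the module, a training step looks exactly like one with nn.Linear, since all the Riemannian work happens inside accGradParameters. A minimal end-to-end sketch (model shape, criterion, and learning rate are illustrative; the step-size still has to be tuned, as the README notes):

```lua
require 'nnx'

-- illustrative model: each nn.Linear replaced by the Riemannian version
local model = nn.Sequential()
   :add(nnx.QDRiemaNNLinear(784, 100))
   :add(nn.Tanh())
   :add(nnx.QDRiemaNNLinear(100, 10))
   :add(nn.LogSoftMax())
local criterion = nn.ClassNLLCriterion()

local function trainStep(input, target, lr)
   model:zeroGradParameters()
   local output = model:forward(input)
   local loss = criterion:forward(output, target)
   model:backward(input, criterion:backward(output, target))
   model:updateParameters(lr)  -- plain SGD step on the preconditioned gradient
   return loss
end
```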