diff options
author | GaetanMarceauCaron <gaetan.marceau-caron@inria.fr> | 2016-04-15 17:18:17 +0300 |
---|---|---|
committer | GaetanMarceauCaron <gaetan.marceau-caron@inria.fr> | 2016-04-15 17:18:17 +0300 |
commit | 0575d389496c71350ca304bd80e0b97153ad8fdb (patch) | |
tree | daa30d18a3cc574fdff65dcfa2df22866a6b062e | |
parent | dcab6930c5c0869248ecf3482a1bc64555f0c253 (diff) |
Code optimization and verification
-rw-r--r-- | QDRiemaNNLinear.lua | 69 |
1 file changed, 42 insertions, 27 deletions
```diff
diff --git a/QDRiemaNNLinear.lua b/QDRiemaNNLinear.lua
index 76befe6..8dc4793 100644
--- a/QDRiemaNNLinear.lua
+++ b/QDRiemaNNLinear.lua
@@ -1,48 +1,63 @@
 --
--- Author: Gaetan Marceau Caron (gaetan.marceau-caron@inria.fr)
+-- Author: Gaetan Marceau Caron (gaetan.marceau-caron@inria.fr) and Yann Ollivier
 -- Description: Implementation of the quasi-diagonal reduction
--- based on the Practical Riemannian Neural Networks (Yann Ollivier and Gaetan Marceau Caron) paper (http://arxiv.org/abs/1602.08007)
+-- based on the Practical Riemannian Neural Networks paper (http://arxiv.org/abs/1602.08007)
 --
 
 local QDRiemaNNLinear, parent = torch.class('nnx.QDRiemaNNLinear', 'nn.Linear')
 
 function QDRiemaNNLinear:__init(inputSize, outputSize, gamma, qdFlag)
    parent.__init(self,inputSize, outputSize)
-   self.qdFlag = qdFlag or true -- Flag for choosing between diagonal or quasi-diagonal reductions
+   if qdFlag == nil then -- Flag for choosing between diagonal or quasi-diagonal reductions
+      self.qdFlag = true
+   else
+      self.qdFlag = qdFlag
+   end
    self.gamma = gamma or 0.01 -- update rate of the metric
    self.matReg = 1e-12 -- numerical regularization
    self.initMetric = true -- flag for first update
    self.Mii = torch.Tensor(outputSize, inputSize)
    if self.qdFlag then self.M0i = torch.Tensor(outputSize, inputSize) end
    self.M00 = torch.Tensor(outputSize)
+   self.accGradientFlag = true
+   self.accMetricFlag = true
+end
+
+function QDRiemaNNLinear:setAccFlag(accGradientFlag,accMetricFlag)
+   self.accGradientFlag = accGradientFlag
+   self.accMetricFlag = accMetricFlag
 end
 
 function QDRiemaNNLinear:accGradParameters(input, gradOutput)
-   parent.accGradParameters(self,input,gradOutput)
-
-   gradOutputSqT = torch.pow(gradOutput,2):t()
-
-   if self.initMetric then
-      self.Mii:mm(gradOutputSqT,torch.pow(input,2))
-      self.M00:mv(gradOutputSqT,self.addBuffer)
-      if self.qdFlag then self.M0i:mm(gradOutputSqT,input) end
-      self.initMetric = false
-   else
-      self.Mii:mul(1.-self.gamma):addmm(self.gamma,gradOutputSqT,torch.pow(input,2))
-      if self.qdFlag then self.M0i:mul(1.-self.gamma):addmm(self.gamma,gradOutputSqT,input) end
-      self.M00:mul(1.-self.gamma):addmv(self.gamma,gradOutputSqT,self.addBuffer)
+   if self.accGradientFlag then
+      parent.accGradParameters(self,input,gradOutput)
    end
-
-   if self.qdFlag then
-      local numerator = torch.add(torch.cmul(self.gradWeight,self.M00:view(-1,1):expandAs(self.gradWeight)), -1.0, torch.cmul(self.M0i,self.gradBias:view(-1,1):expandAs(self.M0i)))
-      local denominator = torch.add(torch.cmul(self.Mii,self.M00:view(-1,1):expandAs(self.Mii)),-1.0,torch.pow(self.M0i,2)):clamp(self.matReg,1e25)
-      self.gradWeight:copy(torch.cdiv(numerator,denominator))
+
+   if self.accMetricFlag then
+      local gradOutputSqT = torch.pow(gradOutput,2):t()
 
-      local temp = torch.cmul(torch.cdiv(self.M0i,self.M00:view(-1,1):expandAs(self.M0i)),self.gradWeight)
-      self.gradBias:copy(torch.add(torch.cdiv(self.gradBias,self.M00),-1.0,torch.sum(temp,2)))
-
-   else
-      self.gradWeight:cdiv(self.Mii:add(self.matReg))
-      self.gradBias:cdiv(self.M00:add(self.matReg))
+      if self.initMetric then
+         self.Mii:mm(gradOutputSqT,torch.pow(input,2))
+         self.M00:mv(gradOutputSqT,self.addBuffer)
+         if self.qdFlag then self.M0i:mm(gradOutputSqT,input) end
+         self.initMetric = false
+      else
+         self.Mii:mul(1.-self.gamma):addmm(self.gamma,gradOutputSqT,torch.pow(input,2))
+         if self.qdFlag then self.M0i:mul(1.-self.gamma):addmm(self.gamma,gradOutputSqT,input) end
+         self.M00:mul(1.-self.gamma):addmv(self.gamma,gradOutputSqT,self.addBuffer)
+      end
+
+      if self.qdFlag then
+         local numerator = torch.add(torch.cmul(self.gradWeight,self.M00:view(-1,1):expandAs(self.gradWeight)), -1.0, torch.cmul(self.M0i,self.gradBias:view(-1,1):expandAs(self.M0i)))
+         local denominator = torch.add(torch.cmul(self.Mii,self.M00:view(-1,1):expandAs(self.Mii)),-1.0,torch.pow(self.M0i,2)):clamp(self.matReg,1e25)
+         self.gradWeight:copy(numerator:cdiv(denominator))
+
+         local temp = torch.cmul(self.M0i,self.gradWeight):sum(2)
+         self.gradBias:add(-1.,temp):cdiv(torch.add(self.M00,self.matReg))
+
+      else
+         self.gradWeight:cdiv(self.Mii:add(self.matReg))
+         self.gradBias:cdiv(self.M00:add(self.matReg))
+      end
    end
 end
```