author      Marco Scoffier <github@metm.org>   2011-10-14 03:21:31 +0400
committer   Marco Scoffier <github@metm.org>   2011-10-14 03:21:31 +0400
commit      256c081377424e2b4752346b37fc788235102490 (patch)
tree        fa91fd83c00568b4c5b28411793d658f2c1091db
parent      f708b5967f0c99c35c0f95ba0e36e4e82aec5a94 (diff)
remove massive :new() bug
-rw-r--r--   SGDOptimization.lua   19
1 files changed, 15 insertions, 4 deletions
diff --git a/SGDOptimization.lua b/SGDOptimization.lua
index 419bdc7..a0184fa 100644
--- a/SGDOptimization.lua
+++ b/SGDOptimization.lua
@@ -47,7 +47,7 @@ function SGD:optimize()
    if self.learningRates then
       -- we are using diagHessian and have individual learningRates
       self.deltaParameters = self.deltaParameters or
-         self.parameters.new():resizeAs(self.currentGradParameters)
+         torch.Tensor():typeAs(self.parameters):resizeAs(self.currentGradParameters)
       self.deltaParameters:copy(self.learningRates):cmul(self.currentGradParameters)
       self.parameters:add(-learningRate, self.deltaParameters)
    else
@@ -80,7 +80,7 @@ function SGD:diagHessian(inputs, targets)
    if not self.learningRates then
       -- do initialization
       self.diagHessianEpsilon = self.diagHessianEpslion or 1e-3
-      self.learningRates = self.parameters.new():resizeAs(self.parameters):fill(1)
+      self.learningRates = torch.Tensor():typeAs(self.parameters):resizeAs(self.parameters):fill(1)
       self.module:initDiagHessianParameters()
       self.diagHessianParameters =
          nnx.flattenParameters(nnx.getDiagHessianParameters(self.module))
@@ -106,11 +106,22 @@ function SGD:diagHessian(inputs, targets)
    end
    -- protect diag hessian (the proper way of doing it is the commented code,
    -- but for speed reasons, the uncommented code just works)
-   -- self.diagHessianParameters:apply(function(x) return math.max(x, diagHessianEpsilon) end)
-   self.diagHessianParameters:add(self.diagHessianEpsilon)
+   self.diagHessianParameters:apply(
+      function(x)
+         return math.max(x, self.diagHessianEpsilon)
+      end)
+   --self.diagHessianParameters:add(self.diagHessianEpsilon)
    -- now learning rates are obtained like this:
    self.learningRates:cdiv(self.diagHessianParameters)
+   print('<diagHessian>')
+   print(' + norm of dhP: '..self.diagHessianParameters:norm()..
+         ' norm of LR: '..self.learningRates:norm())
+   print(' + max dhP : '..self.diagHessianParameters:max() ..
+         ' max LR: '..self.learningRates:max())
+   print(' + min dhp: '.. self.diagHessianParameters:min() ..
+         ' min LR: '..self.learningRates:min())
+   -- self.learningRates:div(self.learningRates:norm())
 end
 
 function SGD:optimalLearningRate(inputs, targets)
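
Note on the change: the commit does two things. Scratch tensors (self.deltaParameters and self.learningRates) are now allocated with torch.Tensor():typeAs(self.parameters) instead of going through self.parameters.new(), and the diagonal-Hessian protection switches from adding self.diagHessianEpsilon to every entry to clamping each entry at that epsilon (the apply-based variant the old comment already called "the proper way"), plus some debug printing of norms and extrema. Below is a minimal sketch of these two idioms in isolation; it is not the repository's code, and the tensor names and sizes are purely illustrative.

require 'torch'

-- stand-ins for self.parameters and self.currentGradParameters (illustrative)
local parameters = torch.randn(10)
local currentGradParameters = torch.randn(10)

-- allocation idiom used after this commit: an empty tensor coerced to the
-- type of the reference tensor, then resized to match another tensor
local deltaParameters = torch.Tensor():typeAs(parameters):resizeAs(currentGradParameters)

-- diagonal-Hessian protection as re-enabled here: clamp each entry to be at
-- least epsilon, rather than shifting all entries by epsilon
local diagHessianEpsilon = 1e-3
local diagHessianParameters = torch.rand(10):add(-0.5)  -- some entries may be <= 0
diagHessianParameters:apply(function(x) return math.max(x, diagHessianEpsilon) end)

-- per-parameter learning rates, as in SGD:diagHessian()
local learningRates = torch.Tensor():typeAs(parameters):resizeAs(parameters):fill(1)
learningRates:cdiv(diagHessianParameters)

print('min dhP: ' .. diagHessianParameters:min() .. '  max LR: ' .. learningRates:max())

Clamping preserves curvature estimates that are already well above epsilon, whereas adding epsilon shifts every estimate; the trade-off flagged in the original comment was speed, since apply runs a Lua function per element. Incidentally, the unchanged context line self.diagHessianEpsilon = self.diagHessianEpslion or 1e-3 is reproduced verbatim from the source; the misspelled Epslion looks like a typo, which would make the expression always fall back to 1e-3.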