github.com/clementfarabet/lua---nnx.git

author     Clement Farabet <clement.farabet@gmail.com>   2011-10-09 20:58:14 +0400
committer  Clement Farabet <clement.farabet@gmail.com>   2011-10-09 20:58:14 +0400
commit     4c5078046e48494048cbe5f27d9b4be4bac07c9b (patch)
tree       d98728794efb5fefb83bf74e5397a488c53afcd9
parent     71fa5344512a1b1e8bf1eb498f3d5692e50ceead (diff)
Added code for Tanh:hessian
-rw-r--r--   DiagHessian.lua         10
-rw-r--r--   test/test-hessian.lua   78
2 files changed, 56 insertions, 32 deletions
diff --git a/DiagHessian.lua b/DiagHessian.lua
index b3a3772..d163051 100644
--- a/DiagHessian.lua
+++ b/DiagHessian.lua
@@ -59,6 +59,16 @@ function nn.Linear.accDiagHessianParameters(self, input, diagHessianOutput, scal
end
end
+-- Tanh
+function nn.Tanh.backwardDiagHessian(self, input, diagHessianOutput)
+   self.diagHessianInput = self.diagHessianInput or self.output.new()
+   self.derivativeSq = self.derivativeSq or self.output.new()
+   self.derivativeSq:resizeAs(self.output):copy(self.output):cmul(self.output):mul(-1):add(1)
+   self.derivativeSq:cmul(self.derivativeSq)
+   self.diagHessianInput:resizeAs(input):copy(diagHessianOutput):cmul(self.derivativeSq)
+   return self.diagHessianInput
+end
+
-- Sequential
function nn.Sequential.backwardDiagHessian(self, input, diagHessianOutput)
local currentDiagHessianOutput = diagHessianOutput
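
The new nn.Tanh.backwardDiagHessian above propagates the diagonal of the Hessian through the tanh non-linearity with the usual Gauss-Newton style approximation: the second-derivative term is dropped, so for y = tanh(x) each component is d2E/dx_i^2 ~= (dy_i/dx_i)^2 * d2E/dy_i^2 = (1 - y_i^2)^2 * d2E/dy_i^2. A minimal sketch (not part of this patch) of how that closed form can be checked against the new method, assuming the nnx package from this repository is installed and the lab/torch globals used in the test below are available:

require 'nnx'

-- forward a small random vector through a Tanh module
local m = nn.Tanh()
local x = lab.randn(5)
local y = m:forward(x)

-- pretend the diagonal Hessian w.r.t. the module output is all ones
local diagHessianOutput = torch.Tensor(5):fill(1)

-- closed form: (1 - y^2)^2 .* d2E/dy^2
local expected = torch.Tensor(5):copy(y):cmul(y):mul(-1):add(1)
expected:cmul(expected):cmul(diagHessianOutput)

-- should match the value returned by the new method, element-wise
local got = m:backwardDiagHessian(x, diagHessianOutput)
print(expected)
print(got)
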
diff --git a/test/test-hessian.lua b/test/test-hessian.lua
index 1f7b720..ae02e73 100644
--- a/test/test-hessian.lua
+++ b/test/test-hessian.lua
@@ -5,14 +5,23 @@
--
-- given an input vector X, we want to learn a mapping
-- f(X) = \sum_i X_i
+--
+-- we use a two-layer perceptron, just to validate
+-- the tanh+linear hessian
+-- (of course learning such a function is much more
+-- trivial using a single linear layer :-)
--
-- libs
require 'nnx'
+-- fix random seed
+random.manualSeed(1)
+
-- SGD params
learningRate = 1e-3
diagHessianEpsilon = 1e-2
+computeDiagHessian = true
-- fake data
inputs = {}
@@ -24,44 +33,49 @@ end
-- create module
module = nn.Sequential()
+module:add(nn.Linear(10,10))
+module:add(nn.Tanh())
module:add(nn.Linear(10,1))
-- loss
criterion = nn.MSECriterion()
--- init diag hessian
-module:initDiagHessianParameters()
-diagHessianParameters = nnx.flattenParameters(nnx.getDiagHessianParameters(module))
-
--- estimate diag hessian over dataset
-diagHessianParameters:zero()
-for i = 1,#inputs do
- local output = module:forward(inputs[i])
- local critDiagHessian = criterion:backwardDiagHessian(output, targets[i])
- module:backwardDiagHessian(inputs[i], critDiagHessian)
- module:accDiagHessianParameters(inputs[i], critDiagHessian)
-end
-diagHessianParameters:div(#inputs)
+-- get params
+parameters = nnx.flattenParameters(nnx.getParameters(module))
+gradParameters = nnx.flattenParameters(nnx.getGradParameters(module))
--- protect diag hessian
-diagHessianParameters:apply(function(x)
- return math.max(x, diagHessianEpsilon)
- end)
+-- compute learning rates
+learningRates = torch.Tensor(parameters:size()):fill(1)
+if computeDiagHessian then
+   -- init diag hessian
+   module:initDiagHessianParameters()
+   diagHessianParameters = nnx.flattenParameters(nnx.getDiagHessianParameters(module))
--- now learning rates are obtained like this:
-learningRates = diagHessianParameters.new()
-learningRates:resizeAs(diagHessianParameters):fill(1)
-learningRates:cdiv(diagHessianParameters)
+   -- estimate diag hessian over dataset
+   diagHessianParameters:zero()
+   for i = 1,#inputs do
+      local output = module:forward(inputs[i])
+      local critDiagHessian = criterion:backwardDiagHessian(output, targets[i])
+      module:backwardDiagHessian(inputs[i], critDiagHessian)
+      module:accDiagHessianParameters(inputs[i], critDiagHessian)
+   end
+   diagHessianParameters:div(#inputs)
--- print info
-print('learning rates calculated to')
-print(learningRates)
+   -- protect diag hessian
+   diagHessianParameters:apply(function(x)
+      return math.max(x, diagHessianEpsilon)
+   end)
--- regular SGD
-parameters = nnx.flattenParameters(nnx.getParameters(module))
-gradParameters = nnx.flattenParameters(nnx.getGradParameters(module))
+   -- now learning rates are obtained like this:
+   learningRates:cdiv(diagHessianParameters)
-for epoch = 1,10 do
+   -- print info
+   print('learning rates calculated to')
+   print(learningRates)
+end
+
+-- regular SGD
+for epoch = 1,100 do
error = 0
for i = 1,#inputs do
-- backprop gradients
@@ -77,7 +91,7 @@ for epoch = 1,10 do
module:accGradParameters(inputs[i], critGradInput)
-- given a parameter vector, and a gradParameter vector, the update goes like this:
- deltaParameters = deltaParameters or diagHessianParameters.new()
+ deltaParameters = deltaParameters or parameters.new()
deltaParameters:resizeAs(gradParameters):copy(learningRates):cmul(gradParameters)
parameters:add(-learningRate, deltaParameters)
end
@@ -86,9 +100,9 @@ for epoch = 1,10 do
end
-- test vector
-input = lab.range(1,10)
-grountruth = input:sum()
+input = lab.randn(10)
+groundtruth = input:sum()
output = module:forward(input)
print('test input:') print(input)
print('predicted output:', output[1])
-print('groundtruth (\sum_i X_i):', output[1])
+print('groundtruth (\sum_i X_i):', groundtruth)
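
For context on the test as a whole (again not part of the patch): the two-layer perceptron is only there to exercise the tanh+linear Hessian code; the quantity actually used for training is one learning rate per parameter, obtained by averaging the diagonal Hessian estimate over the dataset, clamping it from below at diagHessianEpsilon, and taking its reciprocal. Each SGD step then scales every gradient component by its own rate, i.e. theta_i <- theta_i - learningRate * g_i / max(h_i, diagHessianEpsilon); with computeDiagHessian = false the rates stay at 1 and this reduces to plain SGD. A condensed sketch of that update, reusing the flattened parameters/gradParameters/learningRates tensors from the test (the helper name sgdStep is illustrative):

-- one SGD step with per-parameter rates derived from the diagonal Hessian:
-- theta_i <- theta_i - learningRate * learningRates_i * g_i,
-- where learningRates_i = 1 / max(h_i, diagHessianEpsilon)
local function sgdStep(parameters, gradParameters, learningRates, learningRate)
   local delta = torch.Tensor():resizeAs(gradParameters)
   delta:copy(learningRates):cmul(gradParameters)   -- element-wise scaling of the gradient
   parameters:add(-learningRate, delta)             -- in-place update of the flat parameter vector
end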