
github.com/torch/optim.git
author     Alfredo Canziani <alfredo.canziani@gmail.com>   2016-06-29 07:49:32 +0300
committer  Alfredo Canziani <alfredo.canziani@gmail.com>   2016-06-30 05:51:21 +0300
commit     63994c78b2eef4266e62e88e0ae444ee0c37074d (patch)
tree       75d14d1c1d098ee0c9d96f88be112425f966b08d /adam.lua
parent     c0c4bbfcc14fad7bc484358821563fddd0b9031e (diff)

Fix bad alignment, trailing spaces and tabs

Diffstat (limited to 'adam.lua')
 -rw-r--r--  adam.lua  70
 1 file changed, 35 insertions, 35 deletions
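
adam.lua implements Kingma & Ba's Adam optimizer ("Adam: A Method for Stochastic Optimization", ICLR 2015). For reference, and written here in the code's own variable names rather than taken from the page itself, the update that optim.adam computes on each call is (all operations element-wise, t = state.t):

   dfdx     = df/dx + wd * x                           -- weight decay folded into the gradient
   m        = beta1 * m + (1 - beta1) * dfdx           -- first-moment estimate
   v        = beta2 * v + (1 - beta2) * dfdx^2         -- second-moment estimate
   stepSize = lr * sqrt(1 - beta2^t) / (1 - beta1^t)   -- bias corrections folded into the step size
   x        = x - stepSize * m / (sqrt(v) + epsilon)

With the defaults in the code (lr = 0.001, beta1 = 0.9, beta2 = 0.999), the first step (t = 1) uses stepSize = 0.001 * sqrt(1 - 0.999) / (1 - 0.9) ≈ 3.2e-4.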
diff --git a/adam.lua b/adam.lua
index a6ad588..505a779 100644
--- a/adam.lua
+++ b/adam.lua
@@ -21,47 +21,47 @@ RETURN:
]]
function optim.adam(opfunc, x, config, state)
-   -- (0) get/update state
-   local config = config or {}
-   local state = state or config
-   local lr = config.learningRate or 0.001
+   -- (0) get/update state
+   local config = config or {}
+   local state = state or config
+   local lr = config.learningRate or 0.001
-   local beta1 = config.beta1 or 0.9
-   local beta2 = config.beta2 or 0.999
-   local epsilon = config.epsilon or 1e-8
-   local wd = config.weightDecay or 0
+   local beta1 = config.beta1 or 0.9
+   local beta2 = config.beta2 or 0.999
+   local epsilon = config.epsilon or 1e-8
+   local wd = config.weightDecay or 0
-   -- (1) evaluate f(x) and df/dx
-   local fx, dfdx = opfunc(x)
+   -- (1) evaluate f(x) and df/dx
+   local fx, dfdx = opfunc(x)
-   -- (2) weight decay
-   if wd ~= 0 then
-      dfdx:add(wd, x)
-   end
+   -- (2) weight decay
+   if wd ~= 0 then
+      dfdx:add(wd, x)
+   end
-   -- Initialization
-   state.t = state.t or 0
-   -- Exponential moving average of gradient values
-   state.m = state.m or x.new(dfdx:size()):zero()
-   -- Exponential moving average of squared gradient values
-   state.v = state.v or x.new(dfdx:size()):zero()
-   -- A tmp tensor to hold the sqrt(v) + epsilon
-   state.denom = state.denom or x.new(dfdx:size()):zero()
+   -- Initialization
+   state.t = state.t or 0
+   -- Exponential moving average of gradient values
+   state.m = state.m or x.new(dfdx:size()):zero()
+   -- Exponential moving average of squared gradient values
+   state.v = state.v or x.new(dfdx:size()):zero()
+   -- A tmp tensor to hold the sqrt(v) + epsilon
+   state.denom = state.denom or x.new(dfdx:size()):zero()
-   state.t = state.t + 1
-
-   -- Decay the first and second moment running average coefficient
-   state.m:mul(beta1):add(1-beta1, dfdx)
-   state.v:mul(beta2):addcmul(1-beta2, dfdx, dfdx)
+   state.t = state.t + 1
-   state.denom:copy(state.v):sqrt():add(epsilon)
+   -- Decay the first and second moment running average coefficient
+   state.m:mul(beta1):add(1-beta1, dfdx)
+   state.v:mul(beta2):addcmul(1-beta2, dfdx, dfdx)
-   local biasCorrection1 = 1 - beta1^state.t
-   local biasCorrection2 = 1 - beta2^state.t
-   local stepSize = lr * math.sqrt(biasCorrection2)/biasCorrection1
-   -- (3) update x
-   x:addcdiv(-stepSize, state.m, state.denom)
+   state.denom:copy(state.v):sqrt():add(epsilon)
-   -- return x*, f(x) before optimization
-   return x, {fx}
+   local biasCorrection1 = 1 - beta1^state.t
+   local biasCorrection2 = 1 - beta2^state.t
+   local stepSize = lr * math.sqrt(biasCorrection2)/biasCorrection1
+   -- (3) update x
+   x:addcdiv(-stepSize, state.m, state.denom)
+
+   -- return x*, f(x) before optimization
+   return x, {fx}
end
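
The chained tensor calls in the function use Torch7's in-place, scalar-first methods: y:add(a, z) adds a*z to y, y:addcmul(a, z, w) adds a*z.*w, and y:addcdiv(a, z, w) adds a*z./w, all element-wise and without allocating temporaries. A small stand-alone sketch of the first-moment update, with illustrative names and sizes that are not part of the patch:

require 'torch'

local beta1 = 0.9
local g = torch.randn(5)   -- stand-in for dfdx
local m = torch.randn(5)   -- stand-in for state.m

-- in-place form used in adam.lua: m <- beta1*m + (1 - beta1)*g
local m_inplace = m:clone():mul(beta1):add(1 - beta1, g)

-- explicit element-wise equivalent, allocating new tensors
local m_explicit = m * beta1 + g * (1 - beta1)

print((m_inplace - m_explicit):abs():max())   -- ~0: both forms agree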
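
A minimal usage sketch, not part of this commit: optim.adam expects a closure returning the loss and the gradient, updates x in place, and keeps its running moments in the state table. The quadratic objective, tensor sizes and hyper-parameter values below are illustrative; only the optim.adam signature and the config field names come from adam.lua.

require 'torch'
require 'optim'

local target = torch.randn(10)
local x = torch.zeros(10)

-- opfunc: returns f(x) and df/dx, as optim.adam expects
local function opfunc(x)
   local diff = x - target
   local fx = 0.5 * diff:dot(diff)   -- f(x)  = 0.5 * ||x - target||^2
   local dfdx = diff:clone()         -- df/dx = x - target
   return fx, dfdx
end

local config = {learningRate = 0.01, beta1 = 0.9, beta2 = 0.999, epsilon = 1e-8}
local state  = {}   -- persists t, m, v, denom across calls

for i = 1, 200 do
   local _, fs = optim.adam(opfunc, x, config, state)   -- x is updated in place
   if i % 50 == 0 then
      print(string.format('iter %d   f(x) = %.6f', i, fs[1]))
   end
end

Passing the same state table on every call is what lets state.t, state.m and state.v accumulate; if state is omitted, the function stores them in config instead (local state = state or config).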