diff options
author | Kashif Rasul <kashif.rasul@gmail.com> | 2015-07-30 00:55:00 +0300 |
---|---|---|
committer | Kashif Rasul <kashif.rasul@gmail.com> | 2015-07-30 00:55:00 +0300 |
commit | 5c081a12bf27abd9851d14ea981ab7b738f1c9f0 (patch) | |
tree | 7564e1e05ad09a158c3700e4301f463f57bebd38 | |
parent | 07fb9e0e22c1ff1a64613b24f0ba290e710aa5bd (diff) |
Remove redundant lambda param for Adam

v8 of the paper does not use this parameter.
-rw-r--r-- | adam.lua | 9 |
1 file changed, 3 insertions, 6 deletions
```diff
@@ -10,7 +10,6 @@ ARGS:
 - 'config.beta1' : first moment coefficient
 - 'config.beta2' : second moment coefficient
 - 'config.epsilon' : for numerical stability
-- 'config.lambda' : first moment decay
 - 'state' : a table describing the state of the optimizer; after each call the state is modified
@@ -29,7 +28,6 @@ function optim.adam(opfunc, x, config, state)
    local beta1 = config.beta1 or 0.9
    local beta2 = config.beta2 or 0.999
    local epsilon = config.epsilon or 1e-8
-   local lambda = config.lambda or 1-1e-8
    -- (1) evaluate f(x) and df/dx
    local fx, dfdx = opfunc(x)
@@ -44,10 +42,9 @@ function optim.adam(opfunc, x, config, state)
    state.denom = state.denom or x.new(dfdx:size()):zero()
    state.t = state.t + 1
-   -- Decay the first moment running average coefficient
-   local bt1 = beta1 * lambda^(state.t - 1)
-
-   state.m:mul(bt1):add(1-bt1, dfdx)
+
+   -- Decay the first and second moment running average coefficient
+   state.m:mul(beta1):add(1-beta1, dfdx)
    state.v:mul(beta2):addcmul(1-beta2, dfdx, dfdx)
    state.denom:copy(state.v):sqrt():add(epsilon)
```