
github.com/torch/optim.git
author    Soumith Chintala <soumith@gmail.com>  2016-06-10 22:11:50 +0300
committer GitHub <noreply@github.com>           2016-06-10 22:11:50 +0300
commit    2f75fecbbd148877ba4a7345138376b74b2d509f (patch)
tree      cc243c5229b162290296a5461eb3717efb8920a8
parent    016dca94bed88cdfd23a6ddddc5a48e9bdb114e5 (diff)
parent    6534bfd77a7ef5f9dd116d03d35c6dbf9ab0bce4 (diff)
Merge pull request #118 from gcheron/adam-wdec
add weight decay support to adam
-rw-r--r--  adam.lua  9
1 file changed, 8 insertions, 1 deletion
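
For context, here is how the new config.weightDecay option is used from a training loop. This is a minimal sketch, not code from the patch; the tiny nn.Linear model, criterion, and random data are illustrative stand-ins.

-- Minimal usage sketch for the new weightDecay option (illustrative only).
require 'nn'
require 'optim'

-- Tiny stand-in model and data, not part of the patch.
local model = nn.Linear(10, 1)
local criterion = nn.MSECriterion()
local inputs = torch.randn(4, 10)
local targets = torch.randn(4, 1)

local params, gradParams = model:getParameters()

local config = {
   learningRate = 1e-3,
   beta1 = 0.9,
   beta2 = 0.999,
   epsilon = 1e-8,
   weightDecay = 1e-4,  -- option added by this pull request
}
local state = {}

-- opfunc for optim.adam: returns f(x) and df/dx at x
local function feval(x)
   if x ~= params then params:copy(x) end
   gradParams:zero()
   local output = model:forward(inputs)
   local loss = criterion:forward(output, targets)
   model:backward(inputs, criterion:backward(output, targets))
   return loss, gradParams
end

for i = 1, 100 do
   optim.adam(feval, params, config, state)
end

The full diff follows.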
diff --git a/adam.lua b/adam.lua
index 89dd793..a6ad588 100644
--- a/adam.lua
+++ b/adam.lua
@@ -10,6 +10,7 @@ ARGS:
- 'config.beta1' : first moment coefficient
- 'config.beta2' : second moment coefficient
- 'config.epsilon' : for numerical stability
+- 'config.weightDecay' : weight decay
- 'state' : a table describing the state of the optimizer; after each
call the state is modified
@@ -28,10 +29,16 @@ function optim.adam(opfunc, x, config, state)
local beta1 = config.beta1 or 0.9
local beta2 = config.beta2 or 0.999
local epsilon = config.epsilon or 1e-8
+ local wd = config.weightDecay or 0
-- (1) evaluate f(x) and df/dx
local fx, dfdx = opfunc(x)
+ -- (2) weight decay
+ if wd ~= 0 then
+ dfdx:add(wd, x)
+ end
+
-- Initialization
state.t = state.t or 0
-- Exponential moving average of gradient values
@@ -52,7 +59,7 @@ function optim.adam(opfunc, x, config, state)
local biasCorrection1 = 1 - beta1^state.t
local biasCorrection2 = 1 - beta2^state.t
local stepSize = lr * math.sqrt(biasCorrection2)/biasCorrection1
- -- (2) update x
+ -- (3) update x
x:addcdiv(-stepSize, state.m, state.denom)
-- return x*, f(x) before optimization
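
Note on semantics: the new step (2) adds wd * x to the gradient before the first- and second-moment estimates are updated, so the decay is the classic L2-regularization form (equivalent to minimizing f(x) + (wd/2) * ||x||^2) rather than a decay applied directly to the parameter step. The snippet below is a standalone sketch illustrating only the tensor call used in the patch; it is not code from the repository.

-- Illustration of the in-place update dfdx:add(wd, x),
-- which computes dfdx = dfdx + wd * x (the gradient of (wd/2) * ||x||^2).
require 'torch'

local wd   = 0.01
local x    = torch.randn(5)
local dfdx = torch.randn(5)

local expected = dfdx + x * wd   -- out-of-place reference
dfdx:add(wd, x)                  -- in-place form used in the patch

-- difference should be zero up to floating-point precision
print((dfdx - expected):abs():max())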