author     Clement Farabet <clement.farabet@gmail.com>   2011-10-17 20:43:47 +0400
committer  Clement Farabet <clement.farabet@gmail.com>   2011-10-17 20:43:47 +0400
commit     e16b7f18d707e35a98c34d29f66edf74b62287f2
tree       8fd7187c89e3c500dc6194ffab455e63b8382f15
parent     be9e6da026df3fc269c904cef0ba6651e6f34100
parent     4d51e8504f3e6b7250b572559ceb840a4f7845fa
Merge branch 'master' of github.com:clementfarabet/lua---nnx
-rw-r--r--   ASGDOptimization.lua | 64
-rw-r--r--   CMakeLists.txt       |  1
-rw-r--r--   init.lua             |  1
3 files changed, 66 insertions, 0 deletions
diff --git a/ASGDOptimization.lua b/ASGDOptimization.lua
new file mode 100644
index 0000000..8678adb
--- /dev/null
+++ b/ASGDOptimization.lua
@@ -0,0 +1,64 @@
+local ASGD,parent = torch.class('nn.ASGDOptimization', 'nn.SGDOptimization')
+
+-- ASGD:
+--   w := (1 - lambda eta_t) w - eta_t dL/dw(z,w)
+--   a := a + mu_t [ w - a ]
+--
+--   eta_t = eta0 / (1 + lambda eta0 t) ^ alpha
+--   mu_t  = 1 / max(1, t - t0)
+--
+-- implements the ASGD algorithm as in L. Bottou's sgd-2.0
+
+function ASGD:__init(...)
+   parent.__init(self, ...)
+   xlua.unpack_class(self, {...},
+      'ASGDOptimization', nil,
+      {arg='eta0', type='number',
+       help='eta0 parameter for ASGD', default=1e-4},
+      {arg='t0', type='number',
+       help='point at which to start averaging', default=1e6},
+      {arg='lambda', type='number',
+       help='lambda for ASGD -- decay term', default=1},
+      {arg='alpha', type='number',
+       help='alpha for ASGD -- power for eta update', default=0.75}
+   )
+   self.eta_t = self.eta0
+   self.mu_t = 1
+   self.t = 0
+end
+
+function ASGD:optimize()
+   -- (1) decay term:
+   --     w := (1 - lambda eta_t) w
+   self.parameters:mul(1 - self.lambda * self.eta_t)
+   -- (2) parameter update, with a single or individual learningRates:
+   --     w := w - eta_t dL/dw(z,w)
+   if self.learningRates then
+      -- we are using diagHessian and have individual learningRates
+      self.deltaParameters = self.deltaParameters or
+         self.parameters.new():resizeAs(self.gradParameters)
+      self.deltaParameters:copy(self.learningRates):cmul(self.gradParameters)
+      self.parameters:add(-self.eta_t, self.deltaParameters)
+   else
+      -- normal single-learningRate parameter update
+      self.parameters:add(-self.eta_t, self.gradParameters)
+   end
+   -- (3) averaging part: a := a + mu_t [ w - a ]
+   self.a = self.a or self.parameters.new():resizeAs(self.parameters):zero()
+   if self.mu_t ~= 1 then
+      self.tmp = self.tmp or self.a.new():resizeAs(self.a)
+      self.tmp:copy(self.parameters):add(-1, self.a):mul(self.mu_t)
+      self.a:add(self.tmp)
+   else
+      self.a:copy(self.parameters)
+   end
+   -- (4) update eta_t and mu_t
+   -- (4a) increment time counter
+   self.t = self.t + 1
+   -- (4b) update eta_t:
+   --      eta_t = eta0 / (1 + lambda eta0 t) ^ alpha
+   self.eta_t = self.eta0 / math.pow(1 + self.lambda * self.eta0 * self.t, self.alpha)
+   -- (4c) update mu_t:
+   --      mu_t = 1 / max(1, t - t0)
+   self.mu_t = 1 / math.max(1, self.t - self.t0)
+end
\ No newline at end of file
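
For context, a minimal usage sketch of the new class (not part of this commit). The module/criterion constructor keys and the optimizer:forward(inputs, targets) entry point are assumed to be inherited from the other nnx batch optimizers via nn.SGDOptimization; the model, criterion, and hyper-parameter values below are illustrative only.

    -- hypothetical usage sketch; constructor/forward API assumed from
    -- the sibling nnx optimizers (nn.SGDOptimization and its parent)
    require 'nnx'
    local model = nn.Sequential()
    model:add(nn.Linear(10, 2))
    local criterion = nn.MSECriterion()
    local optimizer = nn.ASGDOptimization{
       module    = model,       -- assumed inherited argument
       criterion = criterion,   -- assumed inherited argument
       eta0      = 1e-2,        -- initial learning rate
       t0        = 1e3,         -- step at which averaging kicks in
       lambda    = 1e-4         -- decay term
    }
    -- in the training loop, each call would perform one ASGD step:
    --    optimizer:forward(inputs, targets)
    -- the running average of the weights accumulates in optimizer.a
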
diff --git a/CMakeLists.txt b/CMakeLists.txt
index d0739ef..5b90102 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -121,6 +121,7 @@ install_files(${INSTALL_PREFIX} Optimization.lua)
 install_files(${INSTALL_PREFIX} LBFGSOptimization.lua)
 install_files(${INSTALL_PREFIX} CGOptimization.lua)
 install_files(${INSTALL_PREFIX} SGDOptimization.lua)
+install_files(${INSTALL_PREFIX} ASGDOptimization.lua)
 install_files(${INSTALL_PREFIX} GeneticSGDOptimization.lua)
 install_files(${INSTALL_PREFIX} BatchOptimization.lua)
 install_files(${INSTALL_PREFIX} SNESOptimization.lua)
diff --git a/init.lua b/init.lua
--- a/init.lua
+++ b/init.lua
@@ -105,6 +105,7 @@ torch.include('nnx', 'Optimization.lua')
 torch.include('nnx', 'BatchOptimization.lua')
 torch.include('nnx', 'SNESOptimization.lua')
 torch.include('nnx', 'SGDOptimization.lua')
+torch.include('nnx', 'ASGDOptimization.lua')
 torch.include('nnx', 'LBFGSOptimization.lua')
 torch.include('nnx', 'CGOptimization.lua')
 torch.include('nnx', 'GeneticSGDOptimization.lua')
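
Together, the two one-line changes above wire the new file into the package: the CMake rule installs ASGDOptimization.lua alongside the other optimizers, and the torch.include call in init.lua loads it when the package is required, so the class registered by torch.class at the top of the new file is then directly available:

    require 'nnx'
    print(nn.ASGDOptimization)   -- the class registered by the commit above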