diff options
author | MaxReimann <max.reimann@student.hpi.uni-potsdam.de> | 2015-12-21 02:56:10 +0300 |
---|---|---|
committer | MaxReimann <max.reimann@student.hpi.uni-potsdam.de> | 2015-12-21 02:56:10 +0300 |
commit | b3cf7c22441bae6959732acff18f09775226a1fc (patch) | |
tree | 1a612085b0eadab9a2aa6ed4d0fe945e6217ee96 | |
parent | 58db496d7380f8bc73a8d224e427920e40f5c168 (diff) |
Adapt cmaes to API, enable n-dimensional tensors as input
-rw-r--r-- | cmaes.lua | 351 | ||||
-rw-r--r-- | test/test_cmaes.lua | 17 |
2 files changed, 194 insertions, 174 deletions
@@ -2,50 +2,62 @@ require 'torch' require 'math' local BestSolution = {} ---[[Initialize` CMAES` instance +--[[ An implementation of `CMAES` (Covariance Matrix Adaptation Evolution Strategy), +ported from https://www.lri.fr/~hansen/barecmaes2.html. Parameters ---------- - `opfunc` : a function that takes a single input (X), the point of - evaluation, and returns f(X) and df/dX - `x` : the initial solution vector - ``1D tensor`` of numbers (like ``[3, 2, 1.2]``) - `sigma` - ``float``, initial step-size (standard deviation in each +ARGS: + +- `opfunc` : a function that takes a single input (X), the point of + evaluation, and returns f(X) and df/dX. Note that df/dX is not used +- `x` : the initial point +- `state.sigma` + float, initial step-size (standard deviation in each coordinate) - `maxEval` - ``int``, maximal number of function evaluations - `ftarget` - `float`, target function value - `popsize` +- `state.maxEval` + int, maximal number of function evaluations +- `state.ftarget` + float, target function value +- `state.popsize` population size. 
If this is left empty, 4 + int(3 * log(|x|)) will be used +- `state.ftarget` + stop if fitness < ftarget +- `state.verb_disp` + int, display on console every verb_disp iteration, 0 for never +- `state.args` + arguments to `opfunc` - --]] +RETURN: +- `x*` : the new `x` vector, at the optimal point +- `f` : a table of all function values: + `f[1]` is the value of the function before any optimization and + `f[#f]` is the final fully optimized value, at `x*` +--]] function optim.cmaes(opfunc, x, config, state) - -- process input parameters - local config = config or {} - local state = state or config - N = x:size(1) -- number of objective variables/problem dimension - local xmean = x:clone() -- initial point, distribution mean, a copy - local sigma = state.sigma - local ftarget = state.ftarget -- stop if fitness < ftarget - local maxEval = state.maxEval or 1e3*N^2 - local objfunc = opfunc - local verb_disp = state.verb_disp or 1000 - local lambda = state.popsize -- population size, offspring number - local min_iterations = state.min_iterations or 1 - -- Strategy parameter setting: Selection - if state.popsize == nil then - lambda = 4 + math.floor(3 * math.log(N)) - end + -- process input parameters + local config = config or {} + local state = state or config + local xmean = torch.Tensor(x:clone():double():storage()) -- distribution mean, a flattened copy + N = xmean:size(1) -- number of objective variables/problem dimension + local sigma = state.sigma -- coordinate wise standard deviation (step size) + local ftarget = state.ftarget -- stop if fitness < ftarget + local maxEval = tonumber(state.maxEval) or 1e3*N^2 + local objfunc = opfunc + local verb_disp = state.verb_disp -- display step size + local min_iterations = state.min_iterations or 1 + + local lambda = state.popsize -- population size, offspring number + -- Strategy parameter setting: Selection + if state.popsize == nil then + lambda = 4 + math.floor(3 * math.log(N)) + end - local mu = lambda / 2 -- number of
parents/points for recombination - local weights = torch.range(0,mu-1) - weights:apply(function(i) return math.log(mu+0.5) - math.log(i+1) end) -- recombination weights - local i = 0 - local weightSum = weights:sum() - weights:div(weightSum) -- normalize recombination weights array + local mu = lambda / 2 -- number of parents/points for recombination + local weights = torch.range(0,mu-1):apply(function(i) + return math.log(mu+0.5) - math.log(i+1) end) -- recombination weights + weights:div(weights:sum()) -- normalize recombination weights array local mueff = weights:sum()^2 / torch.pow(weights,2):sum() -- variance-effectiveness of sum w_i x_i -- Strategy parameter setting: Adaptation @@ -65,31 +77,32 @@ function optim.cmaes(opfunc, x, config, state) local invsqrtC = torch.eye(N) -- C^-1/2 local eigeneval = 0 -- tracking the update of B and D local counteval = 0 - local fitnessvals = {} -- for bookkeeping output and termination + local f_hist = {opfunc(x)} -- for bookkeeping output and termination local best = BestSolution.new(nil,nil,counteval) local iteration = 0 -- iteration of the optimize loop + local function ask() - --[[return a list of lambda candidate solutions according to + --[[return a list of lambda candidate solutions according to m + sig * Normal(0,C) = m + sig * B * D * Normal(0,I) --]] - -- Eigendecomposition: first update B, D and invsqrtC from C - -- postpone in case to achieve O(N^2) - if counteval - eigeneval > lambda/(c1+cmu)/C:size(1)/10 then - eigeneval = counteval - C = torch.triu(C) + torch.triu(C,1):t() -- enforce symmetry - D, B = torch.symeig(C,'V') -- eigen decomposition, B==normalized eigenvectors, O(N^3) - D = torch.sqrt(D) -- D contains standard deviations now - invsqrtC = B * torch.diag(torch.pow(D,-1)) * B:t() - end + -- Eigendecomposition: first update B, D and invsqrtC from C + -- postpone in case to achieve O(N^2) + if counteval - eigeneval > lambda/(c1+cmu)/C:size(1)/10 then + eigeneval = counteval + C = torch.triu(C) + 
torch.triu(C,1):t() -- enforce symmetry + D, B = torch.symeig(C,'V') -- eigen decomposition, B==normalized eigenvectors, O(N^3) + D = torch.sqrt(D) -- D contains standard deviations now + invsqrtC = B * torch.diag(torch.pow(D,-1)) * B:t() + end + res = torch.Tensor(lambda,D:size(1)) + for k=1,lambda do --repeat lambda times + z = D:clone() + z:apply(function(d) return d * torch.normal(0,1) end) --randn[k] + res[{k,{}}] = torch.add(xmean, (B * z) * sigma) + end - res = torch.Tensor(lambda,D:size(1)) - for k=1,lambda do --repeat lambda times - z = D:clone() - z:apply(function(d) return d * torch.normal(0,1) end) --randn[k] - res[{k,{}}] = torch.add(xmean, (B * z) * sigma) - end - return res + return res end @@ -101,163 +114,161 @@ function optim.cmaes(opfunc, x, config, state) `arx` a list of solutions, presumably from `ask()` `fitvals` - the corresponding objective function values - - --]] - - local function tell(arx, _fitvals) - -- bookkeeping, preparation - counteval = counteval + _fitvals:size(1) -- slightly artificial to do here - N = xmean:size(1) -- convenience short cuts - local iN = torch.range(1,N) - local xold = xmean:clone() - - -- Sort by fitness and compute weighted mean into xmean - fitvals, arindex = torch.sort(_fitvals) - arx = arx:index(1, arindex[{{1, mu}}]) -- sorted arx - -- fitvals is kept for termination and display only - best:update( arx[1], fitvals[1], counteval) - - xmean:zero() - xmean:addmv(arx:t(), weights) --dot product - - -- Cumulation: update evolution paths - local y = xmean - xold - local z = invsqrtC * y -- == C^(-1/2) * (xnew - xold) - - local c = (cs * (2-cs) * mueff)^0.5 / sigma - ps = ps - ps * cs + z * c -- exponential decay on ps - local hsig = (torch.sum(torch.pow(ps,2)) / - (1-(1-cs)^(2*counteval/lambda)) / N < 2 + 4./(N+1)) - hsig = hsig and 1.0 or 0.0 --use binary numbers - - local c = (cc * (2-cc) * mueff)^0.5 / sigma - pc = pc - pc * cc + y * c * hsig -- exponential decay on pc - - -- Adapt covariance matrix C - local 
c1a = c1 - (1-hsig^2) * c1 * cc * (2-cc) - -- for a minor adjustment to the variance loss by hsig - for i=1,N do - for j=1,N do - local r = torch.range(1,mu) - r:apply(function(k) return weights[k] * (arx[k][i]-xold[i]) * (arx[k][j]-xold[j]) end) + the corresponding objective function values --]] + local function tell(arx, _fitvals) + -- bookkeeping, preparation + counteval = counteval + _fitvals:size(1) -- slightly artificial to do here + N = xmean:size(1) -- convenience short cuts + local iN = torch.range(1,N) + local xold = xmean:clone() + + -- Sort by fitness and compute weighted mean into xmean + fitvals, arindex = torch.sort(_fitvals) + arx = arx:index(1, arindex[{{1, mu}}]) -- sorted arx + + table.insert(f_hist, fitvals[1]) + best:update(arx[1], fitvals[1], counteval) + + xmean:zero() + xmean:addmv(arx:t(), weights) --dot product + + -- Cumulation: update evolution paths + local y = xmean - xold + local z = invsqrtC * y -- == C^(-1/2) * (xnew - xold) + + local c = (cs * (2-cs) * mueff)^0.5 / sigma + ps = ps - ps * cs + z * c -- exponential decay on ps + local hsig = (torch.sum(torch.pow(ps,2)) / + (1-(1-cs)^(2*counteval/lambda)) / N < 2 + 4./(N+1)) + hsig = hsig and 1.0 or 0.0 --use binary numbers + + local c = (cc * (2-cc) * mueff)^0.5 / sigma + pc = pc - pc * cc + y * c * hsig -- exponential decay on pc + + -- Adapt covariance matrix C + local c1a = c1 - (1-hsig^2) * c1 * cc * (2-cc) + -- for a minor adjustment to the variance loss by hsig + for i=1,N do + for j=1,N do + local r = torch.range(1,mu) + r:apply(function(k) + return weights[k] * (arx[k][i]-xold[i]) * (arx[k][j]-xold[j]) end) Cmuij = torch.sum(r) / sigma^2 -- rank-mu update C[i][j] = C[i][j] + ((-c1a - cmu) * C[i][j] + - c1 * pc[i]*pc[j] + cmu * Cmuij) - end - end + c1 * pc[i]*pc[j] + cmu * Cmuij) + end + end - -- Adapt step-size sigma with factor <= exp(0.6) \approx 1.82 - sigma = sigma * math.exp(math.min(0.6, - (cs / damps) * (torch.sum(torch.pow(ps,2))/N - 1)/2)) + -- Adapt step-size sigma 
with factor <= exp(0.6) \approx 1.82 + sigma = sigma * math.exp(math.min(0.6, + (cs / damps) * (torch.sum(torch.pow(ps,2))/N - 1)/2)) - end + end - local function stop() + local function stop() --[[return satisfied termination conditions in a table like {'termination reason':value, ...}, for example {'tolfun':1e-12}, or the empty dict {}--]] res = {} if counteval > 0 then - if counteval >= maxEval then - res['evals'] = maxEval - end - if ftarget ~= nil and fitvals:nElement() > 0 and fitvals[1] <= ftarget then - res['ftarget'] = ftarget - end - if torch.max(D) > 1e7 * torch.min(D) then - res['condition'] = 1e7 - end - if fitvals:nElement() > 1 and fitvals[fitvals:nElement()] - fitvals[1] < 1e-12 then - res['tolfun'] = 1e-12 - end - if sigma * torch.max(D) < 1e-11 then - -- remark: max(D) >= max(diag(C))^0.5 - res['tolx'] = 1e-11 - end + if counteval >= maxEval then + res['evals'] = maxEval + end + if ftarget ~= nil and fitvals:nElement() > 0 and fitvals[1] <= ftarget then + res['ftarget'] = ftarget + end + if torch.max(D) > 1e7 * torch.min(D) then + res['condition'] = 1e7 + end + if fitvals:nElement() > 1 and fitvals[fitvals:nElement()] - fitvals[1] < 1e-12 then + res['tolfun'] = 1e-12 + end + if sigma * torch.max(D) < 1e-11 then + -- remark: max(D) >= max(diag(C))^0.5 + res['tolx'] = 1e-11 + end end return res - end + end - local function disp(verb_modulo) + local function disp(verb_modulo) --[[display some iteration info--]] + if verb_disp == 0 then + return nil + end local iteration = counteval / lambda if iteration == 1 or iteration % (10*verb_modulo) == 0 then - print('evals:\t ax-ratio max(std) f-value') + print('evals:\t ax-ratio max(std) f-value') end if iteration <= 2 or iteration % verb_modulo == 0 then - local max_std = math.sqrt(torch.max(torch.diag(C))) - print(tostring(counteval).. ': ' .. - string.format(' %6.1f %8.1e ', torch.max(D) / torch.min(D), sigma * max_std) - .. 
tostring(fitvals[1])) + local max_std = math.sqrt(torch.max(torch.diag(C))) + print(tostring(counteval).. ': ' .. + string.format(' %6.1f %8.1e ', torch.max(D) / torch.min(D), sigma * max_std) + .. tostring(fitvals[1])) end return nil - end + end - local function result() - --[[return (xbest, f(xbest), evaluations_xbest, evaluations, - iterations, xmean)--]] - bestmean, bestf, bestcount = best:get() - return bestmean, bestf, bestcount, counteval, iteration, xmean - end - - while next(stop()) == nil or iteration < min_iterations do + while next(stop()) == nil or iteration < min_iterations do if iterations and iteration >= iterations then - return -1 + return -1 end iteration = iteration + 1 local X = ask() -- deliver candidate solutions local _fitvals = torch.Tensor(X:size(1)) for i=1, _fitvals:size(1) do - _fitvals[i] = objfunc(X[i], state.objfunc_args) + local candidate = torch.Tensor(X[i]:clone():storage(),1,x:size()):typeAs(x) + _fitvals[i] = objfunc(candidate, state.objfunc_args) end + tell(X, _fitvals) disp(verb_disp) - end - -- logger.add(self, force=True) if logger else None - if verb_disp then + end + if verb_disp > 0 then for k, v in pairs(stop()) do - print('termination by', k, '=', v) - end - bestmu, f, c, ceval, iter, bestmu = result() + print('termination by', k, '=', v) + end + + bestmu, f, c = best:get() print('best f-value =', f) print('solution = ') print(bestmu) - print('best found at iterations: ', c/lambda, ' , total iterations: ', iter) + print('best found at iterations: ', c/lambda, ' , total iterations: ', iteration) + end + table.insert(f_hist, f) + + return bestmu, f_hist, counteval end -end -BestSolution.__index = BestSolution --- syntax equivalent to "BestSolution.new = function..." 
-function BestSolution.new(x, f, evals) - local self = setmetatable({}, BestSolution) - self.x = x - self.f = f - self.evals = evals - return self -end + BestSolution.__index = BestSolution + function BestSolution.new(x, f, evals) + local self = setmetatable({}, BestSolution) + self.x = x + self.f = f + self.evals = evals + return self + end -function BestSolution.update(self, arx, arf, evals) - --[[initialize the best solution with `x`, `f`, and `evals`. + function BestSolution.update(self, arx, arf, evals) + --[[initialize the best solution with `x`, `f`, and `evals`. Better solutions have smaller `f`-values.--]] - if self.f == nil or arf < self.f then - -- i = i[1] --unpacking from 1-el tensor - self.x = arx:clone() - self.f = arf - if self.evals == nil then - self.evals = nil - else - self.evals = evals - end - end - return self -end - -function BestSolution.get(self) - return self.x, self.f, self.evals -end + if self.f == nil or arf < self.f then + self.x = arx:clone() + self.f = arf + if self.evals == nil then + self.evals = nil + else + self.evals = evals + end + end + return self + end + + function BestSolution.get(self) + return self.x, self.f, self.evals + end diff --git a/test/test_cmaes.lua b/test/test_cmaes.lua index 4d21465..6e808d3 100644 --- a/test/test_cmaes.lua +++ b/test/test_cmaes.lua @@ -1,10 +1,19 @@ require 'torch' require 'optim' - require 'rosenbrock' require 'l2' -x = torch.Tensor(4):fill(0) -config = {maxEval=10000, sigma=0.5} -x,fx,i=optim.cmaes(rosenbrock,x, config) +x = torch.Tensor(2):fill(0) +config = {maxEval=10000, sigma=0.5, ftarget=0.00001, verb_disp=0} +x,fx,i=optim.cmaes(rosenbrock,x,config) + + +print('Rosenbrock test') +print() +print('Number of function evals = ',i) +print('x=');print(x) +print('fx=') +for i=1,#fx do print(i,fx[i]); end +print() +print() |