
github.com/torch/optim.git
author     MaxReimann <max.reimann@student.hpi.uni-potsdam.de>   2015-12-21 02:56:10 +0300
committer  MaxReimann <max.reimann@student.hpi.uni-potsdam.de>   2015-12-21 02:56:10 +0300
commit     b3cf7c22441bae6959732acff18f09775226a1fc (patch)
tree       1a612085b0eadab9a2aa6ed4d0fe945e6217ee96 /cmaes.lua
parent     58db496d7380f8bc73a8d224e427920e40f5c168 (diff)
Adapt cmaes to API, enable n-dimensional tensors as input
Diffstat (limited to 'cmaes.lua')
-rw-r--r--  cmaes.lua  351
1 file changed, 181 insertions(+), 170 deletions(-)
diff --git a/cmaes.lua b/cmaes.lua
index 29f0b53..bcb45e4 100644
--- a/cmaes.lua
+++ b/cmaes.lua
@@ -2,50 +2,62 @@ require 'torch'
require 'math'
local BestSolution = {}
---[[Initialize` CMAES` instance
+--[[ An implementation of `CMAES` (Covariance Matrix Adaptation Evolution Strategy),
+ported from https://www.lri.fr/~hansen/barecmaes2.html.
Parameters
----------
- `opfunc` : a function that takes a single input (X), the point of
- evaluation, and returns f(X) and df/dX
- `x` : the initial solution vector
- ``1D tensor`` of numbers (like ``[3, 2, 1.2]``)
- `sigma`
- ``float``, initial step-size (standard deviation in each
+ARGS:
+
+- `opfunc` : a function that takes a single input (X), the point of
+ evaluation, and returns f(X) and df/dX. Note that df/dX is not used
+- `x` : the initial point
+- `state.sigma`
+ float, initial step-size (standard deviation in each
coordinate)
- `maxEval`
- ``int``, maximal number of function evaluations
- `ftarget`
- `float`, target function value
- `popsize`
+- `state.maxEval`
+ int, maximal number of function evaluations
+- `state.ftarget`
+      float, target function value (stop if fitness < ftarget)
+- `state.popsize`
population size. If this is left empty,
4 + int(3 * log(|x|)) will be used
+- `state.verb_disp`
+ int, display on console every verb_disp iteration, 0 for never
+- `state.objfunc_args`
+ arguments to `opfunc`
- --]]
+RETURN:
+- `x*` : the new `x` vector, at the optimal point
+- `f` : a table of all function values:
+ `f[1]` is the value of the function before any optimization and
+   `f[#f]` is the final fully optimized value, at `x*`
+- `counteval` : the number of function evaluations used
+--]]
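For illustration, a minimal usage sketch of the API documented above (the quadratic objective, the tensor values and the budget are invented for this example, they are not part of the patch):

   require 'optim'
   -- objective: f(x) = x'x; CMA-ES ignores gradients, so only f is returned
   local function fitness(x) return x:dot(x) end
   local x0 = torch.Tensor{3, 2, 1.2}                         -- initial solution vector
   local state = {sigma = 0.5, maxEval = 2000, verb_disp = 0}
   local xstar, f_hist, counteval = optim.cmaes(fitness, x0, state)
   print(f_hist[1], f_hist[#f_hist])                          -- f before and after optimization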
function optim.cmaes(opfunc, x, config, state)
- -- process input parameters
- local config = config or {}
- local state = state or config
- N = x:size(1) -- number of objective variables/problem dimension
- local xmean = x:clone() -- initial point, distribution mean, a copy
- local sigma = state.sigma
- local ftarget = state.ftarget -- stop if fitness < ftarget
- local maxEval = state.maxEval or 1e3*N^2
- local objfunc = opfunc
- local verb_disp = state.verb_disp or 1000
- local lambda = state.popsize -- population size, offspring number
- local min_iterations = state.min_iterations or 1
- -- Strategy parameter setting: Selection
- if state.popsize == nil then
- lambda = 4 + math.floor(3 * math.log(N))
- end
+ -- process input parameters
+ local config = config or {}
+ local state = state or config
+ local xmean = torch.Tensor(x:clone():double():storage()) -- distribution mean, a flattened copy
+ local N = xmean:size(1) -- number of objective variables/problem dimension
+ local sigma = state.sigma -- coordinate-wise standard deviation (step size)
+ local ftarget = state.ftarget -- stop if fitness < ftarget
+ local maxEval = tonumber(state.maxEval) or 1e3*N^2
+ local objfunc = opfunc
+ local verb_disp = state.verb_disp or 0 -- display progress every verb_disp iterations, 0 for never
+ local min_iterations = state.min_iterations or 1
+
+ local lambda = state.popsize -- population size, offspring number
+ -- Strategy parameter setting: Selection
+ if state.popsize == nil then
+ lambda = 4 + math.floor(3 * math.log(N))
+ end
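The storage() round trip above is what the commit message means by "enable n-dimensional tensors as input": the optimizer itself only ever sees a flat 1D vector, and the original shape is rebuilt before every call to opfunc (see the evaluation loop further down). A sketch of the same round trip in isolation, with an invented 4x5 tensor:

   local W = torch.randn(4, 5)                              -- n-dimensional input
   local flat = torch.Tensor(W:clone():double():storage())  -- flat 1D view of the same 20 numbers
   local W2 = torch.Tensor(flat:clone():storage(), 1, W:size()):typeAs(W)  -- shape restored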
- local mu = lambda / 2 -- number of parents/points for recombination
- local weights = torch.range(0,mu-1)
- weights:apply(function(i) return math.log(mu+0.5) - math.log(i+1) end) -- recombination weights
- local i = 0
- local weightSum = weights:sum()
- weights:div(weightSum) -- normalize recombination weights array
+ local mu = lambda / 2 -- number of parents/points for recombination
+ local weights = torch.range(0,mu-1):apply(function(i)
+ return math.log(mu+0.5) - math.log(i+1) end) -- recombination weights
+ weights:div(weights:sum()) -- normalize recombination weights array
local mueff = weights:sum()^2 / torch.pow(weights,2):sum() -- variance-effectiveness of sum w_i x_i
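A worked example of the recombination weights (lambda = 8, hence mu = 4, are invented values for illustration): the unnormalized weights log(4.5) - log(i) for i = 1..4 come out to roughly 1.50, 0.81, 0.41, 0.12, so after normalization the best parent contributes about 53% of the new mean and the worst about 4%, and mueff = (sum w_i)^2 / (sum w_i^2) ≈ 2.6 is the "effective" number of parents:

   local mu = 4
   local w = torch.range(0, mu-1):apply(function(i) return math.log(mu+0.5) - math.log(i+1) end)
   w:div(w:sum())                            -- w ≈ {0.53, 0.29, 0.14, 0.04}
   print(w:sum()^2 / torch.pow(w, 2):sum())  -- mueff ≈ 2.6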
-- Strategy parameter setting: Adaptation
@@ -65,31 +77,32 @@ function optim.cmaes(opfunc, x, config, state)
local invsqrtC = torch.eye(N) -- C^-1/2
local eigeneval = 0 -- tracking the update of B and D
local counteval = 0
- local fitnessvals = {} -- for bookkeeping output and termination
+ local f_hist = {(opfunc(x))} -- initial f-value; parentheses keep only the first return value
local best = BestSolution.new(nil,nil,counteval)
local iteration = 0 -- iteration of the optimize loop
+
local function ask()
- --[[return a list of lambda candidate solutions according to
+ --[[return a list of lambda candidate solutions according to
m + sig * Normal(0,C) = m + sig * B * D * Normal(0,I)
--]]
- -- Eigendecomposition: first update B, D and invsqrtC from C
- -- postpone in case to achieve O(N^2)
- if counteval - eigeneval > lambda/(c1+cmu)/C:size(1)/10 then
- eigeneval = counteval
- C = torch.triu(C) + torch.triu(C,1):t() -- enforce symmetry
- D, B = torch.symeig(C,'V') -- eigen decomposition, B==normalized eigenvectors, O(N^3)
- D = torch.sqrt(D) -- D contains standard deviations now
- invsqrtC = B * torch.diag(torch.pow(D,-1)) * B:t()
- end
+ -- Eigendecomposition: first update B, D and invsqrtC from C
+ -- postponed (lazy) so that the amortized cost per sample stays O(N^2)
+ if counteval - eigeneval > lambda/(c1+cmu)/C:size(1)/10 then
+ eigeneval = counteval
+ C = torch.triu(C) + torch.triu(C,1):t() -- enforce symmetry
+ D, B = torch.symeig(C,'V') -- eigen decomposition, B==normalized eigenvectors, O(N^3)
+ D = torch.sqrt(D) -- D contains standard deviations now
+ invsqrtC = B * torch.diag(torch.pow(D,-1)) * B:t()
+ end
+ local res = torch.Tensor(lambda,D:size(1))
+ for k=1,lambda do --repeat lambda times
+ local z = D:clone()
+ z:apply(function(d) return d * torch.normal(0,1) end) -- z = D .* randn(N)
+ res[{k,{}}] = torch.add(xmean, (B * z) * sigma)
+ end
- res = torch.Tensor(lambda,D:size(1))
- for k=1,lambda do --repeat lambda times
- z = D:clone()
- z:apply(function(d) return d * torch.normal(0,1) end) --randn[k]
- res[{k,{}}] = torch.add(xmean, (B * z) * sigma)
- end
- return res
+ return res
end
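The identity in the docstring of ask(), m + sigma * Normal(0,C) = m + sigma * B * D * Normal(0,I), holds because the eigendecomposition above factors C = B * diag(D^2) * B^T. One candidate drawn in isolation (a sketch reusing the surrounding names N, B, D, xmean and sigma):

   local z = torch.randn(N)          -- standard normal draw
   local y = B * torch.cmul(D, z)    -- scale by D, rotate by B: y ~ Normal(0, C)
   local candidate = xmean + y * sigma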
@@ -101,163 +114,161 @@ function optim.cmaes(opfunc, x, config, state)
`arx`
a list of solutions, presumably from `ask()`
`fitvals`
- the corresponding objective function values
-
- --]]
-
- local function tell(arx, _fitvals)
- -- bookkeeping, preparation
- counteval = counteval + _fitvals:size(1) -- slightly artificial to do here
- N = xmean:size(1) -- convenience short cuts
- local iN = torch.range(1,N)
- local xold = xmean:clone()
-
- -- Sort by fitness and compute weighted mean into xmean
- fitvals, arindex = torch.sort(_fitvals)
- arx = arx:index(1, arindex[{{1, mu}}]) -- sorted arx
- -- fitvals is kept for termination and display only
- best:update( arx[1], fitvals[1], counteval)
-
- xmean:zero()
- xmean:addmv(arx:t(), weights) --dot product
-
- -- Cumulation: update evolution paths
- local y = xmean - xold
- local z = invsqrtC * y -- == C^(-1/2) * (xnew - xold)
-
- local c = (cs * (2-cs) * mueff)^0.5 / sigma
- ps = ps - ps * cs + z * c -- exponential decay on ps
- local hsig = (torch.sum(torch.pow(ps,2)) /
- (1-(1-cs)^(2*counteval/lambda)) / N < 2 + 4./(N+1))
- hsig = hsig and 1.0 or 0.0 --use binary numbers
-
- local c = (cc * (2-cc) * mueff)^0.5 / sigma
- pc = pc - pc * cc + y * c * hsig -- exponential decay on pc
-
- -- Adapt covariance matrix C
- local c1a = c1 - (1-hsig^2) * c1 * cc * (2-cc)
- -- for a minor adjustment to the variance loss by hsig
- for i=1,N do
- for j=1,N do
- local r = torch.range(1,mu)
- r:apply(function(k) return weights[k] * (arx[k][i]-xold[i]) * (arx[k][j]-xold[j]) end)
+ the corresponding objective function values --]]
+ local function tell(arx, _fitvals)
+ -- bookkeeping, preparation
+ counteval = counteval + _fitvals:size(1) -- slightly artificial to do here
+ N = xmean:size(1) -- convenience short cuts
+ local iN = torch.range(1,N)
+ local xold = xmean:clone()
+
+ -- Sort by fitness and compute weighted mean into xmean
+ fitvals, arindex = torch.sort(_fitvals)
+ arx = arx:index(1, arindex[{{1, mu}}]) -- sorted arx
+
+ table.insert(f_hist, fitvals[1])
+ best:update(arx[1], fitvals[1], counteval)
+
+ xmean:zero()
+ xmean:addmv(arx:t(), weights) --dot product
+
+ -- Cumulation: update evolution paths
+ local y = xmean - xold
+ local z = invsqrtC * y -- == C^(-1/2) * (xnew - xold)
+
+ local c = (cs * (2-cs) * mueff)^0.5 / sigma
+ ps = ps - ps * cs + z * c -- exponential decay on ps
+ local hsig = (torch.sum(torch.pow(ps,2)) /
+ (1-(1-cs)^(2*counteval/lambda)) / N < 2 + 4./(N+1))
+ hsig = hsig and 1.0 or 0.0 -- convert boolean to 0/1
+
+ local c = (cc * (2-cc) * mueff)^0.5 / sigma
+ pc = pc - pc * cc + y * c * hsig -- exponential decay on pc
+
+ -- Adapt covariance matrix C
+ local c1a = c1 - (1-hsig^2) * c1 * cc * (2-cc)
+ -- for a minor adjustment to the variance loss by hsig
+ for i=1,N do
+ for j=1,N do
+ local r = torch.range(1,mu)
+ r:apply(function(k)
+ return weights[k] * (arx[k][i]-xold[i]) * (arx[k][j]-xold[j]) end)
Cmuij = torch.sum(r) / sigma^2 -- rank-mu update
C[i][j] = C[i][j] + ((-c1a - cmu) * C[i][j] +
- c1 * pc[i]*pc[j] + cmu * Cmuij)
- end
- end
+ c1 * pc[i]*pc[j] + cmu * Cmuij)
+ end
+ end
- -- Adapt step-size sigma with factor <= exp(0.6) \approx 1.82
- sigma = sigma * math.exp(math.min(0.6,
- (cs / damps) * (torch.sum(torch.pow(ps,2))/N - 1)/2))
+ -- Adapt step-size sigma with factor <= exp(0.6) \approx 1.82
+ sigma = sigma * math.exp(math.min(0.6,
+ (cs / damps) * (torch.sum(torch.pow(ps,2))/N - 1)/2))
- end
+ end
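The entrywise double loop above is the usual rank-one plus rank-mu covariance update, C <- (1 - c1a - cmu) * C + c1 * pc * pc^T + cmu * sum_k w_k * y_k * y_k^T with y_k = (arx[k] - xold) / sigma. A matrix-form sketch of the same update (illustrative, not the patch's code):

   local Cmu = torch.zeros(N, N)
   for k = 1, mu do
      local y = (arx[k] - xold) / sigma
      Cmu:add(weights[k], torch.ger(y, y))   -- weighted outer product of the k-th selected step
   end
   C = C * (1 - c1a - cmu) + torch.ger(pc, pc) * c1 + Cmu * cmu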
- local function stop()
+ local function stop()
--[[return satisfied termination conditions in a table like
{'termination reason':value, ...}, for example {'tolfun':1e-12},
or the empty dict {}--]]
res = {}
if counteval > 0 then
- if counteval >= maxEval then
- res['evals'] = maxEval
- end
- if ftarget ~= nil and fitvals:nElement() > 0 and fitvals[1] <= ftarget then
- res['ftarget'] = ftarget
- end
- if torch.max(D) > 1e7 * torch.min(D) then
- res['condition'] = 1e7
- end
- if fitvals:nElement() > 1 and fitvals[fitvals:nElement()] - fitvals[1] < 1e-12 then
- res['tolfun'] = 1e-12
- end
- if sigma * torch.max(D) < 1e-11 then
- -- remark: max(D) >= max(diag(C))^0.5
- res['tolx'] = 1e-11
- end
+ if counteval >= maxEval then
+ res['evals'] = maxEval
+ end
+ if ftarget ~= nil and fitvals:nElement() > 0 and fitvals[1] <= ftarget then
+ res['ftarget'] = ftarget
+ end
+ if torch.max(D) > 1e7 * torch.min(D) then
+ res['condition'] = 1e7
+ end
+ if fitvals:nElement() > 1 and fitvals[fitvals:nElement()] - fitvals[1] < 1e-12 then
+ res['tolfun'] = 1e-12
+ end
+ if sigma * torch.max(D) < 1e-11 then
+ -- remark: max(D) >= max(diag(C))^0.5
+ res['tolx'] = 1e-11
+ end
end
return res
- end
+ end
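Because stop() reports termination through the keys of the returned table, the main loop below only needs to test next(stop()) == nil; reading the reasons out looks like this (sketch):

   local reasons = stop()                    -- e.g. {tolfun = 1e-12}, or {} to continue
   for reason, value in pairs(reasons) do
      print('terminated by', reason, '=', value)
   end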
- local function disp(verb_modulo)
+ local function disp(verb_modulo)
--[[display some iteration info--]]
+ if verb_disp == 0 then
+ return nil
+ end
local iteration = counteval / lambda
if iteration == 1 or iteration % (10*verb_modulo) == 0 then
- print('evals:\t ax-ratio max(std) f-value')
+ print('evals:\t ax-ratio max(std) f-value')
end
if iteration <= 2 or iteration % verb_modulo == 0 then
- local max_std = math.sqrt(torch.max(torch.diag(C)))
- print(tostring(counteval).. ': ' ..
- string.format(' %6.1f %8.1e ', torch.max(D) / torch.min(D), sigma * max_std)
- .. tostring(fitvals[1]))
+ local max_std = math.sqrt(torch.max(torch.diag(C)))
+ print(tostring(counteval).. ': ' ..
+ string.format(' %6.1f %8.1e ', torch.max(D) / torch.min(D), sigma * max_std)
+ .. tostring(fitvals[1]))
end
return nil
- end
+ end
- local function result()
- --[[return (xbest, f(xbest), evaluations_xbest, evaluations,
- iterations, xmean)--]]
- bestmean, bestf, bestcount = best:get()
- return bestmean, bestf, bestcount, counteval, iteration, xmean
- end
-
- while next(stop()) == nil or iteration < min_iterations do
+ while next(stop()) == nil or iteration < min_iterations do
if iterations and iteration >= iterations then
- return -1
+ return -1
end
iteration = iteration + 1
local X = ask() -- deliver candidate solutions
local _fitvals = torch.Tensor(X:size(1))
for i=1, _fitvals:size(1) do
- _fitvals[i] = objfunc(X[i], state.objfunc_args)
+ local candidate = torch.Tensor(X[i]:clone():storage(),1,x:size()):typeAs(x)
+ _fitvals[i] = objfunc(candidate, state.objfunc_args)
end
+
tell(X, _fitvals)
disp(verb_disp)
- end
- -- logger.add(self, force=True) if logger else None
- if verb_disp then
+ end
+ local bestmu, f, c = best:get() -- fetch the best solution whether or not we print
+ if verb_disp > 0 then
for k, v in pairs(stop()) do
- print('termination by', k, '=', v)
- end
- bestmu, f, c, ceval, iter, bestmu = result()
+ print('termination by', k, '=', v)
+ end
print('best f-value =', f)
print('solution = ')
print(bestmu)
- print('best found at iterations: ', c/lambda, ' , total iterations: ', iter)
+ print('best found at iteration: ', c/lambda, ', total iterations: ', iteration)
+ end
+ table.insert(f_hist, f)
+
+ return bestmu, f_hist, counteval
end
-end
-BestSolution.__index = BestSolution
--- syntax equivalent to "BestSolution.new = function..."
-function BestSolution.new(x, f, evals)
- local self = setmetatable({}, BestSolution)
- self.x = x
- self.f = f
- self.evals = evals
- return self
-end
+ BestSolution.__index = BestSolution
+ function BestSolution.new(x, f, evals)
+ local self = setmetatable({}, BestSolution)
+ self.x = x
+ self.f = f
+ self.evals = evals
+ return self
+ end
-function BestSolution.update(self, arx, arf, evals)
- --[[initialize the best solution with `x`, `f`, and `evals`.
+ function BestSolution.update(self, arx, arf, evals)
+ --[[update the best solution with `x`, `f`, and `evals`.
Better solutions have smaller `f`-values.--]]
- if self.f == nil or arf < self.f then
- -- i = i[1] --unpacking from 1-el tensor
- self.x = arx:clone()
- self.f = arf
- if self.evals == nil then
- self.evals = nil
- else
- self.evals = evals
- end
- end
- return self
-end
-
-function BestSolution.get(self)
- return self.x, self.f, self.evals
-end
+ if self.f == nil or arf < self.f then
+ self.x = arx:clone()
+ self.f = arf
+ self.evals = evals -- may be nil if no evaluation count was given
+ end
+ return self
+ end
+
+ function BestSolution.get(self)
+ return self.x, self.f, self.evals
+ end