lswolfe.lua


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192

--[[ A Line Search satisfying the Wolfe conditions

ARGS:
- `opfunc` : a function (the objective) that takes a single input (X),
         the point of evaluation, and returns f(X) and df/dX
- `x`          : initial point / starting location
- `t`          : initial step size
- `d`          : descent direction
- `f`          : initial function value
- `g`          : gradient at initial location
- `gtd`        : directional derivative at starting location
- `options.c1` : sufficient decrease parameter
- `options.c2` : curvature parameter
- `options.tolX`    : minimum allowable step length
- `options.maxIter` : maximum nb of iterations

RETURN:
- `f`          : function value at x+t*d
- `g`          : gradient value at x+t*d
- `x`          : the next x (=x+t*d)
- `t`          : the step length
- `lsFuncEval` : the number of function evaluations
]]
function optim.lswolfe(opfunc,x,t,d,f,g,gtd,options)
   -- options
   options = options or {}
   local c1 = options.c1 or 1e-4
   local c2 = options.c2 or 0.9
   local tolX = options.tolX or 1e-9
   local maxIter = options.maxIter or 20
   local isverbose = options.verbose or false

   -- some shortcuts
   local abs = torch.abs
   local min = math.min
   local max = math.max

   -- verbose function
   local function verbose(...)
      if isverbose then print('<optim.lswolfe> ', ...) end
   end

   -- evaluate objective and gradient using initial step
   local x_init = x:clone()
   x:add(t,d)
   local f_new,g_new = opfunc(x)
   local lsFuncEval = 1
   local gtd_new = g_new * d

   -- bracket an interval containing a point satisfying the Wolfe
   -- criteria
   local LSiter,t_prev,done = 0,0,false
   local f_prev,g_prev,gtd_prev = f,g:clone(),gtd
   local bracket,bracketFval,bracketGval
   while LSiter < maxIter do
      -- check conditions:
      if (f_new > (f + c1*t*gtd)) or (LSiter > 1 and f_new >= f_prev) then
         bracket = x.new{t_prev,t}
         bracketFval = x.new{f_prev,f_new}
         bracketGval = x.new(2,g_new:size(1))
         bracketGval[1] = g_prev
         bracketGval[2] = g_new
         break

      elseif abs(gtd_new) <= -c2*gtd then
         bracket = x.new{t}
         bracketFval = x.new{f_new}
         bracketGval = x.new(1,g_new:size(1))
         bracketGval[1] = g_new
         done = true
         break

      elseif gtd_new >= 0 then
         bracket = x.new{t_prev,t}
         bracketFval = x.new{f_prev,f_new}
         bracketGval = x.new(2,g_new:size(1))
         bracketGval[1] = g_prev
         bracketGval[2] = g_new
         break

      end

      -- interpolate:
      local tmp = t_prev
      t_prev = t
      local minStep = t + 0.01*(t-tmp)
      local maxStep = t*10
      t = optim.polyinterp(x.new{{tmp,f_prev,gtd_prev},
                                  {t,f_new,gtd_new}},
                           minStep, maxStep)

      -- next step:
      f_prev = f_new
      g_prev = g_new:clone()
      gtd_prev = gtd_new
      x[{}] = x_init
      x:add(t,d)
      f_new,g_new = opfunc(x)
      lsFuncEval = lsFuncEval + 1
      gtd_new = g_new * d
      LSiter = LSiter + 1
   end

   -- reached max nb of iterations?
   if LSiter == maxIter then
      bracket = x.new{0,t}
      bracketFval = x.new{f,f_new}
      bracketGval = x.new(2,g_new:size(1))
      bracketGval[1] = g
      bracketGval[2] = g_new
   end

   -- zoom phase: we now have a point satisfying the criteria, or
   -- a bracket around it. We refine the bracket until we find the
   -- exact point satisfying the criteria
   local insufProgress = false
   local LOposRemoved = 0
   while not done and LSiter < maxIter do
      -- find high and low points in bracket
      local f_LO,LOpos = bracketFval:min(1)
      LOpos = LOpos[1] f_LO = f_LO[1]
      local HIpos = -LOpos+3

      -- compute new trial value
      t = optim.polyinterp(x.new{{bracket[1],bracketFval[1],bracketGval[1]*d},
                                  {bracket[2],bracketFval[2],bracketGval[2]*d}})

      -- test what we are making sufficient progress
      if min(bracket:max()-t,t-bracket:min())/(bracket:max()-bracket:min()) < 0.1 then
         if insufProgress or t>=bracket:max() or t <= bracket:min() then
            if abs(t-bracket:max()) < abs(t-bracket:min()) then
               t = bracket:max()-0.1*(bracket:max()-bracket:min())
            else
               t = bracket:min()+0.1*(bracket:max()-bracket:min())
            end
            insufProgress = false
         else
            insufProgress = true
         end
      else
         insufProgress = false
      end

      -- Evaluate new point
      x[{}] = x_init
      x:add(t,d)
      f_new,g_new = opfunc(x)
      lsFuncEval = lsFuncEval + 1
      gtd_new = g_new * d
      LSiter = LSiter + 1
      if f_new > f + c1*t*gtd or f_new >= f_LO then
         -- Armijo condition not satisfied or not lower than lowest point
         bracket[HIpos] = t
         bracketFval[HIpos] = f_new
         bracketGval[HIpos] = g_new
      else
         if abs(gtd_new) <= - c2*gtd then
            -- Wolfe conditions satisfied
            done = true
         elseif gtd_new*(bracket[HIpos]-bracket[LOpos]) >= 0 then
            -- Old HI becomes new LO
            bracket[HIpos] = bracket[LOpos]
            bracketFval[HIpos] = bracketFval[LOpos]
            bracketGval[HIpos] = bracketGval[LOpos]
         end
         -- New point becomes new LO
         bracket[LOpos] = t
         bracketFval[LOpos] = f_new
         bracketGval[LOpos] = g_new
      end

      -- done?
      if not done and abs((bracket[1]-bracket[2])*gtd_new) < tolX then
         break
      end
   end

   -- be verbose
   if LSiter == maxIter then
      verbose('reached max number of iterations')
   end

   -- return stuff
   local _,LOpos = bracketFval:min(1)
   LOpos = LOpos[1]
   t = bracket[LOpos]
   f_new = bracketFval[LOpos]
   g_new = bracketGval[LOpos]
   x[{}] = x_init
   x:add(t,d)
	return f_new,g_new,x,t,lsFuncEval
end