From 087d96271f60bd3fd80e8b3aaab72626cd0b6e53 Mon Sep 17 00:00:00 2001 From: Clement Farabet Date: Thu, 25 Aug 2011 16:17:08 -0400 Subject: Added map-reduce version of l-BFGS. In dev... --- LBFGSOptimization.lua | 102 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 102 insertions(+) diff --git a/LBFGSOptimization.lua b/LBFGSOptimization.lua index 83393d9..f0e130a 100644 --- a/LBFGSOptimization.lua +++ b/LBFGSOptimization.lua @@ -10,6 +10,7 @@ function LBFGS:__init(...) {arg='maxIterations', type='number', help='maximum nb of iterations per pass (0 = no max)', default=0}, {arg='maxLineSearch', type='number', help='maximum nb of steps in line search', default=20}, {arg='sparsity', type='number', help='sparsity coef (Orthantwise C)', default=0}, + {arg='parallelize', type='number', help='parallelize onto N cores (experimental!)', default=1}, {arg='verbose', type='number', help='verbose level during training [0-2]', default=0} ) self.parametersT = nnx.getParameters(self.module) @@ -19,6 +20,14 @@ end function LBFGS:forward(inputs, targets, options) options = options or {} + if self.parallelize > 1 then + return self:forward_mapreduce(inputs, targets, options) + else + return self:forward_sequential(inputs, targets, options) + end +end + +function LBFGS:forward_sequential(inputs, targets, options) -- (1) construct a closure that compute f(inputs) + df/dW -- after each call to that function: -- + self.parameters contains the current X vector @@ -73,3 +82,96 @@ function LBFGS:forward(inputs, targets, options) -- (5) return current output after optimization return self.output end + +function LBFGS:forward_mapreduce(inputs, targets, options) + -- (0) clone module+criterion for parallel evaluations + local modules = {} + local criterions = {} + local outputs = {} + self.parametersT = {} + self.gradParametersT = {} + for m = 1,self.parallelize do + if m == 1 then + modules[m] = self.module + criterions[m] = self.criterion + else + modules[m] = self.module:clone() + criterions[m] = self.criterion:clone() + end + self.parametersT[m] = nnx.getParameters(modules[m]) + self.gradParametersT[m] = nnx.getGradParameters(modules[m]) + end + + -- (1) construct a closure that compute f(inputs) + df/dW + -- after each call to that function: + -- + self.parameters contains the current X vector + -- + self.gradParameters contains the estimated dF/dX vector + -- + self.output contains the estimated (average) F(X) + lbfgs.evaluate + = function() + for t = 1,self.parallelize do + lbfgs.evaluate_map(t) + end + return lbfgs.evaluate_reduce() + end + + -- (1a) the map part of the evaluation: compute partial gradients + -- in separate threads + lbfgs.evaluate_map + = function(thread) + -- set parameters from current state + self:unflatten(self.parametersT[thread], self.gradParametersT[thread]) + -- reset gradients + modules[thread]:zeroGradParameters() + -- f is the average of all criterions + outputs[thread] = 0 + -- given all inputs, evaluate gradients + for i = thread,#inputs,thread do + -- estimate f + local output = modules[thread]:forward(inputs[i]) + local err = criterions[thread]:forward(output, targets[i]) + outputs[thread] = outputs[thread] + err + -- estimate df/dW + local df_do = criterions[thread]:backward(output, targets[i]) + modules[thread]:backward(inputs[i], df_do) + end + end + + -- (1b) the reduce part of the evaluation: accumulate all + -- partial estimates of the gradients + lbfgs.evaluate_reduce + = function() + -- temp vectors for accumulation + self.gradParametersAcc = 
self.gradParametersAcc or torch.Tensor() + self.gradParametersAcc:resizeAs(self.gradParameters):zero() + -- update state from computed parameters + for t = 1,self.parallelize do + self:flatten(self.parametersT[1], self.gradParametersT[t]) + self.gradParametersAcc:copy(self.gradParameters) + end + self.gradParameters:copy(self.gradParametersAcc) + -- normalize gradients + self.gradParameters:div(#inputs) + -- return average f(X) + self.output = 0 + for t = 1,self.parallelize do + self.output = self.output + outputs[t] + end + return self.output/#inputs + end + + -- (2) store current parameters/gradParameters + self:flatten(self.parametersT[1], self.gradParametersT[1]) + + -- (3) the magic function: will update the parameter vector + -- according to the l-BFGS method + self.output = lbfgs.run(self.parameters, self.gradParameters, + self.maxIterations, self.maxLineSearch, + self.sparsity) + + -- (4) last: read parameters back into the model + self:unflatten(self.parametersT[1], self.gradParametersT[1]) + + -- (5) return current output after optimization + return self.output +end -- cgit v1.2.3 From e24dfd0c32ca6759b4506a5d1cca974b70a305e2 Mon Sep 17 00:00:00 2001 From: Marco Scoffier Date: Thu, 25 Aug 2011 18:25:28 -0400 Subject: parallel modules weren't coping weights back to main module --- LBFGSOptimization.lua | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/LBFGSOptimization.lua b/LBFGSOptimization.lua index f0e130a..4026afe 100644 --- a/LBFGSOptimization.lua +++ b/LBFGSOptimization.lua @@ -88,8 +88,8 @@ function LBFGS:forward_mapreduce(inputs, targets, options) local modules = {} local criterions = {} local outputs = {} - self.parametersT = {} - self.gradParametersT = {} + self.parametersPT = {} + self.gradParametersPT = {} for m = 1,self.parallelize do if m == 1 then modules[m] = self.module @@ -98,8 +98,8 @@ function LBFGS:forward_mapreduce(inputs, targets, options) modules[m] = self.module:clone() criterions[m] = self.criterion:clone() end - self.parametersT[m] = nnx.getParameters(modules[m]) - self.gradParametersT[m] = nnx.getGradParameters(modules[m]) + self.parametersPT[m] = nnx.getParameters(modules[m]) + self.gradParametersPT[m] = nnx.getGradParameters(modules[m]) end -- (1) construct a closure that compute f(inputs) + df/dW @@ -119,14 +119,14 @@ function LBFGS:forward_mapreduce(inputs, targets, options) -- in separate threads lbfgs.evaluate_map = function(thread) - -- set parameters from current state - self:unflatten(self.parametersT[thread], self.gradParametersT[thread]) + -- set parameters of current state + self:unflatten(self.parametersPT[thread], self.gradParametersPT[thread]) -- reset gradients modules[thread]:zeroGradParameters() -- f is the average of all criterions outputs[thread] = 0 - -- given all inputs, evaluate gradients - for i = thread,#inputs,thread do + -- evaluate gradients on inputs for this thread + for i = thread,#inputs,#modules do -- estimate f local output = modules[thread]:forward(inputs[i]) local err = criterions[thread]:forward(output, targets[i]) @@ -146,8 +146,8 @@ function LBFGS:forward_mapreduce(inputs, targets, options) self.gradParametersAcc:resizeAs(self.gradParameters):zero() -- update state from computed parameters for t = 1,self.parallelize do - self:flatten(self.parametersT[1], self.gradParametersT[t]) - self.gradParametersAcc:copy(self.gradParameters) + self:flatten(self.parametersPT[t], self.gradParametersPT[t]) + self.gradParametersAcc:add(self.gradParameters) end 
self.gradParameters:copy(self.gradParametersAcc) -- normalize gradients @@ -161,7 +161,7 @@ function LBFGS:forward_mapreduce(inputs, targets, options) end -- (2) store current parameters/gradParameters - self:flatten(self.parametersT[1], self.gradParametersT[1]) + self:flatten(self.parametersT, self.gradParametersT) -- (3) the magic function: will update the parameter vector -- according to the l-BFGS method @@ -169,8 +169,8 @@ function LBFGS:forward_mapreduce(inputs, targets, options) self.maxIterations, self.maxLineSearch, self.sparsity) - -- (4) last: read parameters back into the model - self:unflatten(self.parametersT[1], self.gradParametersT[1]) + -- (4) last: read parameters back into the main (not parrallel) model + self:unflatten(self.parametersT, self.gradParametersT) -- (5) return current output after optimization return self.output -- cgit v1.2.3 From 20ad9917d9bdea4fc0ce6d220198aa7e9ace9189 Mon Sep 17 00:00:00 2001 From: Clement Farabet Date: Thu, 25 Aug 2011 18:55:38 -0400 Subject: tabs->space. --- LBFGSOptimization.lua | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/LBFGSOptimization.lua b/LBFGSOptimization.lua index 4026afe..4760cf2 100644 --- a/LBFGSOptimization.lua +++ b/LBFGSOptimization.lua @@ -72,7 +72,7 @@ function LBFGS:forward_sequential(inputs, targets, options) -- (3) the magic function: will update the parameter vector -- according to the l-BFGS method - self.output = lbfgs.run(self.parameters, self.gradParameters, + self.output = lbfgs.run(self.parameters, self.gradParameters, self.maxIterations, self.maxLineSearch, self.sparsity) @@ -126,7 +126,7 @@ function LBFGS:forward_mapreduce(inputs, targets, options) -- f is the average of all criterions outputs[thread] = 0 -- evaluate gradients on inputs for this thread - for i = thread,#inputs,#modules do + for i = thread,#inputs,#modules do -- estimate f local output = modules[thread]:forward(inputs[i]) local err = criterions[thread]:forward(output, targets[i]) @@ -165,7 +165,7 @@ function LBFGS:forward_mapreduce(inputs, targets, options) -- (3) the magic function: will update the parameter vector -- according to the l-BFGS method - self.output = lbfgs.run(self.parameters, self.gradParameters, + self.output = lbfgs.run(self.parameters, self.gradParameters, self.maxIterations, self.maxLineSearch, self.sparsity) -- cgit v1.2.3 From 2402831b4a641ecf65fc27cdd4a9551694b8d710 Mon Sep 17 00:00:00 2001 From: Clement Farabet Date: Thu, 25 Aug 2011 19:07:01 -0400 Subject: Cleaned up useless special case in mapreduce bfgs. --- LBFGSOptimization.lua | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/LBFGSOptimization.lua b/LBFGSOptimization.lua index 4760cf2..0eff7b4 100644 --- a/LBFGSOptimization.lua +++ b/LBFGSOptimization.lua @@ -91,13 +91,8 @@ function LBFGS:forward_mapreduce(inputs, targets, options) self.parametersPT = {} self.gradParametersPT = {} for m = 1,self.parallelize do - if m == 1 then - modules[m] = self.module - criterions[m] = self.criterion - else - modules[m] = self.module:clone() - criterions[m] = self.criterion:clone() - end + modules[m] = self.module:clone() + criterions[m] = self.criterion:clone() self.parametersPT[m] = nnx.getParameters(modules[m]) self.gradParametersPT[m] = nnx.getGradParameters(modules[m]) end -- cgit v1.2.3 From acfec8199f55eeba2d5d2f6eeeefcb67dc4b7b53 Mon Sep 17 00:00:00 2001 From: Clement Farabet Date: Sat, 27 Aug 2011 19:24:05 -0400 Subject: Trying to get BFGS to work with new parallel framework. 
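For context, the closure introduced in this commit forks one worker per core with `parallel.run`, ships the module, criterion and mini-batch to each child, and collects the results back before reducing. Below is a minimal sketch of that dispatch/collect round-trip, using only the `parallel` calls that appear in this patch (`run`, `send`, `receive`, `join`); the worker body and the data are made up for illustration and are not part of the nnx code.

``` lua
-- sketch of the fork / transmit / collect pattern used by lbfgs.evaluate
require 'parallel'

local worker = [[
   -- each worker receives one partition of the data and returns a partial sum
   local data = parallel.parent:receive()
   local sum = 0
   for _,v in ipairs(data) do sum = sum + v end
   parallel.parent:send(sum)
]]

local N = 4
parallel.reset()
for t = 1,N do
   parallel.run(worker)                        -- fork one worker per partition
end
for t = 1,N do
   parallel.children[t]:send({t, 2*t, 3*t})    -- transmit each partition
end
local total = 0
for t = 1,N do
   total = total + parallel.children[t]:receive()  -- reduce partial results
end
parallel.children:join()
print(total)
```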
--- LBFGSOptimization.lua | 94 ++++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 74 insertions(+), 20 deletions(-) diff --git a/LBFGSOptimization.lua b/LBFGSOptimization.lua index 0eff7b4..22b8aba 100644 --- a/LBFGSOptimization.lua +++ b/LBFGSOptimization.lua @@ -16,6 +16,12 @@ function LBFGS:__init(...) self.parametersT = nnx.getParameters(self.module) self.gradParametersT = nnx.getGradParameters(self.module) lbfgs.verbose = self.verbose + if self.parallelize > 1 then + if not xrequire 'parallel' then + xerror('install parallel for Lua to enable parallel computing (luarocks install parallel)', + 'nn.LBFGSOptimization') + end + end end function LBFGS:forward(inputs, targets, options) @@ -97,6 +103,18 @@ function LBFGS:forward_mapreduce(inputs, targets, options) self.gradParametersPT[m] = nnx.getGradParameters(modules[m]) end + -- (0b) divide input/target batch into N batches + local inputss = {} + local targetss = {} + for t = 1,self.parallelize do + inputss[t] = {} + targetss[t] = {} + for i = t,#inputs,self.parallelize do + table.insert(inputss[t], inputs[i]) + table.insert(targetss[t], targets[i]) + end + end + -- (1) construct a closure that compute f(inputs) + df/dW -- after each call to that function: -- + self.parameters contains the current X vector @@ -104,33 +122,69 @@ function LBFGS:forward_mapreduce(inputs, targets, options) -- + self.output contains the estimated (average) F(X) lbfgs.evaluate = function() + -- reset parallel state + parallel.reset() + -- dispatch N parallel jobs for t = 1,self.parallelize do - lbfgs.evaluate_map(t) + parallel.run(lbfgs.evaluate_map) end + -- transmit data to all jobs + for t = 1,self.parallelize do + -- update each module with latest parameters + self:unflatten(self.parametersPT[t], self.gradParametersPT[t]) + -- transmit all necessary data + parallel.children[t]:send(modules[t]) + parallel.children[t]:send(criterions[t]) + parallel.children[t]:send(inputss[t]) + parallel.children[t]:send(targetss[t]) + end + -- then wait for all workers to return their trained modules + for t = 1,self.parallelize do + modules[t] = parallel.children[t]:receive() + outputs[t] = parallel.children[t]:receive() + self.parametersPT[t] = nnx.getParameters(modules[t]) + self.gradParametersPT[t] = nnx.getGradParameters(modules[t]) + end + -- and join + parallel.children:join() + -- reduce return lbfgs.evaluate_reduce() end -- (1a) the map part of the evaluation: compute partial gradients -- in separate threads - lbfgs.evaluate_map - = function(thread) - -- set parameters of current state - self:unflatten(self.parametersPT[thread], self.gradParametersPT[thread]) - -- reset gradients - modules[thread]:zeroGradParameters() - -- f is the average of all criterions - outputs[thread] = 0 - -- evaluate gradients on inputs for this thread - for i = thread,#inputs,#modules do - -- estimate f - local output = modules[thread]:forward(inputs[i]) - local err = criterions[thread]:forward(output, targets[i]) - outputs[thread] = outputs[thread] + err - -- estimate df/dW - local df_do = criterions[thread]:backward(output, targets[i]) - modules[thread]:backward(inputs[i], df_do) - end - end + lbfgs.evaluate_map = [[ + -- require packages + require 'nnx' + + -- thread ID + thread = parallel.id + + -- retrieve module + criterion + mini-batch + module = parallel.parent:receive() + criterion = parallel.parent:receive() + inputs = parallel.parent:receive() + targets = parallel.parent:receive() + + -- reset gradients + module:zeroGradParameters() + -- f is the average of all 
criterions + local output = 0 + -- evaluate gradients on inputs for this thread + for i = 1,#inputs do + -- estimate f + local output = module:forward(inputs[i]) + local err = criterion:forward(output, targets[i]) + output = output + err + -- estimate df/dW + local df_do = criterion:backward(output, targets[i]) + module:backward(inputs[i], df_do) + end + + -- return module + output + parallel.parent:send(module) + parallel.parent:send(output) + ]] -- (1b) the reduce part of the evaluation: accumulate all -- partial estimates of the gradients -- cgit v1.2.3 From 5f632bb32240f2d65ea39ee90d12977ff29e5113 Mon Sep 17 00:00:00 2001 From: Clement Farabet Date: Sat, 27 Aug 2011 22:55:34 -0400 Subject: Trying to fix this buggy map-reduce BFGS --- LBFGSOptimization.lua | 53 +++++++++++++++++++++------------------------------ 1 file changed, 22 insertions(+), 31 deletions(-) diff --git a/LBFGSOptimization.lua b/LBFGSOptimization.lua index 22b8aba..e85bc97 100644 --- a/LBFGSOptimization.lua +++ b/LBFGSOptimization.lua @@ -21,6 +21,7 @@ function LBFGS:__init(...) xerror('install parallel for Lua to enable parallel computing (luarocks install parallel)', 'nn.LBFGSOptimization') end + parallel.setSharedSize(4*1024*1024) end end @@ -90,26 +91,20 @@ function LBFGS:forward_sequential(inputs, targets, options) end function LBFGS:forward_mapreduce(inputs, targets, options) - -- (0) clone module+criterion for parallel evaluations - local modules = {} - local criterions = {} + -- parameters + local P = self.parallelize + + -- (0a) replicate output and gradParameters local outputs = {} - self.parametersPT = {} - self.gradParametersPT = {} - for m = 1,self.parallelize do - modules[m] = self.module:clone() - criterions[m] = self.criterion:clone() - self.parametersPT[m] = nnx.getParameters(modules[m]) - self.gradParametersPT[m] = nnx.getGradParameters(modules[m]) - end + local gradParameters = {} -- (0b) divide input/target batch into N batches local inputss = {} local targetss = {} - for t = 1,self.parallelize do + for t = 1,P do inputss[t] = {} targetss[t] = {} - for i = t,#inputs,self.parallelize do + for i = t,#inputs,P do table.insert(inputss[t], inputs[i]) table.insert(targetss[t], targets[i]) end @@ -125,25 +120,23 @@ function LBFGS:forward_mapreduce(inputs, targets, options) -- reset parallel state parallel.reset() -- dispatch N parallel jobs - for t = 1,self.parallelize do + for t = 1,P do parallel.run(lbfgs.evaluate_map) end + -- load parameters into current model + self:unflatten(self.parametersT, self.gradParametersT) -- transmit data to all jobs - for t = 1,self.parallelize do - -- update each module with latest parameters - self:unflatten(self.parametersPT[t], self.gradParametersPT[t]) + for t = 1,P do -- transmit all necessary data - parallel.children[t]:send(modules[t]) - parallel.children[t]:send(criterions[t]) + parallel.children[t]:send(self.module) + parallel.children[t]:send(self.criterion) parallel.children[t]:send(inputss[t]) parallel.children[t]:send(targetss[t]) end -- then wait for all workers to return their trained modules - for t = 1,self.parallelize do - modules[t] = parallel.children[t]:receive() + for t = 1,P do + gradParameters = parallel.children[t]:receive() outputs[t] = parallel.children[t]:receive() - self.parametersPT[t] = nnx.getParameters(modules[t]) - self.gradParametersPT[t] = nnx.getGradParameters(modules[t]) end -- and join parallel.children:join() @@ -157,9 +150,6 @@ function LBFGS:forward_mapreduce(inputs, targets, options) -- require packages require 'nnx' - -- thread 
ID - thread = parallel.id - -- retrieve module + criterion + mini-batch module = parallel.parent:receive() criterion = parallel.parent:receive() @@ -181,8 +171,8 @@ function LBFGS:forward_mapreduce(inputs, targets, options) module:backward(inputs[i], df_do) end - -- return module + output - parallel.parent:send(module) + -- return partial gradParameters + output + parallel.parent:send( nnx.getGradParameters(module) ) parallel.parent:send(output) ]] @@ -194,8 +184,8 @@ function LBFGS:forward_mapreduce(inputs, targets, options) self.gradParametersAcc = self.gradParametersAcc or torch.Tensor() self.gradParametersAcc:resizeAs(self.gradParameters):zero() -- update state from computed parameters - for t = 1,self.parallelize do - self:flatten(self.parametersPT[t], self.gradParametersPT[t]) + for t = 1,P do + self:flatten(self.parametersT, gradParameters) self.gradParametersAcc:add(self.gradParameters) end self.gradParameters:copy(self.gradParametersAcc) @@ -203,9 +193,10 @@ function LBFGS:forward_mapreduce(inputs, targets, options) self.gradParameters:div(#inputs) -- return average f(X) self.output = 0 - for t = 1,self.parallelize do + for t = 1,P do self.output = self.output + outputs[t] end + -- export parameters, again return self.output/#inputs end -- cgit v1.2.3 From e0f477a75504b5ce09c511bc6f6e9e035e89d435 Mon Sep 17 00:00:00 2001 From: Clement Farabet Date: Sat, 27 Aug 2011 23:36:12 -0400 Subject: First end-to-end working version of map-reduce for l-BFGS. --- LBFGSOptimization.lua | 153 ++++++++++++++++++++++++++++++++------------------ 1 file changed, 97 insertions(+), 56 deletions(-) diff --git a/LBFGSOptimization.lua b/LBFGSOptimization.lua index e85bc97..d8b42e6 100644 --- a/LBFGSOptimization.lua +++ b/LBFGSOptimization.lua @@ -17,11 +17,7 @@ function LBFGS:__init(...) 
self.gradParametersT = nnx.getGradParameters(self.module) lbfgs.verbose = self.verbose if self.parallelize > 1 then - if not xrequire 'parallel' then - xerror('install parallel for Lua to enable parallel computing (luarocks install parallel)', - 'nn.LBFGSOptimization') - end - parallel.setSharedSize(4*1024*1024) + self:setup_mapreduce() end end @@ -110,6 +106,12 @@ function LBFGS:forward_mapreduce(inputs, targets, options) end end + -- (0c) send mini-batch to all workers + for t = 1,P do + parallel.children[t]:send(inputss[t]) + parallel.children[t]:send(targetss[t]) + end + -- (1) construct a closure that compute f(inputs) + df/dW -- after each call to that function: -- + self.parameters contains the current X vector @@ -117,65 +119,27 @@ function LBFGS:forward_mapreduce(inputs, targets, options) -- + self.output contains the estimated (average) F(X) lbfgs.evaluate = function() - -- reset parallel state - parallel.reset() - -- dispatch N parallel jobs - for t = 1,P do - parallel.run(lbfgs.evaluate_map) - end + lbfgs.evaluate_map() + return lbfgs.evaluate_reduce() + end + + -- (1a) the map part of the evaluation: compute partial gradients + -- in separate threads + lbfgs.evaluate_map + = function() -- load parameters into current model self:unflatten(self.parametersT, self.gradParametersT) - -- transmit data to all jobs + -- transmit new parameters to workers for t = 1,P do - -- transmit all necessary data - parallel.children[t]:send(self.module) - parallel.children[t]:send(self.criterion) - parallel.children[t]:send(inputss[t]) - parallel.children[t]:send(targetss[t]) + parallel.children[t]:send(self.parametersT) end - -- then wait for all workers to return their trained modules + -- then wait for all workers to return their partial gradParameters + outputs for t = 1,P do - gradParameters = parallel.children[t]:receive() + gradParameters[t] = parallel.children[t]:receive() outputs[t] = parallel.children[t]:receive() end - -- and join - parallel.children:join() - -- reduce - return lbfgs.evaluate_reduce() end - -- (1a) the map part of the evaluation: compute partial gradients - -- in separate threads - lbfgs.evaluate_map = [[ - -- require packages - require 'nnx' - - -- retrieve module + criterion + mini-batch - module = parallel.parent:receive() - criterion = parallel.parent:receive() - inputs = parallel.parent:receive() - targets = parallel.parent:receive() - - -- reset gradients - module:zeroGradParameters() - -- f is the average of all criterions - local output = 0 - -- evaluate gradients on inputs for this thread - for i = 1,#inputs do - -- estimate f - local output = module:forward(inputs[i]) - local err = criterion:forward(output, targets[i]) - output = output + err - -- estimate df/dW - local df_do = criterion:backward(output, targets[i]) - module:backward(inputs[i], df_do) - end - - -- return partial gradParameters + output - parallel.parent:send( nnx.getGradParameters(module) ) - parallel.parent:send(output) - ]] - -- (1b) the reduce part of the evaluation: accumulate all -- partial estimates of the gradients lbfgs.evaluate_reduce @@ -185,7 +149,7 @@ function LBFGS:forward_mapreduce(inputs, targets, options) self.gradParametersAcc:resizeAs(self.gradParameters):zero() -- update state from computed parameters for t = 1,P do - self:flatten(self.parametersT, gradParameters) + self:flatten(self.parametersT, gradParameters[t]) self.gradParametersAcc:add(self.gradParameters) end self.gradParameters:copy(self.gradParametersAcc) @@ -212,6 +176,83 @@ function 
LBFGS:forward_mapreduce(inputs, targets, options) -- (4) last: read parameters back into the main (not parrallel) model self:unflatten(self.parametersT, self.gradParametersT) + -- (6) reset workers so they're ready for next mini-batch + for t = 1,P do + parallel.children[t]:send('break') + end + -- (5) return current output after optimization return self.output end + +function LBFGS:setup_mapreduce () + -- (0) startup parallel package + if not xrequire 'parallel' then + xerror('install parallel for Lua to enable parallel computing (luarocks install parallel)', + 'nn.LBFGSOptimization') + end + parallel.setSharedSize(4*1024*1024) + local P = self.parallelize + + -- (1) define code for workers + local worker_code = [[ + -- require packages + require 'nnx' + + -- retrieve module + criterion at startup + module = parallel.parent:receive() + criterion = parallel.parent:receive() + + -- get pointer to parameter and gradParameter vectors + parameters = nnx.getParameters(module) + gradParameters = nnx.getGradParameters(module) + + -- outter loop: mini-batches + while true do + -- receive new mini-batch + inputs = parallel.parent:receive() + if type(inputs) == 'string' and inputs == 'break' then break end + targets = parallel.parent:receive() + + -- inner loop: evaluations + while true do + -- receive new set of parameters + newParameters = parallel.parent:receive() + if type(newParameters) == 'string' and newParameters == 'break' then break end + for i = 1,#newParameters do + parameters[i]:copy(newParameters[i]) + end + + -- reset gradients + module:zeroGradParameters() + -- f is the average of all criterions + local f_x = 0 + -- evaluate gradients on inputs for this thread + for i = 1,#inputs do + -- estimate f + local output = module:forward(inputs[i]) + local err = criterion:forward(output, targets[i]) + f_x = f_x + err + -- estimate df/dW + local df_do = criterion:backward(output, targets[i]) + module:backward(inputs[i], df_do) + end + + -- now send back gradParameters + partial output + parallel.parent:send(gradParameters) + parallel.parent:send(f_x) + end + end + ]] + + -- (2) startup all workers + for t = 1,P do + parallel.run(worker_code) + end + + -- (3) and send them the module + criterion architecture + for t = 1,P do + parallel.children[t]:send(self.module) + parallel.children[t]:send(self.criterion) + end +end \ No newline at end of file -- cgit v1.2.3 From 2cd71bc5e355f299000ea4b4403dcd52aa1a36db Mon Sep 17 00:00:00 2001 From: Clement Farabet Date: Sun, 28 Aug 2011 19:07:11 -0400 Subject: using new optimizations for data transfers (mapreduce) --- LBFGSOptimization.lua | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/LBFGSOptimization.lua b/LBFGSOptimization.lua index d8b42e6..643e2b1 100644 --- a/LBFGSOptimization.lua +++ b/LBFGSOptimization.lua @@ -129,10 +129,8 @@ function LBFGS:forward_mapreduce(inputs, targets, options) = function() -- load parameters into current model self:unflatten(self.parametersT, self.gradParametersT) - -- transmit new parameters to workers - for t = 1,P do - parallel.children[t]:send(self.parametersT) - end + -- transmit new parameters to all workers + parallel.children:send(self.parametersT) -- then wait for all workers to return their partial gradParameters + outputs for t = 1,P do gradParameters[t] = parallel.children[t]:receive() @@ -177,9 +175,7 @@ function LBFGS:forward_mapreduce(inputs, targets, options) self:unflatten(self.parametersT, self.gradParametersT) -- (6) reset workers so they're ready for next 
mini-batch - for t = 1,P do - parallel.children[t]:send('break') - end + parallel.children:send('break') -- (5) return current output after optimization return self.output @@ -251,8 +247,6 @@ function LBFGS:setup_mapreduce () end -- (3) and send them the module + criterion architecture - for t = 1,P do - parallel.children[t]:send(self.module) - parallel.children[t]:send(self.criterion) - end -end \ No newline at end of file + parallel.children:send(self.module) + parallel.children:send(self.criterion) +end -- cgit v1.2.3 From 357829243d61169f719144231bb0260cbde52404 Mon Sep 17 00:00:00 2001 From: Clement Farabet Date: Sun, 28 Aug 2011 19:58:36 -0400 Subject: Changed README. --- README.md | 67 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ README.txt | 13 ------------ 2 files changed, 67 insertions(+), 13 deletions(-) create mode 100644 README.md delete mode 100644 README.txt diff --git a/README.md b/README.md new file mode 100644 index 0000000..f30f5b9 --- /dev/null +++ b/README.md @@ -0,0 +1,67 @@ +# nnx: an Xperimental package for neural network modules + optimizations + +The original neural network from Torch7, 'nn', contains stable and widely +used modules. 'nnx' contains more experimental, unproven modules, and +optimizations. Eventually, modules that become stable enough will make +their way into 'nn' (some already have). + +## Install dependencies + +1/ third-party libraries: + +On Linux (Ubuntu > 9.04): + +``` sh +$ apt-get install gcc g++ git libreadline5-dev cmake wget +``` + +On Mac OS (Leopard, or more), using [Homebrew](http://mxcl.github.com/homebrew/): + +``` sh +$ brew install git readline cmake wget +``` + +2/ Lua 5.1 + Luarocks + xLua: + +``` sh +$ git clone https://github.com/clementfarabet/lua4torch +$ cd lua4torch +$ make install PREFIX=/usr/local +``` + +3/ nnx: + +Note: this automatically installs Torch7+nn, and other Lua dependencies. + +``` sh +$ luarocks install nnx +``` + +## Use the library + +First run xlua, and load nnx: + +``` sh +$ xlua +``` + +``` lua +> require 'nnx' +``` + +Once loaded, tab-completion will help you navigate through the +library (note that most function are added directly to nn): + +``` lua +> nnx. + TAB +... +> nn. + TAB +``` + +In particular, it's good to verify that all modules provided pass their +tests: + +``` lua +> nnx.test_all() +> nnx.test_omp() +``` diff --git a/README.txt b/README.txt deleted file mode 100644 index 6f183e9..0000000 --- a/README.txt +++ /dev/null @@ -1,13 +0,0 @@ - -INSTALL: -$ luarocks --from=http://data.neuflow.org/lua/rocks install nnx - -USE: -> require 'nnx' -> n1 = nn.SpatialLinear(16,4) - --- run tests: -> nnx.test_all() -... -> nnx.test_omp() -... -- cgit v1.2.3 From ce7043e49884a2aeb2bc7489aa6e50d21682aba5 Mon Sep 17 00:00:00 2001 From: Clement Farabet Date: Sun, 28 Aug 2011 20:08:23 -0400 Subject: Added support for user hooks in parallel BFGS. To be tested. 
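The hooks added in this commit are transmitted as strings because a Lua closure cannot be serialized to a child process; each worker rebuilds the function with `loadstring`. The sketch below illustrates that protocol with a hypothetical hook body (only the `send`/`receive`/`loadstring` pattern mirrors the patch).

``` lua
-- sketch of the string-based hook protocol: the parent sends hook source,
-- the worker compiles it (defining a global 'prehook') or disables it
require 'parallel'

local worker = [[
   local src = parallel.parent:receive()
   if src ~= '' then loadstring(src)() else prehook = nil end

   local sample = parallel.parent:receive()
   if prehook then prehook(nil, sample) end   -- patch passes an optimizer table here
   parallel.parent:send(sample[1])
]]

parallel.reset()
parallel.run(worker)

-- in parallel mode the hook must be plain source text, not a function:
local prehook_src = [[
   prehook = function(optimizer, sample)
      sample[1] = sample[1] * 2   -- e.g. per-sample preprocessing (made up)
   end
]]
parallel.children:send(prehook_src)
parallel.children:send({21})
print(parallel.children[1]:receive())   -- 42
parallel.children:join()
```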
--- LBFGSOptimization.lua | 49 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) diff --git a/LBFGSOptimization.lua b/LBFGSOptimization.lua index 643e2b1..8373cfd 100644 --- a/LBFGSOptimization.lua +++ b/LBFGSOptimization.lua @@ -90,6 +90,33 @@ function LBFGS:forward_mapreduce(inputs, targets, options) -- parameters local P = self.parallelize + -- transmit user hooks, if defined + if not self.hooksets then + if self.prehook then + if type(self.prehook) == 'string' then + parallel.children:send(self.prehook) + else + print('\r WARNING: when using para||el mode, hooks should be') + print('\r WARNING: defined as strings. User prehook ignored.') + parallel.children:send('') + end + else + parallel.children:send('') + end + if self.posthook then + if type(self.posthook) == 'string' then + parallel.children:send(self.posthook) + else + print('\r WARNING: when using para||el mode, hooks should be') + print('<\rLBFGSOptimization> WARNING: defined as strings. User posthook ignored.') + parallel.children:send('') + end + else + parallel.children:send('') + end + self.hooksets = true + end + -- (0a) replicate output and gradParameters local outputs = {} local gradParameters = {} @@ -97,12 +124,15 @@ function LBFGS:forward_mapreduce(inputs, targets, options) -- (0b) divide input/target batch into N batches local inputss = {} local targetss = {} + local optionss = {} for t = 1,P do inputss[t] = {} targetss[t] = {} + optionss[t] = {} for i = t,#inputs,P do table.insert(inputss[t], inputs[i]) table.insert(targetss[t], targets[i]) + if options then table.insert(optionss[t], options[i]) end end end @@ -110,6 +140,7 @@ function LBFGS:forward_mapreduce(inputs, targets, options) for t = 1,P do parallel.children[t]:send(inputss[t]) parallel.children[t]:send(targetss[t]) + parallel.children[t]:send(optionss[t]) end -- (1) construct a closure that compute f(inputs) + df/dW @@ -199,6 +230,15 @@ function LBFGS:setup_mapreduce () module = parallel.parent:receive() criterion = parallel.parent:receive() + -- create fake optimizer, for hooks + optimizer = {module=module, criterion=criterion} + + -- retrieve optional prehook/posthook + prehook = parallel.parent:receive() + posthook = parallel.parent:receive() + if prehook ~= '' then loadstring(prehook)() else prehook = nil end + if posthook ~= '' then loadstring(posthook)() else posthook = nil end + -- get pointer to parameter and gradParameter vectors parameters = nnx.getParameters(module) gradParameters = nnx.getGradParameters(module) @@ -209,6 +249,7 @@ function LBFGS:setup_mapreduce () inputs = parallel.parent:receive() if type(inputs) == 'string' and inputs == 'break' then break end targets = parallel.parent:receive() + options = parallel.parent:receive() -- inner loop: evaluations while true do @@ -225,6 +266,10 @@ function LBFGS:setup_mapreduce () local f_x = 0 -- evaluate gradients on inputs for this thread for i = 1,#inputs do + -- user hook + if prehook then + prehook(optimizer, {inputs[i], targets[i], options[i]}) + end -- estimate f local output = module:forward(inputs[i]) local err = criterion:forward(output, targets[i]) @@ -232,6 +277,10 @@ function LBFGS:setup_mapreduce () -- estimate df/dW local df_do = criterion:backward(output, targets[i]) module:backward(inputs[i], df_do) + -- user hook + if posthook then + posthook(optimizer, {inputs[i], targets[i], options[i]}) + end end -- now send back gradParameters + partial output -- cgit v1.2.3 From e22340ed5ef2badf8c51c484708fcf326d774a9c Mon Sep 17 00:00:00 2001 From: Marco 
Scoffier Date: Mon, 29 Aug 2011 04:25:00 -0400 Subject: adding stopping criteria to verbose==2, pass verbose as a parameter --- LBFGSOptimization.lua | 23 ++++++++++++++--------- OnlineTrainer.lua | 25 ++++++++++++++++--------- lbfgs.c | 47 ++++++++++++++++++++++++++++++++--------------- 3 files changed, 62 insertions(+), 33 deletions(-) diff --git a/LBFGSOptimization.lua b/LBFGSOptimization.lua index 0eff7b4..b6a102a 100644 --- a/LBFGSOptimization.lua +++ b/LBFGSOptimization.lua @@ -6,16 +6,21 @@ function LBFGS:__init(...) xlua.unpack_class(self, {...}, 'LBFGSOptimization', nil, {arg='module', type='nn.Module', help='a module to train', req=true}, - {arg='criterion', type='nn.Criterion', help='a criterion to estimate the error', req=true}, - {arg='maxIterations', type='number', help='maximum nb of iterations per pass (0 = no max)', default=0}, - {arg='maxLineSearch', type='number', help='maximum nb of steps in line search', default=20}, - {arg='sparsity', type='number', help='sparsity coef (Orthantwise C)', default=0}, - {arg='parallelize', type='number', help='parallelize onto N cores (experimental!)', default=1}, - {arg='verbose', type='number', help='verbose level during training [0-2]', default=0} + {arg='criterion', type='nn.Criterion', + help='a criterion to estimate the error', req=true}, + {arg='maxIterations', type='number', + help='maximum nb of iterations per pass (0 = no max)', default=0}, + {arg='maxLineSearch', type='number', + help='maximum nb of steps in line search', default=20}, + {arg='sparsity', type='number', + help='sparsity coef (Orthantwise C)', default=0}, + {arg='parallelize', type='number', + help='parallelize onto N cores (experimental!)', default=1}, + {arg='verbose', type='number', + help='verbose level during training [0-2]', default=0} ) self.parametersT = nnx.getParameters(self.module) self.gradParametersT = nnx.getGradParameters(self.module) - lbfgs.verbose = self.verbose end function LBFGS:forward(inputs, targets, options) @@ -74,7 +79,7 @@ function LBFGS:forward_sequential(inputs, targets, options) -- according to the l-BFGS method self.output = lbfgs.run(self.parameters, self.gradParameters, self.maxIterations, self.maxLineSearch, - self.sparsity) + self.sparsity, self.verbose) -- (4) last: read parameters back into the model self:unflatten(self.parametersT, self.gradParametersT) @@ -162,7 +167,7 @@ function LBFGS:forward_mapreduce(inputs, targets, options) -- according to the l-BFGS method self.output = lbfgs.run(self.parameters, self.gradParameters, self.maxIterations, self.maxLineSearch, - self.sparsity) + self.sparsity,self.verbose) -- (4) last: read parameters back into the main (not parrallel) model self:unflatten(self.parametersT, self.gradParametersT) diff --git a/OnlineTrainer.lua b/OnlineTrainer.lua index 2b7f2b5..bb120e8 100644 --- a/OnlineTrainer.lua +++ b/OnlineTrainer.lua @@ -16,15 +16,22 @@ function OnlineTrainer:__init(...) .. 
'> ', {arg='module', type='nn.Module', help='a module to train', req=true}, - {arg='criterion', type='nn.Criterion', help='a criterion to estimate the error'}, - {arg='preprocessor', type='nn.Module', help='a preprocessor to prime the data before the module'}, - {arg='optimizer', type='nn.Optimization', help='an optimization method'}, - - {arg='batchSize', type='number', help='[mini] batch size', default=1}, - {arg='maxEpoch', type='number', help='maximum number of epochs', default=50}, - {arg='dispProgress', type='boolean', help='display a progress bar during training/testing', default=true}, - {arg='save', type='string', help='path to save networks and log training'}, - {arg='timestamp', type='boolean', help='if true, appends a timestamp to each network saved', default=false} + {arg='criterion', type='nn.Criterion', + help='a criterion to estimate the error'}, + {arg='preprocessor', type='nn.Module', + help='a preprocessor to prime the data before the module'}, + {arg='optimizer', type='nn.Optimization', + help='an optimization method'}, + {arg='batchSize', type='number', + help='[mini] batch size', default=1}, + {arg='maxEpoch', type='number', + help='maximum number of epochs', default=50}, + {arg='dispProgress', type='boolean', + help='display a progress bar during training/testing', default=true}, + {arg='save', type='string', + help='path to save networks and log training'}, + {arg='timestamp', type='boolean', + help='if true, appends a timestamp to each network saved', default=false} ) -- private params self.trainOffset = 0 diff --git a/lbfgs.c b/lbfgs.c index 93680bd..6382183 100644 --- a/lbfgs.c +++ b/lbfgs.c @@ -81,6 +81,11 @@ #define max2(a, b) ((a) >= (b) ? (a) : (b)) #define max3(a, b, c) max2(max2((a), (b)), (c)); +// extra globals +static int nEvaluation = 0; +static int nIteration = 0; +static int verbose = 0; + struct tag_callback_data { int n; void *instance; @@ -415,7 +420,8 @@ int lbfgs( fx += xnorm * param.orthantwise_c; owlqn_pseudo_gradient( pg, x, g, n, - param.orthantwise_c, param.orthantwise_start, param.orthantwise_end + param.orthantwise_c, + param.orthantwise_start, param.orthantwise_end ); } @@ -468,7 +474,8 @@ int lbfgs( ls = linesearch(n, x, &fx, g, d, &step, xp, pg, w, &cd, ¶m); owlqn_pseudo_gradient( pg, x, g, n, - param.orthantwise_c, param.orthantwise_start, param.orthantwise_end + param.orthantwise_c, + param.orthantwise_start, param.orthantwise_end ); } if (ls < 0) { @@ -476,6 +483,9 @@ int lbfgs( veccpy(x, xp, n); veccpy(g, gp, n); ret = ls; + if (verbose > 1){ + printf("Stopping b/c ls (%f) < 0\n", ls); + } goto lbfgs_exit; } @@ -490,7 +500,10 @@ int lbfgs( /* Report the progress. */ if (cd.proc_progress) { if ((ret = cd.proc_progress(cd.instance, x, g, fx, xnorm, gnorm, step, cd.n, k, ls))) { - goto lbfgs_exit; + if (verbose > 1){ + printf("Stopping b/c cd.proc_progress (%d)\n", ret); + } + goto lbfgs_exit; } } @@ -501,6 +514,10 @@ int lbfgs( */ if (xnorm < 1.0) xnorm = 1.0; if (gnorm / xnorm <= param.epsilon) { + if (verbose > 1){ + printf("Stopping b/c gnorm(%f)/xnorm(%f) <= param.epsilon (%f)\n", + gnorm, xnorm, param.epsilon); + } /* Convergence. */ ret = LBFGS_SUCCESS; break; @@ -519,6 +536,10 @@ int lbfgs( /* The stopping criterion. 
*/ if (rate < param.delta) { + if (verbose > 1){ + printf("Stopping b/c rate (%f) < param.delta (%f)\n", + rate, param.delta); + } ret = LBFGS_STOP; break; } @@ -529,6 +550,10 @@ int lbfgs( } if (param.max_iterations != 0 && param.max_iterations < k+1) { + if (verbose > 1){ + printf("Stopping b/c param.max_iterations (%d) < k+1 (%d)\n", + param.max_iterations, k+1); + } /* Maximum number of iterations. */ ret = LBFGSERR_MAXIMUMITERATION; break; @@ -1375,9 +1400,6 @@ static THDoubleTensor *gradParameters = NULL; static int nParameter = 0; static lua_State *GL = NULL; static lbfgs_parameter_t lbfgs_param; -static int nEvaluation = 0; -static int nIteration = 0; -static int verbose = 0; static lbfgsfloatval_t evaluate(void *instance, const lbfgsfloatval_t *x, @@ -1417,7 +1439,7 @@ static int progress(void *instance, int ls) { nIteration = k; - if (verbose == 2) { + if (verbose > 1) { printf("\n iteration %d:\n", nIteration); printf(" + fx = %f\n", fx); printf(" + xnorm = %f, gnorm = %f, step = %f\n", xnorm, gnorm, step); @@ -1433,19 +1455,11 @@ int lbfgs_run(lua_State *L) { parameters = luaT_checkudata(L, 1, torch_DoubleTensor_id); gradParameters = luaT_checkudata(L, 2, torch_DoubleTensor_id); nParameter = THDoubleTensor_nElement(parameters); - // parameters for algorithm nEvaluation = 0; lbfgsfloatval_t fx; lbfgsfloatval_t *x = lbfgs_malloc(nParameter); - // get verbose level - lua_getfield(GL, LUA_GLOBALSINDEX, "lbfgs"); // push lbfgs on top of stack - lua_getfield(GL, -1, "verbose"); // push lbfgs.verbose on top of stack - verbose = lua_tonumber(GL, -1); // verbose = lbfgs.verbose - lua_pop(GL, 2); // pop last two entries - - // initialize vector x <- parameters memcpy(x, THDoubleTensor_data(parameters), sizeof(double)*nParameter); // initialize the parameters for the L-BFGS optimization @@ -1454,6 +1468,8 @@ int lbfgs_run(lua_State *L) { lbfgs_param.max_linesearch = lua_tonumber(L, 4); lbfgs_param.linesearch = LBFGS_LINESEARCH_BACKTRACKING; lbfgs_param.orthantwise_c = lua_tonumber(L, 5); + // get verbose level + verbose = lua_tonumber(L,6); // Start the L-BFGS optimization; this will invoke the callback functions // evaluate() and progress() when necessary. @@ -1463,6 +1479,7 @@ int lbfgs_run(lua_State *L) { if (verbose) { printf("\n batch optimized after %d iterations\n", nIteration); printf(" + fx = %f\n", fx); + printf(" + x = [ %f , ... %f]\n",x[0],x[nParameter-1]); printf(" + nb evaluations = %d\n", nEvaluation); printf("\n"); } -- cgit v1.2.3 From fce4ba1f64cc54465313b7973d3941a39d654608 Mon Sep 17 00:00:00 2001 From: Marco Scoffier Date: Mon, 29 Aug 2011 09:11:15 -0400 Subject: tweak verbose print --- lbfgs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lbfgs.c b/lbfgs.c index 6382183..f84623a 100644 --- a/lbfgs.c +++ b/lbfgs.c @@ -484,7 +484,7 @@ int lbfgs( veccpy(g, gp, n); ret = ls; if (verbose > 1){ - printf("Stopping b/c ls (%f) < 0\n", ls); + printf("Stopping b/c ls (%d) < 0\n", ls); } goto lbfgs_exit; } -- cgit v1.2.3 From c6cb639ce76899720b82612eafb214b610a8b9a2 Mon Sep 17 00:00:00 2001 From: Clement Farabet Date: Mon, 29 Aug 2011 14:19:40 -0400 Subject: Added a convenient method to re-alloc all params of a module in a flat vector. --- init.lua | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/init.lua b/init.lua index 6b8b7e5..04877d3 100644 --- a/init.lua +++ b/init.lua @@ -185,3 +185,29 @@ function nnx.getGradParameters(...) 
-- return all parameters found return holder end + +function nnx.flattenParameters(parameters) + -- compute offsets of each parameter + local offsets = {} + local dimensions = {} + local elements = {} + local nParameters = 0 + for _,param in ipairs(parameters) do + table.insert(offsets, nParameters+1) + table.insert(dimensions, param:size()) + table.insert(elements, param:nElement()) + nParameters = nParameters + param:nElement() + end + -- create flat vector + local flatParameters = torch.Tensor(nParameters) + local storage = flatParameters:storage() + -- reallocate all parameters in flat vector + for i = 1,#parameters do + local data = parameters[i]:clone() + parameters[i]:set(storage, offsets[i], elements[i]):resize(dimensions[i]):copy(data) + end + -- cleanup + collectgarbage() + -- return new flat vector that contains all discrete parameters + return flatParameters +end -- cgit v1.2.3 From 1c45fa457e45fd70696bd379603db1a4d893409e Mon Sep 17 00:00:00 2001 From: Clement Farabet Date: Mon, 29 Aug 2011 14:43:10 -0400 Subject: Completely got rid of calls to 'flatten'. Now parameters are flattenned once and for all at startup, so that optimizations can have access to contiguous vectors (as was done in LUSH). --- LBFGSOptimization.lua | 68 ++++++++++++++++----------------------------------- Optimization.lua | 49 ++----------------------------------- SGDOptimization.lua | 10 ++------ 3 files changed, 25 insertions(+), 102 deletions(-) diff --git a/LBFGSOptimization.lua b/LBFGSOptimization.lua index b3f2f9b..73405ed 100644 --- a/LBFGSOptimization.lua +++ b/LBFGSOptimization.lua @@ -19,8 +19,8 @@ function LBFGS:__init(...) {arg='verbose', type='number', help='verbose level during training [0-2]', default=0} ) - self.parametersT = nnx.getParameters(self.module) - self.gradParametersT = nnx.getGradParameters(self.module) + self.parameters = nnx.flattenParameters(nnx.getParameters(self.module)) + self.gradParameters = nnx.flattenParameters(nnx.getGradParameters(self.module)) if self.parallelize > 1 then self:setup_mapreduce() end @@ -43,10 +43,8 @@ function LBFGS:forward_sequential(inputs, targets, options) -- + self.output contains the estimated (average) F(X) lbfgs.evaluate = function() - -- set parameters from current state - self:unflatten(self.parametersT, self.gradParametersT) -- reset gradients - self.module:zeroGradParameters() + self.gradParameters:zero() -- f is the average of all criterions self.output = 0 -- given all inputs, evaluate gradients @@ -67,27 +65,19 @@ function LBFGS:forward_sequential(inputs, targets, options) self.posthook(self, {inputs[i], targets[i], options[i]}) end end - -- update state from computed parameters - self:flatten(self.parametersT, self.gradParametersT) -- normalize gradients self.gradParameters:div(#inputs) -- return average f(X) return self.output/#inputs end - -- (2) store current parameters/gradParameters - self:flatten(self.parametersT, self.gradParametersT) - - -- (3) the magic function: will update the parameter vector + -- (2) the magic function: will update the parameter vector -- according to the l-BFGS method self.output = lbfgs.run(self.parameters, self.gradParameters, self.maxIterations, self.maxLineSearch, self.sparsity, self.verbose) - -- (4) last: read parameters back into the model - self:unflatten(self.parametersT, self.gradParametersT) - - -- (5) return current output after optimization + -- (3) return current output after optimization return self.output end @@ -123,8 +113,8 @@ function LBFGS:forward_mapreduce(inputs, targets, options) 
end -- (0a) replicate output and gradParameters - local outputs = {} - local gradParameters = {} + local outputsPartial = {} + local gradParametersPartial = {} -- (0b) divide input/target batch into N batches local inputss = {} @@ -163,14 +153,12 @@ function LBFGS:forward_mapreduce(inputs, targets, options) -- in separate threads lbfgs.evaluate_map = function() - -- load parameters into current model - self:unflatten(self.parametersT, self.gradParametersT) -- transmit new parameters to all workers - parallel.children:send(self.parametersT) + parallel.children:send(self.parameters) -- then wait for all workers to return their partial gradParameters + outputs for t = 1,P do - gradParameters[t] = parallel.children[t]:receive() - outputs[t] = parallel.children[t]:receive() + gradParametersPartial[t] = parallel.children[t]:receive() + outputsPartial[t] = parallel.children[t]:receive() end end @@ -178,42 +166,30 @@ function LBFGS:forward_mapreduce(inputs, targets, options) -- partial estimates of the gradients lbfgs.evaluate_reduce = function() - -- temp vectors for accumulation - self.gradParametersAcc = self.gradParametersAcc or torch.Tensor() - self.gradParametersAcc:resizeAs(self.gradParameters):zero() - -- update state from computed parameters + -- accumulate partial gradients, and average + self.gradParameters:zero() for t = 1,P do - self:flatten(self.parametersT, gradParameters[t]) - self.gradParametersAcc:add(self.gradParameters) + self.gradParameters:add(gradParametersPartial[t]) end - self.gradParameters:copy(self.gradParametersAcc) - -- normalize gradients self.gradParameters:div(#inputs) -- return average f(X) self.output = 0 for t = 1,P do - self.output = self.output + outputs[t] + self.output = self.output + outputsPartial[t] end - -- export parameters, again return self.output/#inputs end - -- (2) store current parameters/gradParameters - self:flatten(self.parametersT, self.gradParametersT) - - -- (3) the magic function: will update the parameter vector + -- (2) the magic function: will update the parameter vector -- according to the l-BFGS method self.output = lbfgs.run(self.parameters, self.gradParameters, self.maxIterations, self.maxLineSearch, self.sparsity, self.verbose) - -- (4) last: read parameters back into the main (not parrallel) model - self:unflatten(self.parametersT, self.gradParametersT) - - -- (6) reset workers so they're ready for next mini-batch + -- (3) reset workers so they're ready for next mini-batch parallel.children:send('break') - -- (5) return current output after optimization + -- (4) return current output after optimization return self.output end @@ -245,8 +221,8 @@ function LBFGS:setup_mapreduce () if posthook ~= '' then loadstring(posthook)() else posthook = nil end -- get pointer to parameter and gradParameter vectors - parameters = nnx.getParameters(module) - gradParameters = nnx.getGradParameters(module) + parameters = nnx.flattenParameters(nnx.getParameters(module)) + gradParameters = nnx.flattenParameters(nnx.getGradParameters(module)) -- outter loop: mini-batches while true do @@ -261,12 +237,10 @@ function LBFGS:setup_mapreduce () -- receive new set of parameters newParameters = parallel.parent:receive() if type(newParameters) == 'string' and newParameters == 'break' then break end - for i = 1,#newParameters do - parameters[i]:copy(newParameters[i]) - end + parameters:copy(newParameters) -- reset gradients - module:zeroGradParameters() + gradParameters:zero() -- f is the average of all criterions local f_x = 0 -- evaluate gradients on inputs 
for this thread diff --git a/Optimization.lua b/Optimization.lua index f18c635..daf0a8d 100644 --- a/Optimization.lua +++ b/Optimization.lua @@ -1,56 +1,11 @@ local Optimization = torch.class('nn.Optimization') function Optimization:__init() + self.output = 0 end function Optimization:forward(inputs, targets) - self:flatten(parameters, gradParameters) self.output = 0 - self:unflatten(parameters, gradParameters) + print(' WARNING: this is a virtual function, please overload !') return self.output end - -function Optimization:flatten(parameters, gradParameters) - if type(parameters) == 'table' then - -- create flat parameters - self.parameters = self.parameters or torch.Tensor() - self.gradParameters = self.gradParameters or torch.Tensor() - -- assuming that the parameters won't change their size, - -- we compute offsets once - if not self.offsets then - self.nParameters = 0 - self.offsets = {} - for _,param in ipairs(parameters) do - table.insert(self.offsets, self.nParameters+1) - self.nParameters = self.nParameters + param:nElement() - end - self.parameters:resize(self.nParameters) - self.gradParameters:resize(self.nParameters) - end - -- copy all params in flat array - for i = 1,#parameters do - local nElement = parameters[i]:nElement() - self.parameters:narrow(1,self.offsets[i],nElement):copy(parameters[i]) - self.gradParameters:narrow(1,self.offsets[i],nElement):copy(gradParameters[i]) - end - else - self.parameters = parameters - self.gradParameters = gradParameters - end -end - -function Optimization:unflatten(parameters, gradParameters) - if type(parameters) == 'table' then - -- copy all params into unflat arrays - local offset = 1 - for i = 1,#parameters do - local nElement = parameters[i]:nElement() - parameters[i]:copy(self.parameters:narrow(1,offset,nElement)) - gradParameters[i]:copy(self.gradParameters:narrow(1,offset,nElement)) - offset = offset + nElement - end - else - parameters = self.parameters - gradParameters = self.gradParameters - end -end diff --git a/SGDOptimization.lua b/SGDOptimization.lua index 8bfe9a5..0ad5b28 100644 --- a/SGDOptimization.lua +++ b/SGDOptimization.lua @@ -10,8 +10,8 @@ function SGD:__init(...) {arg='weightDecay', type='number', help='amount of weight decay (W = W - decay*W)', default=0}, {arg='momentum', type='number', help='amount of momentum on weights (dE/W = dE/dW*(1-momentum) + prev(dE/dW)*momentum)', default=0} ) - self.parametersT = nnx.getParameters(self.module) - self.gradParametersT = nnx.getGradParameters(self.module) + self.parameters = nnx.flattenParameters(nnx.getParameters(self.module)) + self.gradParameters = nnx.flattenParameters(nnx.getGradParameters(self.module)) end function SGD:forward(inputs, targets, options) @@ -48,9 +48,6 @@ function SGD:forward(inputs, targets, options) -- renorm f self.output = self.output / #inputs - -- update state from computed parameters - self:flatten(self.parametersT, self.gradParametersT) - -- normalize gradients self.gradParameters:div(#inputs) @@ -73,9 +70,6 @@ function SGD:forward(inputs, targets, options) -- update parameters self.parameters:add(-self.learningRate, self.currentGradParameters) - -- write compute parameters back in place - self:unflatten(self.parametersT, self.gradParametersT) - -- return current output return self.output end -- cgit v1.2.3 From 80edcf411716bcfb0ac1cdcdaa967922d1bf0fea Mon Sep 17 00:00:00 2001 From: Clement Farabet Date: Mon, 29 Aug 2011 14:49:03 -0400 Subject: Using flat vector for zeroing gradients. 
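This works because `nnx.flattenParameters` (added two commits earlier) re-sets every per-layer tensor onto one shared storage, so zeroing the flat vector clears all layers in place. A minimal sketch, assuming the helper behaves as shown in its diff:

``` lua
-- the flat gradient vector and the per-layer gradient tensors share storage
require 'nnx'

local mlp = nn.Sequential()
mlp:add(nn.Linear(10, 5))
mlp:add(nn.Tanh())
mlp:add(nn.Linear(5, 2))

local gradParameters = nnx.flattenParameters(nnx.getGradParameters(mlp))

-- accumulate some gradients
mlp:forward(torch.Tensor(10):fill(1))
mlp:backward(torch.Tensor(10):fill(1), torch.Tensor(2):fill(1))

gradParameters:zero()                    -- one call clears every layer
print(mlp.modules[1].gradWeight:sum())   -- 0
```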
--- SGDOptimization.lua | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/SGDOptimization.lua b/SGDOptimization.lua index 0ad5b28..7bb8b2c 100644 --- a/SGDOptimization.lua +++ b/SGDOptimization.lua @@ -18,7 +18,7 @@ function SGD:forward(inputs, targets, options) options = options or {} -- reset gradients - self.module:zeroGradParameters() + self.gradParameters:zero() -- f is the average of all criterions self.output = 0 -- cgit v1.2.3 From 68efd2b303b962f97e6cd3fb4e864ae192bac4ad Mon Sep 17 00:00:00 2001 From: Clement Farabet Date: Mon, 29 Aug 2011 16:32:06 -0400 Subject: Got rid of StochasticTrainer. --- OnlineTrainer.lua | 2 +- StochasticTrainer.lua | 265 -------------------------------------------------- init.lua | 1 - nnx-1.0-1.rockspec | 1 - 4 files changed, 1 insertion(+), 268 deletions(-) delete mode 100644 StochasticTrainer.lua diff --git a/OnlineTrainer.lua b/OnlineTrainer.lua index bb120e8..db662e5 100644 --- a/OnlineTrainer.lua +++ b/OnlineTrainer.lua @@ -74,7 +74,7 @@ function OnlineTrainer:train(dataset) while true do print(' on training set:') - print(" online epoch # " .. self.epoch .. '[batchSize = ' .. self.batchSize .. ']') + print(" online epoch # " .. self.epoch .. ' [batchSize = ' .. self.batchSize .. ']') self.time = sys.clock() self.currentError = 0 diff --git a/StochasticTrainer.lua b/StochasticTrainer.lua deleted file mode 100644 index 62fb670..0000000 --- a/StochasticTrainer.lua +++ /dev/null @@ -1,265 +0,0 @@ -local StochasticTrainer, parent = torch.class('nn.StochasticTrainer','nn.Trainer') - -function StochasticTrainer:__init(...) - parent.__init(self) - -- unpack args - xlua.unpack_class(self, {...}, - 'StochasticTrainer', - - 'A general-purpose stochastic trainer class.\n' - .. 'Provides 4 user hooks to perform extra work after each sample, or each epoch:\n' - .. '> trainer = nn.StochasticTrainer(...) \n' - .. '> trainer.hookTrainSample = function(trainer, sample) ... end \n' - .. '> trainer.hookTrainEpoch = function(trainer) ... end \n' - .. '> trainer.hookTestSample = function(trainer, sample) ... end \n' - .. '> trainer.hookTestEpoch = function(trainer) ... end \n' - .. 
'> ', - - {arg='module', type='nn.Module', help='a module to train', req=true}, - {arg='criterion', type='nn.Module', help='a criterion to estimate the error'}, - {arg='preprocessor', type='nn.Module', help='a preprocessor to prime the data before the module'}, - - {arg='learningRate', type='number', help='learning rate (W = W - rate*dE/dW)', default=1e-2}, - {arg='learningRateDecay', type='number', help='learning rate decay (rate = rate * (1-decay), at each epoch)', default=0}, - {arg='weightDecay', type='number', help='amount of weight decay (W = W - decay*W)', default=0}, - {arg='momentum', type='number', help='amount of momentum on weights (dE/W = dE/dW + momentum*prev(dE/dW))', default=0}, - {arg='maxEpoch', type='number', help='maximum number of epochs', default=50}, - - {arg='maxTarget', type='boolean', help='replaces an CxHxW target map by a HxN target of max values (for NLL criterions)', default=false}, - {arg='dispProgress', type='boolean', help='display a progress bar during training/testing', default=true}, - {arg='skipUniformTargets', type='boolean', help='skip uniform (flat) targets during training', default=false}, - - {arg='save', type='string', help='path to save networks and log training'}, - {arg='timestamp', type='boolean', help='if true, appends a timestamp to each network saved', default=false} - ) - -- instantiate SGD optimization module - self.optimizer = nn.SGDOptimization(self.learningRate, self.weightDecay, self.momentum) - -- private params - self.errorArray = self.skipUniformTargets - self.trainOffset = 0 - self.testOffset = 0 -end - -function StochasticTrainer:log() - -- save network - local filename = self.save - os.execute('mkdir -p ' .. sys.dirname(filename)) - if self.timestamp then - -- use a timestamp to store all networks uniquely - filename = filename .. '-' .. os.date("%Y_%m_%d_%X") - else - -- if no timestamp, just store the previous one - if sys.filep(filename) then - os.execute('mv ' .. filename .. ' ' .. filename .. '.old') - end - end - print(' saving network to '..filename) - local file = torch.DiskFile(filename,'w') - self.module:write(file) - file:close() -end - -function StochasticTrainer:train(dataset) - self.epoch = self.epoch or 1 - local currentLearningRate = self.learningRate - local module = self.module - local criterion = self.criterion - self.trainset = dataset - - local shuffledIndices = {} - if not self.shuffleIndices then - for t = 1,dataset:size() do - shuffledIndices[t] = t - end - else - shuffledIndices = lab.randperm(dataset:size()) - end - - local parameters = nnx.getParameters(module) - local gradParameters = nnx.getGradParameters(module) - - while true do - print(' on training set:') - print(" stochastic gradient descent epoch # " .. self.epoch) - - module:zeroGradParameters() - - self.time = sys.clock() - self.currentError = 0 - for t = 1,dataset:size() do - -- disp progress - if self.dispProgress then - xlua.progress(t, dataset:size()) - end - - -- load new sample - local sample = dataset[self.trainOffset + shuffledIndices[t]] - local input = sample[1] - local target = sample[2] - local sample_x = sample.x - local sample_y = sample.y - - -- get max of target ? - if self.maxTarget then - target = torch.Tensor(target:nElement()):copy(target) - _,target = lab.max(target) - target = target[1] - end - - -- is target uniform ? 
- local isUniform = false - if self.errorArray and target:min() == target:max() then - isUniform = true - end - - -- perform SGD step - if not (self.skipUniformTargets and isUniform) then - -- optional preprocess - if self.preprocessor then input = self.preprocessor:forward(input) end - - -- forward through model and criterion - -- (if no criterion, it is assumed to be contained in the model) - local modelOut, error - if criterion then - modelOut = module:forward(input) - error = criterion:forward(modelOut, target) - else - modelOut, error = module:forward(input, target, sample_x, sample_y) - end - - -- accumulate error - self.currentError = self.currentError + error - - -- reset gradients - module:zeroGradParameters() - - -- backward through model - -- (if no criterion, it is assumed that derror is internally generated) - if criterion then - local derror = criterion:backward(module.output, target) - module:backward(input, derror) - else - module:backward(input) - end - - -- update parameters in the model - self.optimizer:forward(parameters, gradParameters) - end - - -- call user hook, if any - if self.hookTrainSample then - self.hookTrainSample(self, sample) - end - end - - self.currentError = self.currentError / dataset:size() - print(" current error = " .. self.currentError) - - self.time = sys.clock() - self.time - self.time = self.time / dataset:size() - print(" time to learn 1 sample = " .. (self.time*1000) .. 'ms') - - if self.hookTrainEpoch then - self.hookTrainEpoch(self) - end - - if self.save then self:log() end - - self.epoch = self.epoch + 1 - currentLearningRate = self.learningRate/(1+self.epoch*self.learningRateDecay) - self.optimizer.learningRate = currentLearningRate - - if dataset.infiniteSet then - self.trainOffset = self.trainOffset + dataset:size() - end - - if self.maxEpoch > 0 and self.epoch > self.maxEpoch then - print(" you have reached the maximum number of epochs") - break - end - end -end - - -function StochasticTrainer:test(dataset) - print(' on testing Set:') - - local module = self.module - local shuffledIndices = {} - local criterion = self.criterion - self.currentError = 0 - self.testset = dataset - - local shuffledIndices = {} - if not self.shuffleIndices then - for t = 1,dataset:size() do - shuffledIndices[t] = t - end - else - shuffledIndices = lab.randperm(dataset:size()) - end - - self.time = sys.clock() - for t = 1,dataset:size() do - -- disp progress - if self.dispProgress then - xlua.progress(t, dataset:size()) - end - - -- get new sample - local sample = dataset[self.testOffset + shuffledIndices[t]] - local input = sample[1] - local target = sample[2] - - -- max target ? - if self.maxTarget then - target = torch.Tensor(target:nElement()):copy(target) - _,target = lab.max(target) - target = target[1] - end - - -- test sample through current model - if self.preprocessor then input = self.preprocessor:forward(input) end - if criterion then - self.currentError = self.currentError + - criterion:forward(module:forward(input), target) - else - local _,error = module:forward(input, target) - self.currentError = self.currentError + error - end - - -- user hook - if self.hookTestSample then - self.hookTestSample(self, sample) - end - end - - self.currentError = self.currentError / dataset:size() - print(" test current error = " .. self.currentError) - - self.time = sys.clock() - self.time - self.time = self.time / dataset:size() - print(" time to test 1 sample = " .. (self.time*1000) .. 
'ms') - - if self.hookTestEpoch then - self.hookTestEpoch(self) - end - - if dataset.infiniteSet then - self.testOffset = self.testOffset + dataset:size() - end - - return self.currentError -end - -function StochasticTrainer:write(file) - parent.write(self,file) - file:writeObject(self.module) - file:writeObject(self.criterion) -end - -function StochasticTrainer:read(file) - parent.read(self,file) - self.module = file:readObject() - self.criterion = file:readObject() -end diff --git a/init.lua b/init.lua index 04877d3..20246bc 100644 --- a/init.lua +++ b/init.lua @@ -105,7 +105,6 @@ torch.include('nnx', 'LBFGSOptimization.lua') -- trainers: torch.include('nnx', 'Trainer.lua') torch.include('nnx', 'OnlineTrainer.lua') -torch.include('nnx', 'StochasticTrainer.lua') -- datasets: torch.include('nnx', 'DataSet.lua') diff --git a/nnx-1.0-1.rockspec b/nnx-1.0-1.rockspec index 4529d24..cfbc571 100644 --- a/nnx-1.0-1.rockspec +++ b/nnx-1.0-1.rockspec @@ -83,7 +83,6 @@ build = { install_files(/lua/nnx SpatialCriterion.lua) install_files(/lua/nnx Trainer.lua) install_files(/lua/nnx OnlineTrainer.lua) - install_files(/lua/nnx StochasticTrainer.lua) install_files(/lua/nnx DataSet.lua) install_files(/lua/nnx DataList.lua) install_files(/lua/nnx DataSetLabelMe.lua) -- cgit v1.2.3 From 0b7f3b9cd4e578a96cfc6b50b18bca2f27cc4682 Mon Sep 17 00:00:00 2001 From: Clement Farabet Date: Mon, 29 Aug 2011 16:32:27 -0400 Subject: By default, DataList now generates vector targets. The previous default was really stupid. --- DataList.lua | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/DataList.lua b/DataList.lua index 99b117a..9677022 100644 --- a/DataList.lua +++ b/DataList.lua @@ -13,6 +13,7 @@ function DataList:__init() self.nbClass = 0 self.ClassName = {} self.nbSamples = 0 + self.spatialTarget = false end function DataList:__tostring__() @@ -30,8 +31,13 @@ function DataList:__index__(key) elmt = ((elmt-1) % classSize) + 1 -- create target vector on the fly - self.datasets[class][elmt][2] = torch.Tensor(1,1,self.nbClass):fill(-1) - self.datasets[class][elmt][2][1][1][class] = 1 + if self.spatialTarget then + self.datasets[class][elmt][2] = torch.Tensor(self.nbClass,1,1):fill(-1) + self.datasets[class][elmt][2][class][1][1] = 1 + else + self.datasets[class][elmt][2] = torch.Tensor(self.nbClass):fill(-1) + self.datasets[class][elmt][2][class] = 1 + end -- apply hook on sample local sample = self.datasets[class][elmt] -- cgit v1.2.3 From 398e21cfaf6a8e4fa47edd9e6de081497e75dda2 Mon Sep 17 00:00:00 2001 From: Clement Farabet Date: Mon, 29 Aug 2011 16:55:12 -0400 Subject: Added DistNLLCriterion, to support Neg Likelihood for distributions. ClassNLLCriterion only supports simple distributions, e.g. one-of-N. DistNLLCriterion supports arbitrary distributions. 
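For reference, a minimal usage sketch of the new criterion (not part of the patch itself; it assumes a working torch/nnx install, and the names `crit`, `input`, `target` and the tensor sizes are illustrative only):

require 'nnx'
local crit = nn.DistNLLCriterion()
crit.targetIsProbability = true             -- the target below is already a distribution
local input  = torch.Tensor(10):zero()      -- raw scores; log-softmaxed internally
input[3] = 1
local target = torch.Tensor(10):fill(0.1)   -- arbitrary target distribution over 10 classes
local err  = crit:forward(input, target)    -- -sum_i target[i] * log(softmax(input))[i]
local grad = crit:backward(input, target)   -- gradient w.r.t. the raw input scores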
--- DataList.lua | 13 +++++++++-- DistNLLCriterion.lua | 61 ++++++++++++++++++++++++++++++++++++++++++++++++++++ init.lua | 1 + nnx-1.0-1.rockspec | 1 + 4 files changed, 74 insertions(+), 2 deletions(-) create mode 100644 DistNLLCriterion.lua diff --git a/DataList.lua b/DataList.lua index 9677022..4922e8b 100644 --- a/DataList.lua +++ b/DataList.lua @@ -13,6 +13,7 @@ function DataList:__init() self.nbClass = 0 self.ClassName = {} self.nbSamples = 0 + self.targetIsProbability = false self.spatialTarget = false end @@ -32,10 +33,18 @@ function DataList:__index__(key) -- create target vector on the fly if self.spatialTarget then - self.datasets[class][elmt][2] = torch.Tensor(self.nbClass,1,1):fill(-1) + if self.targetIsProbability then + self.datasets[class][elmt][2] = torch.Tensor(self.nbClass,1,1):zero() + else + self.datasets[class][elmt][2] = torch.Tensor(self.nbClass,1,1):fill(-1) + end self.datasets[class][elmt][2][class][1][1] = 1 else - self.datasets[class][elmt][2] = torch.Tensor(self.nbClass):fill(-1) + if self.targetIsProbability then + self.datasets[class][elmt][2] = torch.Tensor(self.nbClass):zero() + else + self.datasets[class][elmt][2] = torch.Tensor(self.nbClass):fill(-1) + end self.datasets[class][elmt][2][class] = 1 end diff --git a/DistNLLCriterion.lua b/DistNLLCriterion.lua new file mode 100644 index 0000000..c0b69e3 --- /dev/null +++ b/DistNLLCriterion.lua @@ -0,0 +1,61 @@ +local DistNLLCriterion, parent = torch.class('nn.DistNLLCriterion', 'nn.Criterion') + +function DistNLLCriterion:__init() + parent.__init(self) + -- user options + self.inputIsProbability = false + self.inputIsLogProbability = false + self.targetIsProbability = false + -- internal + self.targetSoftMax = nn.SoftMax() + self.inputLogSoftMax = nn.LogSoftMax() + self.gradLogInput = torch.Tensor() +end + +function DistNLLCriterion:normalize(input, target) + -- normalize target + if not self.targetIsProbability then + self.probTarget = self.targetSoftMax:forward(target) + else + self.probTarget = target + end + + -- normalize input + if not self.inputIsLogProbability and not self.inputIsProbability then + self.logProbInput = self.inputLogSoftMax:forward(input) + elseif not self.inputIsLogProbability then + print('TODO: implement nn.Log()') + else + self.logProbInput = input + end +end + +function DistNLLCriterion:denormalize(input) + -- denormalize gradients + if not self.inputIsLogProbability and not self.inputIsProbability then + self.gradInput = self.inputLogSoftMax:backward(input, self.gradLogInput) + elseif not self.inputIsLogProbability then + print('TODO: implement nn.Log()') + else + self.gradInput = self.gradLogInput + end +end + +function DistNLLCriterion:forward(input, target) + self:normalize(input, target) + self.output = 0 + for i = 1,input:size(1) do + self.output = self.output - self.logProbInput[i] * self.probTarget[i] + end + return self.output +end + +function DistNLLCriterion:backward(input, target) + self:normalize(input, target) + self.gradLogInput:resizeAs(input) + for i = 1,input:size(1) do + self.gradLogInput[i] = -self.probTarget[i] + end + self:denormalize(input) + return self.gradInput +end diff --git a/init.lua b/init.lua index 20246bc..ea44de0 100644 --- a/init.lua +++ b/init.lua @@ -93,6 +93,7 @@ torch.include('nnx', 'SpatialColorTransform.lua') -- criterions: torch.include('nnx', 'SuperCriterion.lua') torch.include('nnx', 'SparseCriterion.lua') +torch.include('nnx', 'DistNLLCriterion.lua') torch.include('nnx', 'SpatialMSECriterion.lua') torch.include('nnx', 
'SpatialClassNLLCriterion.lua') torch.include('nnx', 'SpatialSparseCriterion.lua') diff --git a/nnx-1.0-1.rockspec b/nnx-1.0-1.rockspec index cfbc571..3af08d0 100644 --- a/nnx-1.0-1.rockspec +++ b/nnx-1.0-1.rockspec @@ -62,6 +62,7 @@ build = { install_files(/lua/nnx init.lua) install_files(/lua/nnx Abs.lua) install_files(/lua/nnx ConfusionMatrix.lua) + install_files(/lua/nnx DistNLLCriterion.lua) install_files(/lua/nnx Logger.lua) install_files(/lua/nnx Probe.lua) install_files(/lua/nnx HardShrink.lua) -- cgit v1.2.3 From 7d1c3269145eed293018727f5cf806adcddeef33 Mon Sep 17 00:00:00 2001 From: Clement Farabet Date: Mon, 29 Aug 2011 17:00:38 -0400 Subject: Added missing read/write functions for new NLL Loss. --- DistNLLCriterion.lua | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/DistNLLCriterion.lua b/DistNLLCriterion.lua index c0b69e3..fedda1b 100644 --- a/DistNLLCriterion.lua +++ b/DistNLLCriterion.lua @@ -59,3 +59,23 @@ function DistNLLCriterion:backward(input, target) self:denormalize(input) return self.gradInput end + +function DistNLLCriterion:write(file) + parent.write(self, file) + file:writeBool(self.inputIsProbability) + file:writeBool(self.inputIsLogProbability) + file:writeBool(self.targetIsProbability) + file:writeObject(self.targetSoftMax) + file:writeObject(self.inputLogSoftMax) + file:writeObject(self.gradLogInput) +end + +function DistNLLCriterion:read(file) + parent.read(self, file) + self.inputIsProbability = file:readBool() + self.inputIsLogProbability = file:readBool() + self.targetIsProbability = file:readBool() + self.targetSoftMax = file:readObject() + self.inputLogSoftMax = file:readObject() + self.gradLogInput = file:readObject() +end -- cgit v1.2.3 From d6eb74d4d6e2ab0c8a62a7ad74160b525f63100e Mon Sep 17 00:00:00 2001 From: Clement Farabet Date: Mon, 29 Aug 2011 20:37:29 -0400 Subject: Forcing GC in mapreduce BFGS. --- LBFGSOptimization.lua | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/LBFGSOptimization.lua b/LBFGSOptimization.lua index 73405ed..f47ccbd 100644 --- a/LBFGSOptimization.lua +++ b/LBFGSOptimization.lua @@ -160,6 +160,8 @@ function LBFGS:forward_mapreduce(inputs, targets, options) gradParametersPartial[t] = parallel.children[t]:receive() outputsPartial[t] = parallel.children[t]:receive() end + -- force cleanup + collectgarbage() end -- (1b) the reduce part of the evaluation: accumulate all @@ -265,6 +267,9 @@ function LBFGS:setup_mapreduce () -- now send back gradParameters + partial output parallel.parent:send(gradParameters) parallel.parent:send(f_x) + + -- force cleanup + collectgarbage() end end ]] -- cgit v1.2.3 From c94034a869d4306a3d58de8b7e0578415543316e Mon Sep 17 00:00:00 2001 From: Clement Farabet Date: Mon, 29 Aug 2011 21:02:46 -0400 Subject: Added proper learningRate decay in SGD optimizer. --- SGDOptimization.lua | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/SGDOptimization.lua b/SGDOptimization.lua index 7bb8b2c..88d63d0 100644 --- a/SGDOptimization.lua +++ b/SGDOptimization.lua @@ -7,11 +7,13 @@ function SGD:__init(...) 
{arg='module', type='nn.Module', help='a module to train', req=true}, {arg='criterion', type='nn.Criterion', help='a criterion to estimate the error', req=true}, {arg='learningRate', type='number', help='learning rate (W = W - rate*dE/dW)', default=1e-2}, + {arg='learningRateDecay', type='number', help='learning rate decay (lr_t = lr_0 / (1 + samplesSeen*lrDecay))', default=0}, {arg='weightDecay', type='number', help='amount of weight decay (W = W - decay*W)', default=0}, {arg='momentum', type='number', help='amount of momentum on weights (dE/W = dE/dW*(1-momentum) + prev(dE/dW)*momentum)', default=0} ) self.parameters = nnx.flattenParameters(nnx.getParameters(self.module)) self.gradParameters = nnx.flattenParameters(nnx.getGradParameters(self.module)) + self.samplesSeen = 0 end function SGD:forward(inputs, targets, options) @@ -45,6 +47,9 @@ function SGD:forward(inputs, targets, options) end end + -- update counter of samples seen + self.samplesSeen = self.samplesSeen + #inputs + -- renorm f self.output = self.output / #inputs @@ -68,7 +73,8 @@ end -- update parameters - self.parameters:add(-self.learningRate, self.currentGradParameters) + local learningRate = self.learningRate / (1 + self.samplesSeen*self.learningRateDecay) + self.parameters:add(-learningRate, self.currentGradParameters) -- return current output return self.output -- cgit v1.2.3 From 470b00f1bbcf4b8d4ef671b703b8e6fa3209344a Mon Sep 17 00:00:00 2001 From: Marc Scoffier Date: Tue, 30 Aug 2011 16:05:05 -0400 Subject: fixed bug when using shuffled indices --- OnlineTrainer.lua | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/OnlineTrainer.lua b/OnlineTrainer.lua index db662e5..f213924 100644 --- a/OnlineTrainer.lua +++ b/OnlineTrainer.lua @@ -89,7 +89,7 @@ function OnlineTrainer:train(dataset) local targets = {} for i = t,math.min(t+self.batchSize-1,dataset:size()) do -- load new sample - local sample = dataset[self.trainOffset + shuffledIndices[i]] + local sample = dataset[shuffledIndices[self.trainOffset + i]] local input = sample[1] local target = sample[2] @@ -166,7 +166,7 @@ function OnlineTrainer:test(dataset) end -- get new sample - local sample = dataset[self.testOffset + shuffledIndices[t]] + local sample = dataset[shuffledIndices[self.testOffset + t]] local input = sample[1] local target = sample[2] -- cgit v1.2.3 From dab3bb7517155399bc6f9e377e9fc15c16063aa1 Mon Sep 17 00:00:00 2001 From: Clement Farabet Date: Tue, 30 Aug 2011 16:42:48 -0400 Subject: fixed wrong PATH in cmake --- FindTorch.cmake | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/FindTorch.cmake b/FindTorch.cmake index 8ada8cc..6658d42 100644 --- a/FindTorch.cmake +++ b/FindTorch.cmake @@ -13,9 +13,9 @@ if (TORCH_EXECUTABLE) get_filename_component (TORCH_BIN_DIR ${TORCH_EXECUTABLE} PATH) endif (TORCH_EXECUTABLE) -find_library (TORCH_TH TH ${TORCH_BIN_DIR}/../lib) -find_library (TORCH_luaT luaT ${TORCH_BIN_DIR}/../lib) -find_library (TORCH_lua lua ${TORCH_BIN_DIR}/../lib) +find_library (TORCH_TH TH ${TORCH_BIN_DIR}/../lib NO_DEFAULT_PATH) +find_library (TORCH_luaT luaT ${TORCH_BIN_DIR}/../lib NO_DEFAULT_PATH) +find_library (TORCH_lua lua ${TORCH_BIN_DIR}/../lib NO_DEFAULT_PATH) set (TORCH_LIBRARIES ${TORCH_TH} ${TORCH_luaT} ${TORCH_lua}) -- cgit v1.2.3 From 987894db868ed9b5ad0cd746a8c3569985acd71d Mon Sep 17 00:00:00 2001 From: Clement Farabet Date: Tue, 30 Aug 2011 19:16:32 -0400 Subject: Got rid of shuffle flags in Trainers.
--- OnlineTrainer.lua | 34 ++-------------------------------- Trainer.lua | 7 ------- 2 files changed, 2 insertions(+), 39 deletions(-) diff --git a/OnlineTrainer.lua b/OnlineTrainer.lua index db662e5..dc6e860 100644 --- a/OnlineTrainer.lua +++ b/OnlineTrainer.lua @@ -33,9 +33,6 @@ function OnlineTrainer:__init(...) {arg='timestamp', type='boolean', help='if true, appends a timestamp to each network saved', default=false} ) - -- private params - self.trainOffset = 0 - self.testOffset = 0 end function OnlineTrainer:log() @@ -63,15 +60,6 @@ function OnlineTrainer:train(dataset) local criterion = self.criterion self.trainset = dataset - local shuffledIndices = {} - if not self.shuffleIndices then - for t = 1,dataset:size() do - shuffledIndices[t] = t - end - else - shuffledIndices = lab.randperm(dataset:size()) - end - while true do print(' on training set:') print(" online epoch # " .. self.epoch .. ' [batchSize = ' .. self.batchSize .. ']') @@ -89,7 +77,7 @@ function OnlineTrainer:train(dataset) local targets = {} for i = t,math.min(t+self.batchSize-1,dataset:size()) do -- load new sample - local sample = dataset[self.trainOffset + shuffledIndices[i]] + local sample = dataset[i] local input = sample[1] local target = sample[2] @@ -128,10 +116,6 @@ function OnlineTrainer:train(dataset) self.epoch = self.epoch + 1 - if dataset.infiniteSet then - self.trainOffset = self.trainOffset + dataset:size() - end - if self.maxEpoch > 0 and self.epoch > self.maxEpoch then print(" you have reached the maximum number of epochs") break @@ -144,20 +128,10 @@ function OnlineTrainer:test(dataset) print(' on testing Set:') local module = self.module - local shuffledIndices = {} local criterion = self.criterion self.currentError = 0 self.testset = dataset - local shuffledIndices = {} - if not self.shuffleIndices then - for t = 1,dataset:size() do - shuffledIndices[t] = t - end - else - shuffledIndices = lab.randperm(dataset:size()) - end - self.time = sys.clock() for t = 1,dataset:size() do -- disp progress @@ -166,7 +140,7 @@ function OnlineTrainer:test(dataset) end -- get new sample - local sample = dataset[self.testOffset + shuffledIndices[t]] + local sample = dataset[t] local input = sample[1] local target = sample[2] @@ -197,10 +171,6 @@ function OnlineTrainer:test(dataset) self.hookTestEpoch(self) end - if dataset.infiniteSet then - self.testOffset = self.testOffset + dataset:size() - end - return self.currentError end diff --git a/Trainer.lua b/Trainer.lua index 3388ef7..b7da770 100644 --- a/Trainer.lua +++ b/Trainer.lua @@ -4,7 +4,6 @@ function Trainer:__init() self.learningRate = 0.01 self.learningRateDecay = 0 self.maxIteration = 25 - self.shuffleIndices = true end function Trainer:train(dataset) @@ -14,14 +13,12 @@ function Trainer:write(file) file:writeDouble(self.learningRate) file:writeDouble(self.learningRateDecay) file:writeInt(self.maxIteration) - file:writeBool(self.shuffleIndices) end function Trainer:read(file) self.learningRate = file:readDouble() self.learningRateDecay = file:readDouble() self.maxIteration = file:readInt() - self.shuffleIndices = file:readBool() end function Trainer:share(mlp, ...) @@ -30,10 +27,6 @@ function Trainer:share(mlp, ...) end end -function Trainer:setShuffle(bool) - self.shuffleIndices = bool -end - function Trainer:clone(...) 
local f = torch.MemoryFile("rw"):binary() f:writeObject(self) -- cgit v1.2.3 From e2ed5a2b94d52e7ae048e502d768512272a07a36 Mon Sep 17 00:00:00 2001 From: Clement Farabet Date: Tue, 30 Aug 2011 21:40:50 -0400 Subject: Got rid of useless/redundant copies in DataSet. --- DataSetLabelMe.lua | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/DataSetLabelMe.lua b/DataSetLabelMe.lua index decec18..5666c96 100644 --- a/DataSetLabelMe.lua +++ b/DataSetLabelMe.lua @@ -259,15 +259,9 @@ end function DataSetLabelMe:loadSample(index) if self.preloadedDone then if index ~= self.currentIndex then - -- clean up - self.currentSample = nil - self.currentMask = nil - collectgarbage() -- load new sample - self.currentSample = torch.Tensor(self.preloaded.samples[index]:size()) - self.currentSample:copy(self.preloaded.samples[index]):mul(1/255) - self.currentMask = torch.Tensor(self.preloaded.masks[index]:size()) - self.currentMask:copy(self.preloaded.masks[index]) + self.currentSample = self.preloaded.samples[index] + self.currentMask = self.preloaded.masks[index] -- remember index self.currentIndex = index end @@ -364,7 +358,7 @@ function DataSetLabelMe:preload(saveFile) xlua.progress(i,self.nbRawSamples) -- load samples, and store them in raw byte tensors (min memory footprint) self:loadSample(i) - local rawTensor = torch.Tensor(self.currentSample:size()):copy(self.currentSample:mul(255)) + local rawTensor = torch.Tensor(self.currentSample:size()):copy(self.currentSample) local rawMask = torch.Tensor(self.currentMask:size()):copy(self.currentMask) -- insert them in our list table.insert(self.preloaded.samples, rawTensor) -- cgit v1.2.3 From 9d7cdf26db61f8537555fc5045732b5e5102e9ac Mon Sep 17 00:00:00 2001 From: Marco Scoffier Date: Tue, 30 Aug 2011 22:40:09 -0400 Subject: updates for useable Batch modules --- BatchOptimization.lua | 1 + BatchTrainer.lua | 67 ++++++++++++++++++++++++++++----------------------- 2 files changed, 38 insertions(+), 30 deletions(-) diff --git a/BatchOptimization.lua b/BatchOptimization.lua index 93cbd2a..0951ae4 100644 --- a/BatchOptimization.lua +++ b/BatchOptimization.lua @@ -28,6 +28,7 @@ function Batch:__init(...) if self.parallelize > 1 then self:setup_mapreduce() end + batch = {} end function Batch:forward(inputs, targets, options) diff --git a/BatchTrainer.lua b/BatchTrainer.lua index b774a9b..ab5ba92 100644 --- a/BatchTrainer.lua +++ b/BatchTrainer.lua @@ -8,22 +8,17 @@ local BatchTrainer, parent = torch.class('nn.BatchTrainer', 'nn.OnlineTrainer') -- closure with the current batch as input. function BatchTrainer:__init(...) - -- parent.__init(self) + local args = {...} + parent.__init(self, args) -- unpack args - xlua.unpack_class(self, {...}, + xlua.unpack_class( + self, args, 'BatchTrainer', - - 'A general-purpose online trainer class.\n' - .. 'Provides 4 user hooks to perform extra work after each sample, or each epoch:\n' - .. '> trainer = nn.BatchTrainer(...) \n' - .. '> trainer.hookTrainSample = function(trainer, sample) ... end \n' - .. '> trainer.hookTrainEpoch = function(trainer) ... end \n' - .. '> trainer.hookTestSample = function(trainer, sample) ... end \n' - .. '> trainer.hookTestEpoch = function(trainer) ... end \n' - .. '> ', - - {arg='trainset', type='nn.DataList', - help='dataset to split into batches for closures',req=true}, + 'A modified version of the general-purpose online trainer class.\n' + .. ' which only preps the input batch and calls optimizer to\n' + .. 
' create a closure\n', + {arg='trainset', type='nn.DataList', + help='dataset from which to draw batches', req=true}, {arg='module', type='nn.Module', help='a module to train', req=true}, {arg='criterion', type='nn.Criterion', help='a criterion to estimate the error'}, @@ -42,27 +37,37 @@ function BatchTrainer:__init(...) {arg='timestamp', type='boolean', help='if true, appends a timestamp to each network saved', default=false} ) - -- private params - self.trainOffset = -self.batchSize - self.testOffset = 0 - - -- counters self.epoch = 1 - self.batch = 0 + self.batch = nil + self.trainOffset = nil end -- update the counters function BatchTrainer:next() - self.batch = self.batch + 1 - self.trainOffset = self.trainOffset + self.batchSize - if self.trainOffset > self.trainset:size()-1 then - self.trainOffset = 1 - self.epoch = self.epoch + 1 + if not self.batch then self.batch = 1 + else + self.batch = self.batch + 1 + end + if not self.trainOffset then + self.trainOffset = 1 + else + self.trainOffset = self.trainOffset + self.batchSize + if self.trainOffset > self.trainset:size() then + self.trainOffset = 1 + self.epoch = self.epoch + 1 + self.batch = 1 + if self.hookTrainEpoch then + self.hookTrainEpoch(self) + end + + if self.save then self:log() end + + end end -- disp progress if self.dispProgress then - xlua.progress(self.trainOffset, trainset:size()) + xlua.progress(self.trainOffset, self.trainset:size()) end end @@ -71,8 +76,8 @@ end -- make more sense to call it next_batch() here as the training is -- done outside of this code. -function BatchTrainer:next_batch() - self.next() +function BatchTrainer:nextBatch() + self:next() local module = self.module local criterion = self.criterion local t = self.trainOffset @@ -80,7 +85,9 @@ function BatchTrainer:next_batch() local bs = self.batchSize print(' on training set:') - print(" online epoch # " .. self.epoch .. ' batch # '..self.batch.. '[batchSize = ' .. self.batchSize .. ']') + print(" online epoch # " .. self.epoch + .. ' batch # '..self.batch + .. ' [batchSize = ' .. self.batchSize .. ']') -- create mini batch self.inputs = self.inputs or {} @@ -96,7 +103,7 @@ function BatchTrainer:next_batch() for i = t,math.min(t+bs-1,ds) do -- load new sample - local sample = trainset[t + i] + local sample = self.trainset[t] local input = sample[1] local target = sample[2] -- cgit v1.2.3 From f97f610a34d2a323b56976186aeacf1a6c7bb6cb Mon Sep 17 00:00:00 2001 From: Clement Farabet Date: Wed, 31 Aug 2011 01:54:42 -0400 Subject: Using new optimized send/receive functions (mapreduce). --- LBFGSOptimization.lua | 22 ++++++++++++++-------- lbfgs.c | 12 +++++------- 2 files changed, 19 insertions(+), 15 deletions(-) diff --git a/LBFGSOptimization.lua b/LBFGSOptimization.lua index f47ccbd..976226d 100644 --- a/LBFGSOptimization.lua +++ b/LBFGSOptimization.lua @@ -43,6 +43,9 @@ function LBFGS:forward_sequential(inputs, targets, options) -- + self.output contains the estimated (average) F(X) lbfgs.evaluate = function() + -- verbose + if self.verbose >= 2 then print(' evaluating f(X) + df/dX') end + local _t_ = sys.clock() -- reset gradients self.gradParameters:zero() -- f is the average of all criterions @@ -67,6 +70,8 @@ function LBFGS:forward_sequential(inputs, targets, options) end -- normalize gradients self.gradParameters:div(#inputs) + -- verbose + if self.verbose >= 2 then print(' f(X) + df/dX computed in ' .. (sys.clock() - _t_) .. 
' sec') end -- return average f(X) return self.output/#inputs end @@ -91,8 +96,7 @@ function LBFGS:forward_mapreduce(inputs, targets, options) if type(self.prehook) == 'string' then parallel.children:send(self.prehook) else - print('\r WARNING: when using para||el mode, hooks should be') - print('\r WARNING: defined as strings. User prehook ignored.') + print('\r WARNING: when using para||el mode, hooks should be defined as strings. User prehook ignored.') parallel.children:send('') end else @@ -102,8 +106,7 @@ function LBFGS:forward_mapreduce(inputs, targets, options) if type(self.posthook) == 'string' then parallel.children:send(self.posthook) else - print('\r WARNING: when using para||el mode, hooks should be') - print('<\rLBFGSOptimization> WARNING: defined as strings. User posthook ignored.') + print('\r WARNING: when using para||el mode, hooks should be defined as strings. User posthook ignored.') parallel.children:send('') end else @@ -153,15 +156,18 @@ function LBFGS:forward_mapreduce(inputs, targets, options) -- in separate threads lbfgs.evaluate_map = function() + -- verbose + if self.verbose >= 2 then print(' evaluating f(X) + df/dX') end + local _t_ = sys.clock() -- transmit new parameters to all workers parallel.children:send(self.parameters) -- then wait for all workers to return their partial gradParameters + outputs - for t = 1,P do - gradParametersPartial[t] = parallel.children[t]:receive() - outputsPartial[t] = parallel.children[t]:receive() - end + gradParametersPartial = parallel.children:receive() + outputsPartial = parallel.children:receive() -- force cleanup collectgarbage() + -- verbose + if self.verbose >= 2 then print(' f(X) + df/dX computed in ' .. (sys.clock() - _t_) .. ' sec') end end -- (1b) the reduce part of the evaluation: accumulate all diff --git a/lbfgs.c b/lbfgs.c index f84623a..7a660b1 100644 --- a/lbfgs.c +++ b/lbfgs.c @@ -1440,11 +1440,10 @@ static int progress(void *instance, { nIteration = k; if (verbose > 1) { - printf("\n iteration %d:\n", nIteration); - printf(" + fx = %f\n", fx); + printf(" iteration %d:\n", nIteration); + printf(" + f(X) = %f\n", fx); printf(" + xnorm = %f, gnorm = %f, step = %f\n", xnorm, gnorm, step); printf(" + nb evaluations = %d\n", nEvaluation); - printf("\n"); } return 0; } @@ -1477,11 +1476,10 @@ int lbfgs_run(lua_State *L) { // verbose if (verbose) { - printf("\n batch optimized after %d iterations\n", nIteration); - printf(" + fx = %f\n", fx); - printf(" + x = [ %f , ... %f]\n",x[0],x[nParameter-1]); + printf(" batch optimized after %d iterations\n", nIteration); + printf(" + f(X) = %f\n", fx); + printf(" + X = [%f , ... %f]\n",x[0],x[nParameter-1]); printf(" + nb evaluations = %d\n", nEvaluation); - printf("\n"); } // cleanup -- cgit v1.2.3 From 24219cba24be5a75d6666d1ca2a9a6ea0bde252a Mon Sep 17 00:00:00 2001 From: Clement Farabet Date: Wed, 31 Aug 2011 18:05:59 -0400 Subject: Re-organized BatchOptimization modules. --- BatchOptimization.lua | 87 +++++++++++----- LBFGSOptimization.lua | 277 ++------------------------------------------------ SGDOptimization.lua | 99 ++++++------------ init.lua | 2 +- 4 files changed, 101 insertions(+), 364 deletions(-) diff --git a/BatchOptimization.lua b/BatchOptimization.lua index 0951ae4..4caa064 100644 --- a/BatchOptimization.lua +++ b/BatchOptimization.lua @@ -12,12 +12,6 @@ function Batch:__init(...) 
{arg='module', type='nn.Module', help='a module to train', req=true}, {arg='criterion', type='nn.Criterion', help='a criterion to estimate the error', req=true}, - {arg='maxIterations', type='number', - help='maximum nb of iterations per pass (0 = no max)', default=0}, - {arg='maxLineSearch', type='number', - help='maximum nb of steps in line search', default=20}, - {arg='sparsity', type='number', - help='sparsity coef (Orthantwise C)', default=0}, {arg='parallelize', type='number', help='parallelize onto N cores (experimental!)', default=1}, {arg='verbose', type='number', @@ -25,10 +19,11 @@ function Batch:__init(...) ) self.parameters = nnx.flattenParameters(nnx.getParameters(self.module)) self.gradParameters = nnx.flattenParameters(nnx.getGradParameters(self.module)) + self.evalCounter = 0 + self.sampleCounter = 0 if self.parallelize > 1 then self:setup_mapreduce() end - batch = {} end function Batch:forward(inputs, targets, options) @@ -46,8 +41,13 @@ function Batch:forward_sequential(inputs, targets, options) -- + self.parameters contains the current X vector -- + self.gradParameters contains the estimated dF/dX vector -- + self.output contains the estimated (average) F(X) - batch.evaluate + self.evaluate = function() + -- verbose + if self.verbose >= 2 then + print(' evaluating f(X) + df/dX') + end + local _t_ = sys.clock() -- reset gradients self.gradParameters:zero() -- f is the average of all criterions @@ -70,13 +70,28 @@ function Batch:forward_sequential(inputs, targets, options) self.posthook(self, {inputs[i], targets[i], options[i]}) end end + -- update evaluation counter + self.evalCounter = self.evalCounter + 1 -- normalize gradients self.gradParameters:div(#inputs) + -- verbose + if self.verbose >= 2 then + print(' ' .. self.evalCounter .. 'th evaluation took ' .. (sys.clock() - _t_) .. ' sec') + end -- return average f(X) - return self.output/#inputs + self.output = self.output/#inputs + return self.output end - -- (3) return current output after optimization + -- (2) optimization callback + if self.optimize then + self:optimize() + end + + -- (3) update sample counter + self.sampleCounter = self.sampleCounter + #inputs + + -- (4) return current output after optimization return self.output end @@ -90,8 +105,8 @@ function Batch:forward_mapreduce(inputs, targets, options) if type(self.prehook) == 'string' then parallel.children:send(self.prehook) else - print('\r WARNING: when using para||el mode, hooks should be') - print('\r WARNING: defined as strings. User prehook ignored.') + print('\r WARNING: when using para||el mode,'.. + ' hooks should be defined as strings. User prehook ignored.') parallel.children:send('') end else @@ -101,8 +116,8 @@ function Batch:forward_mapreduce(inputs, targets, options) if type(self.posthook) == 'string' then parallel.children:send(self.posthook) else - print('\r WARNING: when using para||el mode, hooks should be') - print('<\rBatchOptimization> WARNING: defined as strings. User posthook ignored.') + print('\r WARNING: when using para||el mode,'.. + ' hooks should be defined as strings. 
User posthook ignored.') parallel.children:send('') end else @@ -142,30 +157,41 @@ function Batch:forward_mapreduce(inputs, targets, options) -- + self.parameters contains the current X vector -- + self.gradParameters contains the estimated dF/dX vector -- + self.output contains the estimated (average) F(X) - batch.evaluate + self.evaluate = function() - batch.evaluate_map() - return batch.evaluate_reduce() + -- verbose + if self.verbose >= 2 then + print(' evaluating f(X) + df/dX') + end + local _t_ = sys.clock() + -- do map/reduce + self.evaluate_map() + self.evaluate_reduce() + -- update evaluation counter + self.evalCounter = self.evalCounter + 1 + -- verbose + if self.verbose >= 2 then + print(' ' .. self.evalCounter .. 'th evaluation took ' .. (sys.clock() - _t_) .. ' sec') + end + return self.output end -- (1a) the map part of the evaluation: compute partial gradients -- in separate threads - batch.evaluate_map + self.evaluate_map = function() -- transmit new parameters to all workers parallel.children:send(self.parameters) -- then wait for all workers to return their partial gradParameters + outputs - for t = 1,P do - gradParametersPartial[t] = parallel.children[t]:receive() - outputsPartial[t] = parallel.children[t]:receive() - end + gradParametersPartial = parallel.children:receive() + outputsPartial = parallel.children:receive() -- force cleanup collectgarbage() end -- (1b) the reduce part of the evaluation: accumulate all -- partial estimates of the gradients - batch.evaluate_reduce + self.evaluate_reduce = function() -- accumulate partial gradients, and average self.gradParameters:zero() @@ -178,14 +204,22 @@ function Batch:forward_mapreduce(inputs, targets, options) for t = 1,P do self.output = self.output + outputsPartial[t] end - return self.output/#inputs + self.output = self.output/#inputs end + -- (2) optimization callback + if self.optimize then + self:optimize() + end + -- (3) reset workers so they're ready for next mini-batch parallel.children:send('break') - -- (4) return current output after optimization - return self.output/#inputs + -- (4) update sample counter + self.sampleCounter = self.sampleCounter + #inputs + + -- (5) return current output after optimization + return self.output end function Batch:setup_mapreduce () @@ -194,7 +228,6 @@ function Batch:setup_mapreduce () xerror('install parallel for Lua to enable parallel computing (luarocks install parallel)', 'nn.BatchOptimization') end - parallel.setSharedSize(4*1024*1024) local P = self.parallelize -- (1) define code for workers diff --git a/LBFGSOptimization.lua b/LBFGSOptimization.lua index 976226d..720b7ba 100644 --- a/LBFGSOptimization.lua +++ b/LBFGSOptimization.lua @@ -1,13 +1,10 @@ -local LBFGS,parent = torch.class('nn.LBFGSOptimization', 'nn.Optimization') +local LBFGS,parent = torch.class('nn.LBFGSOptimization', 'nn.BatchOptimization') function LBFGS:__init(...) require 'liblbfgs' - parent.__init(self) + parent.__init(self, ...) xlua.unpack_class(self, {...}, 'LBFGSOptimization', nil, - {arg='module', type='nn.Module', help='a module to train', req=true}, - {arg='criterion', type='nn.Criterion', - help='a criterion to estimate the error', req=true}, {arg='maxIterations', type='number', help='maximum nb of iterations per pass (0 = no max)', default=0}, {arg='maxLineSearch', type='number', @@ -15,277 +12,17 @@ function LBFGS:__init(...) 
{arg='sparsity', type='number', help='sparsity coef (Orthantwise C)', default=0}, {arg='parallelize', type='number', - help='parallelize onto N cores (experimental!)', default=1}, - {arg='verbose', type='number', - help='verbose level during training [0-2]', default=0} + help='parallelize onto N cores (experimental!)', default=1} ) self.parameters = nnx.flattenParameters(nnx.getParameters(self.module)) self.gradParameters = nnx.flattenParameters(nnx.getGradParameters(self.module)) - if self.parallelize > 1 then - self:setup_mapreduce() - end end -function LBFGS:forward(inputs, targets, options) - options = options or {} - if self.parallelize > 1 then - return self:forward_mapreduce(inputs, targets, options) - else - return self:forward_sequential(inputs, targets, options) - end -end - -function LBFGS:forward_sequential(inputs, targets, options) - -- (1) construct a closure that compute f(inputs) + df/dW - -- after each call to that function: - -- + self.parameters contains the current X vector - -- + self.gradParameters contains the estimated dF/dX vector - -- + self.output contains the estimated (average) F(X) - lbfgs.evaluate - = function() - -- verbose - if self.verbose >= 2 then print(' evaluating f(X) + df/dX') end - local _t_ = sys.clock() - -- reset gradients - self.gradParameters:zero() - -- f is the average of all criterions - self.output = 0 - -- given all inputs, evaluate gradients - for i = 1,#inputs do - -- user hook - if self.prehook then - self.prehook(self, {inputs[i], targets[i], options[i]}) - end - -- estimate f - local output = self.module:forward(inputs[i]) - local err = self.criterion:forward(output, targets[i]) - self.output = self.output + err - -- estimate df/dW - local df_do = self.criterion:backward(output, targets[i]) - self.module:backward(inputs[i], df_do) - -- user hook - if self.posthook then - self.posthook(self, {inputs[i], targets[i], options[i]}) - end - end - -- normalize gradients - self.gradParameters:div(#inputs) - -- verbose - if self.verbose >= 2 then print(' f(X) + df/dX computed in ' .. (sys.clock() - _t_) .. ' sec') end - -- return average f(X) - return self.output/#inputs - end - - -- (2) the magic function: will update the parameter vector - -- according to the l-BFGS method - self.output = lbfgs.run(self.parameters, self.gradParameters, - self.maxIterations, self.maxLineSearch, - self.sparsity, self.verbose) - - -- (3) return current output after optimization - return self.output -end - -function LBFGS:forward_mapreduce(inputs, targets, options) - -- parameters - local P = self.parallelize - - -- transmit user hooks, if defined - if not self.hooksets then - if self.prehook then - if type(self.prehook) == 'string' then - parallel.children:send(self.prehook) - else - print('\r WARNING: when using para||el mode, hooks should be defined as strings. User prehook ignored.') - parallel.children:send('') - end - else - parallel.children:send('') - end - if self.posthook then - if type(self.posthook) == 'string' then - parallel.children:send(self.posthook) - else - print('\r WARNING: when using para||el mode, hooks should be defined as strings. 
User posthook ignored.') - parallel.children:send('') - end - else - parallel.children:send('') - end - self.hooksets = true - end - - -- (0a) replicate output and gradParameters - local outputsPartial = {} - local gradParametersPartial = {} - - -- (0b) divide input/target batch into N batches - local inputss = {} - local targetss = {} - local optionss = {} - for t = 1,P do - inputss[t] = {} - targetss[t] = {} - optionss[t] = {} - for i = t,#inputs,P do - table.insert(inputss[t], inputs[i]) - table.insert(targetss[t], targets[i]) - if options then table.insert(optionss[t], options[i]) end - end - end - - -- (0c) send mini-batch to all workers - for t = 1,P do - parallel.children[t]:send(inputss[t]) - parallel.children[t]:send(targetss[t]) - parallel.children[t]:send(optionss[t]) - end - - -- (1) construct a closure that compute f(inputs) + df/dW - -- after each call to that function: - -- + self.parameters contains the current X vector - -- + self.gradParameters contains the estimated dF/dX vector - -- + self.output contains the estimated (average) F(X) - lbfgs.evaluate - = function() - lbfgs.evaluate_map() - return lbfgs.evaluate_reduce() - end - - -- (1a) the map part of the evaluation: compute partial gradients - -- in separate threads - lbfgs.evaluate_map - = function() - -- verbose - if self.verbose >= 2 then print(' evaluating f(X) + df/dX') end - local _t_ = sys.clock() - -- transmit new parameters to all workers - parallel.children:send(self.parameters) - -- then wait for all workers to return their partial gradParameters + outputs - gradParametersPartial = parallel.children:receive() - outputsPartial = parallel.children:receive() - -- force cleanup - collectgarbage() - -- verbose - if self.verbose >= 2 then print(' f(X) + df/dX computed in ' .. (sys.clock() - _t_) .. 
' sec') end - end - - -- (1b) the reduce part of the evaluation: accumulate all - -- partial estimates of the gradients - lbfgs.evaluate_reduce - = function() - -- accumulate partial gradients, and average - self.gradParameters:zero() - for t = 1,P do - self.gradParameters:add(gradParametersPartial[t]) - end - self.gradParameters:div(#inputs) - -- return average f(X) - self.output = 0 - for t = 1,P do - self.output = self.output + outputsPartial[t] - end - return self.output/#inputs - end - - -- (2) the magic function: will update the parameter vector - -- according to the l-BFGS method +function LBFGS:optimize() + lbfgs.evaluate = self.evaluate + -- the magic function: will update the parameter vector + -- according to the l-BFGS method self.output = lbfgs.run(self.parameters, self.gradParameters, self.maxIterations, self.maxLineSearch, self.sparsity, self.verbose) - - -- (3) reset workers so they're ready for next mini-batch - parallel.children:send('break') - - -- (4) return current output after optimization - return self.output -end - -function LBFGS:setup_mapreduce () - -- (0) startup parallel package - if not xrequire 'parallel' then - xerror('install parallel for Lua to enable parallel computing (luarocks install parallel)', - 'nn.LBFGSOptimization') - end - parallel.setSharedSize(4*1024*1024) - local P = self.parallelize - - -- (1) define code for workers - local worker_code = [[ - -- require packages - require 'nnx' - - -- retrieve module + criterion at startup - module = parallel.parent:receive() - criterion = parallel.parent:receive() - - -- create fake optimizer, for hooks - optimizer = {module=module, criterion=criterion} - - -- retrieve optional prehook/posthook - prehook = parallel.parent:receive() - posthook = parallel.parent:receive() - if prehook ~= '' then loadstring(prehook)() else prehook = nil end - if posthook ~= '' then loadstring(posthook)() else posthook = nil end - - -- get pointer to parameter and gradParameter vectors - parameters = nnx.flattenParameters(nnx.getParameters(module)) - gradParameters = nnx.flattenParameters(nnx.getGradParameters(module)) - - -- outter loop: mini-batches - while true do - -- receive new mini-batch - inputs = parallel.parent:receive() - if type(inputs) == 'string' and inputs == 'break' then break end - targets = parallel.parent:receive() - options = parallel.parent:receive() - - -- inner loop: evaluations - while true do - -- receive new set of parameters - newParameters = parallel.parent:receive() - if type(newParameters) == 'string' and newParameters == 'break' then break end - parameters:copy(newParameters) - - -- reset gradients - gradParameters:zero() - -- f is the average of all criterions - local f_x = 0 - -- evaluate gradients on inputs for this thread - for i = 1,#inputs do - -- user hook - if prehook then - prehook(optimizer, {inputs[i], targets[i], options[i]}) - end - -- estimate f - local output = module:forward(inputs[i]) - local err = criterion:forward(output, targets[i]) - f_x = f_x + err - -- estimate df/dW - local df_do = criterion:backward(output, targets[i]) - module:backward(inputs[i], df_do) - -- user hook - if posthook then - posthook(optimizer, {inputs[i], targets[i], options[i]}) - end - end - - -- now send back gradParameters + partial output - parallel.parent:send(gradParameters) - parallel.parent:send(f_x) - - -- force cleanup - collectgarbage() - end - end - ]] - - -- (2) startup all workers - for t = 1,P do - parallel.run(worker_code) - end - - -- (3) and send them the module + criterion architecture - 
parallel.children:send(self.module) - parallel.children:send(self.criterion) end diff --git a/SGDOptimization.lua b/SGDOptimization.lua index 88d63d0..ddbf220 100644 --- a/SGDOptimization.lua +++ b/SGDOptimization.lua @@ -1,81 +1,48 @@ -local SGD,parent = torch.class('nn.SGDOptimization', 'nn.Optimization') +local SGD,parent = torch.class('nn.SGDOptimization', 'nn.BatchOptimization') function SGD:__init(...) - parent.__init(self) + parent.__init(self,...) xlua.unpack_class(self, {...}, 'SGDOptimization', nil, - {arg='module', type='nn.Module', help='a module to train', req=true}, - {arg='criterion', type='nn.Criterion', help='a criterion to estimate the error', req=true}, - {arg='learningRate', type='number', help='learning rate (W = W - rate*dE/dW)', default=1e-2}, - {arg='learningRateDecay', type='number', help='learning rate decay (lr_t = lr_0 / (1 + samplesSeen*lrDecay))', default=0}, - {arg='weightDecay', type='number', help='amount of weight decay (W = W - decay*W)', default=0}, - {arg='momentum', type='number', help='amount of momentum on weights (dE/W = dE/dW*(1-momentum) + prev(dE/dW)*momentum)', default=0} + {arg='maxIterations', type='number', + help='maximum nb of iterations per pass', default=1}, + {arg='learningRate', type='number', + help='learning rate (W = W - rate*dE/dW)', default=1e-2}, + {arg='learningRateDecay', type='number', + help='learning rate decay (lr_t = lr_0 / (1 + samplesSeen*lrDecay))', default=0}, + {arg='weightDecay', type='number', + help='amount of weight decay (W = W - decay*W)', default=0}, + {arg='momentum', type='number', + help='amount of momentum on weights (dE/W = dE/dW*(1-momentum) + prev(dE/dW)*momentum)', default=0} ) self.parameters = nnx.flattenParameters(nnx.getParameters(self.module)) self.gradParameters = nnx.flattenParameters(nnx.getGradParameters(self.module)) - self.samplesSeen = 0 end -function SGD:forward(inputs, targets, options) - options = options or {} - - -- reset gradients - self.gradParameters:zero() - - -- f is the average of all criterions - self.output = 0 - - -- given all inputs, evaluate gradients - for i = 1,#inputs do - -- user hook - if self.prehook then - self.prehook(self, {inputs[i], targets[i], options[i]}) - end - - -- estimate f - local output = self.module:forward(inputs[i]) - local err = self.criterion:forward(output, targets[i]) - self.output = self.output + err - - -- estimate df/dW - local df_do = self.criterion:backward(output, targets[i]) - self.module:backward(inputs[i], df_do) - - -- user hook - if self.posthook then - self.posthook(self, {inputs[i], targets[i], options[i]}) +function SGD:optimize() + -- optimize N times + for i = 1,self.maxIterations do + -- evaluate f(X) + df/dX + self.evaluate() + + -- apply momentum + if self.momentum ~= 0 then + if not self.currentGradParameters then + self.currentGradParameters = torch.Tensor():resizeAs(self.gradParameters):copy(self.gradParameters) + else + self.currentGradParameters:mul(self.momentum):add(1-self.momentum, self.gradParameters) + end + else + self.currentGradParameters = self.gradParameters end - end - -- update counter of samples seen - self.samplesSeen = self.samplesSeen + #inputs - - -- renorm f - self.output = self.output / #inputs - - -- normalize gradients - self.gradParameters:div(#inputs) - - -- apply momentum - if self.momentum ~= 0 then - if not self.currentGradParameters then - self.currentGradParameters = torch.Tensor():resizeAs(self.gradParameters):copy(self.gradParameters) - else - 
self.currentGradParameters:mul(self.momentum):add(1-self.momentum, self.gradParameters) + -- weight decay + if self.weightDecay ~= 0 then + self.parameters:add(-self.weightDecay, self.parameters) end - else - self.currentGradParameters = self.gradParameters - end - -- weight decay - if self.weightDecay ~= 0 then - self.parameters:add(-self.weightDecay, self.parameters) + -- update parameters + local learningRate = self.learningRate / (1 + self.sampleCounter*self.learningRateDecay) + self.parameters:add(-learningRate, self.currentGradParameters) end - - -- update parameters - local learningRate = self.learningRate / (1 + self.samplesSeen*self.learningRateDecay) - self.parameters:add(-learningRate, self.currentGradParameters) - - -- return current output - return self.output end diff --git a/init.lua b/init.lua index 6b09afc..3519294 100644 --- a/init.lua +++ b/init.lua @@ -100,9 +100,9 @@ torch.include('nnx', 'SpatialSparseCriterion.lua') -- optimizations: torch.include('nnx', 'Optimization.lua') +torch.include('nnx', 'BatchOptimization.lua') torch.include('nnx', 'SGDOptimization.lua') torch.include('nnx', 'LBFGSOptimization.lua') -torch.include('nnx', 'BatchOptimization.lua') -- trainers: torch.include('nnx', 'Trainer.lua') -- cgit v1.2.3 From 0704307f7e2b4e2bc0720dc7d7c28f4e887dfac6 Mon Sep 17 00:00:00 2001 From: Marco Scoffier Date: Thu, 1 Sep 2011 13:24:51 -0400 Subject: added batchTest and hook --- BatchTrainer.lua | 55 +++++++++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 47 insertions(+), 8 deletions(-) diff --git a/BatchTrainer.lua b/BatchTrainer.lua index ab5ba92..338440d 100644 --- a/BatchTrainer.lua +++ b/BatchTrainer.lua @@ -44,25 +44,37 @@ end -- update the counters function BatchTrainer:next() - if not self.batch then + if not self.batch or not self.trainOffset then + -- initialize self.batch = 1 - else - self.batch = self.batch + 1 - end - if not self.trainOffset then self.trainOffset = 1 else + -- hook to run something on the current batch + -- (for eg. 
if you want to run a test on this batch before + -- switching to the next) + if self.hookTrainBatch then + self.hookTrainBatch(self) + end + + -- simple batch increment + self.batch = self.batch + 1 self.trainOffset = self.trainOffset + self.batchSize + + -- test for new epoch if self.trainOffset > self.trainset:size() then - self.trainOffset = 1 - self.epoch = self.epoch + 1 - self.batch = 1 + + -- hook to run on current epoch before switching to next if self.hookTrainEpoch then self.hookTrainEpoch(self) end if self.save then self:log() end + self.trainOffset = 1 + self.epoch = self.epoch + 1 + self.batch = 1 + + end end -- disp progress @@ -124,4 +136,31 @@ function BatchTrainer:nextBatch() local error = self.optimizer:forward(inputs, targets) end +-- special test to just get results of current batch +function BatchTrainer:testBatch() + local criterion = self.criterion + local module = self.module + + local inputs = self.inputs[self.batch] + local targets = self.targets[self.batch] + + self.currentError = 0 + + for i = 1,#inputs do + local input = inputs[i] + local target = targets[i] + if criterion then + self.currentError = self.currentError + + criterion:forward(module:forward(input), target) + else + local _,error = module:forward(input, target) + self.currentError = self.currentError + error + end + -- user hook + if self.hookTestSample then + self.hookTestSample(self, {input, target}) + end + end +end + \ No newline at end of file -- cgit v1.2.3 From 376a0e86f0ac963707291a05a148fc079a4cc28e Mon Sep 17 00:00:00 2001 From: Marco Scoffier Date: Thu, 1 Sep 2011 14:05:38 -0400 Subject: bug in batch creation --- BatchTrainer.lua | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/BatchTrainer.lua b/BatchTrainer.lua index 338440d..bbc0e24 100644 --- a/BatchTrainer.lua +++ b/BatchTrainer.lua @@ -115,7 +115,7 @@ function BatchTrainer:nextBatch() for i = t,math.min(t+bs-1,ds) do -- load new sample - local sample = self.trainset[t] + local sample = self.trainset[i] local input = sample[1] local target = sample[2] -- cgit v1.2.3 From 519ea0c2a562f7c752ad13fca53ca36f3e97e95c Mon Sep 17 00:00:00 2001 From: Marco Scoffier Date: Thu, 1 Sep 2011 14:58:54 -0400 Subject: added flag to quit after a specified number of function evaluations --- LBFGSOptimization.lua | 4 +++- lbfgs.c | 21 +++++++++++++++------ 2 files changed, 18 insertions(+), 7 deletions(-) diff --git a/LBFGSOptimization.lua b/LBFGSOptimization.lua index 720b7ba..ad98139 100644 --- a/LBFGSOptimization.lua +++ b/LBFGSOptimization.lua @@ -5,6 +5,8 @@ function LBFGS:__init(...) parent.__init(self, ...) 
xlua.unpack_class(self, {...}, 'LBFGSOptimization', nil, + {arg='maxEvaluation', type='number', + help='maximum nb of function evaluations per pass (0 = no max)', default=0}, {arg='maxIterations', type='number', help='maximum nb of iterations per pass (0 = no max)', default=0}, {arg='maxLineSearch', type='number', @@ -23,6 +25,6 @@ function LBFGS:optimize() -- the magic function: will update the parameter vector -- according to the l-BFGS method self.output = lbfgs.run(self.parameters, self.gradParameters, - self.maxIterations, self.maxLineSearch, + self.maxEvaluation, self.maxIterations, self.maxLineSearch, self.sparsity, self.verbose) end diff --git a/lbfgs.c b/lbfgs.c index 7a660b1..851a8dd 100644 --- a/lbfgs.c +++ b/lbfgs.c @@ -83,8 +83,9 @@ // extra globals static int nEvaluation = 0; -static int nIteration = 0; -static int verbose = 0; +static int maxEval = 0; // maximum number of function evaluations +static int nIteration = 0; +static int verbose = 0; struct tag_callback_data { int n; @@ -507,6 +508,13 @@ int lbfgs( } } + /* Count number of function evaluations */ + if ((maxEval != 0)&&(nEvaluation > maxEval)) { + if (verbose > 1){ + printf("Stopping b/c exceeded max number of function evaluations\n"); + } + goto lbfgs_exit; + } /* Convergence test. The criterion is given by the following formula: @@ -1463,12 +1471,13 @@ int lbfgs_run(lua_State *L) { // initialize the parameters for the L-BFGS optimization lbfgs_parameter_init(&lbfgs_param); - lbfgs_param.max_iterations = lua_tonumber(L, 3); - lbfgs_param.max_linesearch = lua_tonumber(L, 4); + maxEval = lua_tonumber(L,3); + lbfgs_param.max_iterations = lua_tonumber(L, 4); + lbfgs_param.max_linesearch = lua_tonumber(L, 5); lbfgs_param.linesearch = LBFGS_LINESEARCH_BACKTRACKING; - lbfgs_param.orthantwise_c = lua_tonumber(L, 5); + lbfgs_param.orthantwise_c = lua_tonumber(L, 6); // get verbose level - verbose = lua_tonumber(L,6); + verbose = lua_tonumber(L,7); // Start the L-BFGS optimization; this will invoke the callback functions // evaluate() and progress() when necessary. -- cgit v1.2.3 From dc98c37487f72b656ff9b23da9979dd7e372cfb7 Mon Sep 17 00:00:00 2001 From: Marco Scoffier Date: Thu, 1 Sep 2011 19:11:47 -0400 Subject: make parallel compatible with not optimize hook --- BatchOptimization.lua | 49 ++++++++++++++++++++++++------------------------- BatchTrainer.lua | 16 ++++++++++------ 2 files changed, 34 insertions(+), 31 deletions(-) diff --git a/BatchOptimization.lua b/BatchOptimization.lua index 4caa064..27309f2 100644 --- a/BatchOptimization.lua +++ b/BatchOptimization.lua @@ -8,15 +8,15 @@ local Batch,parent = torch.class('nn.BatchOptimization', 'nn.Optimization') function Batch:__init(...) 
parent.__init(self) xlua.unpack_class(self, {...}, - 'BatchOptimization', nil, - {arg='module', type='nn.Module', help='a module to train', req=true}, - {arg='criterion', type='nn.Criterion', - help='a criterion to estimate the error', req=true}, - {arg='parallelize', type='number', - help='parallelize onto N cores (experimental!)', default=1}, - {arg='verbose', type='number', - help='verbose level during training [0-2]', default=0} - ) + 'BatchOptimization', nil, + {arg='module', type='nn.Module', help='a module to train', req=true}, + {arg='criterion', type='nn.Criterion', + help='a criterion to estimate the error', req=true}, + {arg='parallelize', type='number', + help='parallelize onto N cores (experimental!)', default=1}, + {arg='verbose', type='number', + help='verbose level during training [0-2]', default=0} + ) self.parameters = nnx.flattenParameters(nnx.getParameters(self.module)) self.gradParameters = nnx.flattenParameters(nnx.getGradParameters(self.module)) self.evalCounter = 0 @@ -165,9 +165,9 @@ function Batch:forward_mapreduce(inputs, targets, options) end local _t_ = sys.clock() -- do map/reduce - self.evaluate_map() - self.evaluate_reduce() - -- update evaluation counter + self.evaluate_map() + self.evaluate_reduce() + -- update evaluation counter self.evalCounter = self.evalCounter + 1 -- verbose if self.verbose >= 2 then @@ -180,12 +180,12 @@ function Batch:forward_mapreduce(inputs, targets, options) -- in separate threads self.evaluate_map = function() - -- transmit new parameters to all workers + -- transmit new parameters to all workers parallel.children:send(self.parameters) - -- then wait for all workers to return their partial gradParameters + outputs + -- then wait for all workers to return their partial gradParameters + outputs gradParametersPartial = parallel.children:receive() - outputsPartial = parallel.children:receive() - -- force cleanup + outputsPartial = parallel.children:receive() + -- force cleanup collectgarbage() end @@ -210,10 +210,11 @@ function Batch:forward_mapreduce(inputs, targets, options) -- (2) optimization callback if self.optimize then self:optimize() + -- (3) reset workers so they're ready for next mini-batch + -- only do this when we have an optimization hook + parallel.children:send('break') end - -- (3) reset workers so they're ready for next mini-batch - parallel.children:send('break') -- (4) update sample counter self.sampleCounter = self.sampleCounter + #inputs @@ -232,7 +233,7 @@ function Batch:setup_mapreduce () -- (1) define code for workers local worker_code = [[ - -- require packages + -- require packages require 'nnx' -- retrieve module + criterion at startup @@ -259,11 +260,11 @@ function Batch:setup_mapreduce () if type(inputs) == 'string' and inputs == 'break' then break end targets = parallel.parent:receive() options = parallel.parent:receive() - - -- inner loop: evaluations + -- inner loop: evaluations while true do - -- receive new set of parameters + -- receive new set of parameters newParameters = parallel.parent:receive() + if type(newParameters) == 'string' and newParameters == 'break' then break end parameters:copy(newParameters) @@ -273,7 +274,7 @@ function Batch:setup_mapreduce () local f_x = 0 -- evaluate gradients on inputs for this thread for i = 1,#inputs do - -- user hook + -- user hook if prehook then prehook(optimizer, {inputs[i], targets[i], options[i]}) end @@ -289,11 +290,9 @@ function Batch:setup_mapreduce () posthook(optimizer, {inputs[i], targets[i], options[i]}) end end - -- now send back 
gradParameters + partial output parallel.parent:send(gradParameters) parallel.parent:send(f_x) - -- force cleanup collectgarbage() end diff --git a/BatchTrainer.lua b/BatchTrainer.lua index bbc0e24..a5b135d 100644 --- a/BatchTrainer.lua +++ b/BatchTrainer.lua @@ -59,7 +59,7 @@ function BatchTrainer:next() -- simple batch increment self.batch = self.batch + 1 self.trainOffset = self.trainOffset + self.batchSize - + -- test for new epoch if self.trainOffset > self.trainset:size() then @@ -73,9 +73,13 @@ function BatchTrainer:next() self.trainOffset = 1 self.epoch = self.epoch + 1 self.batch = 1 - - end + + -- on all but the first batch we need to reset the children + if optimizer.parallelize > 1 then + parallel.children:send('break') + end + end -- disp progress if self.dispProgress then @@ -118,11 +122,11 @@ function BatchTrainer:nextBatch() local sample = self.trainset[i] local input = sample[1] local target = sample[2] - + -- optional preprocess (no learning is done for that guy) if self.preprocessor then input = self.preprocessor:forward(input) end - -- store input/target + -- store input/target table.insert(inputs, input) table.insert(targets, target) end @@ -134,6 +138,7 @@ function BatchTrainer:nextBatch() -- set up closure batch.evaluate() for optimizer local error = self.optimizer:forward(inputs, targets) + end -- special test to just get results of current batch @@ -163,4 +168,3 @@ function BatchTrainer:testBatch() end end - \ No newline at end of file -- cgit v1.2.3 From 3863613a4717c59aac9306e3962f8de4dd7405bd Mon Sep 17 00:00:00 2001 From: Clement Farabet Date: Thu, 1 Sep 2011 19:16:08 -0400 Subject: Fixed tabs. --- BatchOptimization.lua | 52 +++++++++++++++++++++++++-------------------------- 1 file changed, 26 insertions(+), 26 deletions(-) diff --git a/BatchOptimization.lua b/BatchOptimization.lua index 27309f2..3143a3e 100644 --- a/BatchOptimization.lua +++ b/BatchOptimization.lua @@ -8,15 +8,15 @@ local Batch,parent = torch.class('nn.BatchOptimization', 'nn.Optimization') function Batch:__init(...) parent.__init(self) xlua.unpack_class(self, {...}, - 'BatchOptimization', nil, - {arg='module', type='nn.Module', help='a module to train', req=true}, - {arg='criterion', type='nn.Criterion', - help='a criterion to estimate the error', req=true}, - {arg='parallelize', type='number', - help='parallelize onto N cores (experimental!)', default=1}, - {arg='verbose', type='number', - help='verbose level during training [0-2]', default=0} - ) + 'BatchOptimization', nil, + {arg='module', type='nn.Module', help='a module to train', req=true}, + {arg='criterion', type='nn.Criterion', + help='a criterion to estimate the error', req=true}, + {arg='parallelize', type='number', + help='parallelize onto N cores (experimental!)', default=1}, + {arg='verbose', type='number', + help='verbose level during training [0-2]', default=0} + ) self.parameters = nnx.flattenParameters(nnx.getParameters(self.module)) self.gradParameters = nnx.flattenParameters(nnx.getGradParameters(self.module)) self.evalCounter = 0 @@ -45,7 +45,7 @@ function Batch:forward_sequential(inputs, targets, options) = function() -- verbose if self.verbose >= 2 then - print(' evaluating f(X) + df/dX') + print(' evaluating f(X) + df/dX') end local _t_ = sys.clock() -- reset gradients @@ -75,7 +75,7 @@ function Batch:forward_sequential(inputs, targets, options) -- normalize gradients self.gradParameters:div(#inputs) -- verbose - if self.verbose >= 2 then + if self.verbose >= 2 then print(' ' .. self.evalCounter .. 
'th evaluation took ' .. (sys.clock() - _t_) .. ' sec') end -- return average f(X) @@ -160,17 +160,17 @@ function Batch:forward_mapreduce(inputs, targets, options) self.evaluate = function() -- verbose - if self.verbose >= 2 then - print(' evaluating f(X) + df/dX') + if self.verbose >= 2 then + print(' evaluating f(X) + df/dX') end local _t_ = sys.clock() -- do map/reduce - self.evaluate_map() - self.evaluate_reduce() - -- update evaluation counter + self.evaluate_map() + self.evaluate_reduce() + -- update evaluation counter self.evalCounter = self.evalCounter + 1 -- verbose - if self.verbose >= 2 then + if self.verbose >= 2 then print(' ' .. self.evalCounter .. 'th evaluation took ' .. (sys.clock() - _t_) .. ' sec') end return self.output @@ -180,12 +180,12 @@ function Batch:forward_mapreduce(inputs, targets, options) -- in separate threads self.evaluate_map = function() - -- transmit new parameters to all workers + -- transmit new parameters to all workers parallel.children:send(self.parameters) - -- then wait for all workers to return their partial gradParameters + outputs + -- then wait for all workers to return their partial gradParameters + outputs gradParametersPartial = parallel.children:receive() - outputsPartial = parallel.children:receive() - -- force cleanup + outputsPartial = parallel.children:receive() + -- force cleanup collectgarbage() end @@ -233,7 +233,7 @@ function Batch:setup_mapreduce () -- (1) define code for workers local worker_code = [[ - -- require packages + -- require packages require 'nnx' -- retrieve module + criterion at startup @@ -260,11 +260,11 @@ function Batch:setup_mapreduce () if type(inputs) == 'string' and inputs == 'break' then break end targets = parallel.parent:receive() options = parallel.parent:receive() - -- inner loop: evaluations + -- inner loop: evaluations while true do - -- receive new set of parameters + -- receive new set of parameters newParameters = parallel.parent:receive() - + if type(newParameters) == 'string' and newParameters == 'break' then break end parameters:copy(newParameters) @@ -274,7 +274,7 @@ function Batch:setup_mapreduce () local f_x = 0 -- evaluate gradients on inputs for this thread for i = 1,#inputs do - -- user hook + -- user hook if prehook then prehook(optimizer, {inputs[i], targets[i], options[i]}) end -- cgit v1.2.3 From 9bc8cc01e1bbbc5d532a22d58061964c036f43e5 Mon Sep 17 00:00:00 2001 From: Clement Farabet Date: Thu, 1 Sep 2011 22:23:27 -0400 Subject: Formatting. --- BatchOptimization.lua | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/BatchOptimization.lua b/BatchOptimization.lua index 3143a3e..f2b38d5 100644 --- a/BatchOptimization.lua +++ b/BatchOptimization.lua @@ -207,15 +207,15 @@ function Batch:forward_mapreduce(inputs, targets, options) self.output = self.output/#inputs end - -- (2) optimization callback if self.optimize then + -- (2) optimization callback self:optimize() + -- (3) reset workers so they're ready for next mini-batch -- only do this when we have an optimization hook parallel.children:send('break') end - -- (4) update sample counter self.sampleCounter = self.sampleCounter + #inputs -- cgit v1.2.3 From ab854c6f2490c33802fb074b1ace517a407848ee Mon Sep 17 00:00:00 2001 From: Clement Farabet Date: Thu, 1 Sep 2011 22:53:47 -0400 Subject: Fixed tester. 
--- test/test-all.lua | 87 ------------------------------------------------------- 1 file changed, 87 deletions(-) diff --git a/test/test-all.lua b/test/test-all.lua index 148e860..f7e591a 100644 --- a/test/test-all.lua +++ b/test/test-all.lua @@ -301,93 +301,6 @@ function nnxtest.SpatialConvolution() mytester:asserteq(berr, 0, torch.typename(module) .. ' - i/o backward err ') end -function nnxtest.SpatialConvolutionSparse_1() - local from = math.random(1,10) - local to = math.random(1,10) - local ini = math.random(10,20) - local inj = math.random(10,20) - local ki = math.random(1,10) - local kj = math.random(1,10) - local si = math.random(1,1) - local sj = math.random(1,1) - - local ct = nn.tables.full(from,to) - local module = nn.SpatialConvolutionSparse(ct, ki, kj, si, sj) - local input = torch.Tensor(from, inj, ini):zero() - module:reset() - - local err = nn.Jacobian.testJacobian(module, input) - mytester:assertlt(err, precision, 'error on state ') - - local err = nn.Jacobian.testJacobianParameters(module, input, module.weight, module.gradWeight) - mytester:assertlt(err, precision, 'error on weight ') - - local err = nn.Jacobian.testJacobianParameters(module, input, module.bias, module.gradBias) - mytester:assertlt(err, precision, 'error on bias ') - - local ferr, berr = nn.Jacobian.testIO(module, input) - mytester:asserteq(ferr, 0, torch.typename(module) .. ' - i/o forward err ') - mytester:asserteq(berr, 0, torch.typename(module) .. ' - i/o backward err ') -end - -function nnxtest.SpatialConvolutionSparse_2() - local from = math.random(1,10) - local to = math.random(1,10) - local ini = math.random(10,20) - local inj = math.random(10,20) - local ki = math.random(1,10) - local kj = math.random(1,10) - local si = math.random(1,1) - local sj = math.random(1,1) - - local ct = nn.tables.oneToOne(from) - local module = nn.SpatialConvolutionSparse(ct, ki, kj, si, sj) - local input = torch.Tensor(from, inj, ini):zero() - module:reset() - - local err = nn.Jacobian.testJacobian(module, input) - mytester:assertlt(err, precision, 'error on state ') - - local err = nn.Jacobian.testJacobianParameters(module, input, module.weight, module.gradWeight) - mytester:assertlt(err, precision, 'error on weight ') - - local err = nn.Jacobian.testJacobianParameters(module, input, module.bias, module.gradBias) - mytester:assertlt(err, precision, 'error on bias ') - - local ferr, berr = nn.Jacobian.testIO(module, input) - mytester:asserteq(ferr, 0, torch.typename(module) .. ' - i/o forward err ') - mytester:asserteq(berr, 0, torch.typename(module) .. 
' - i/o backward err ') -end - -function nnxtest.SpatialConvolutionSparse_3() - local from = math.random(2,6) - local to = math.random(4,8) - local ini = math.random(10,20) - local inj = math.random(10,20) - local ki = math.random(1,10) - local kj = math.random(1,10) - local si = math.random(1,1) - local sj = math.random(1,1) - - local ct = nn.tables.random(from,to,from-1) - local module = nn.SpatialConvolutionSparse(ct, ki, kj, si, sj) - local input = torch.Tensor(from, inj, ini):zero() - module:reset() - - local err = nn.Jacobian.testJacobian(module, input) - mytester:assertlt(err, precision, 'error on state ') - - local err = nn.Jacobian.testJacobianParameters(module, input, module.weight, module.gradWeight) - mytester:assertlt(err, precision, 'error on weight ') - - local err = nn.Jacobian.testJacobianParameters(module, input, module.bias, module.gradBias) - mytester:assertlt(err, precision, 'error on bias ') - - local ferr, berr = nn.Jacobian.testIO(module, input) - mytester:asserteq(ferr, 0, torch.typename(module) .. ' - i/o forward err ') - mytester:asserteq(berr, 0, torch.typename(module) .. ' - i/o backward err ') -end - function nnxtest.SpatialNormalization_Gaussian2D() local inputSize = math.random(11,20) local kersize = 9 -- cgit v1.2.3 From 38b8bea69d7cca9198ea98a0d68dc4343548579c Mon Sep 17 00:00:00 2001 From: Clement Farabet Date: Sat, 3 Sep 2011 17:35:48 -0400 Subject: using new parallel API: fork/exec. --- BatchOptimization.lua | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/BatchOptimization.lua b/BatchOptimization.lua index f2b38d5..b05c9ab 100644 --- a/BatchOptimization.lua +++ b/BatchOptimization.lua @@ -301,8 +301,9 @@ function Batch:setup_mapreduce () -- (2) startup all workers for t = 1,P do - parallel.run(worker_code) + parallel.fork() end + parallel.children:exec(worker_code) -- (3) and send them the module + criterion architecture parallel.children:send(self.module) -- cgit v1.2.3 From 1b13c51e0d3264d2a94824ab1c1075a521008965 Mon Sep 17 00:00:00 2001 From: Clement Farabet Date: Sat, 3 Sep 2011 22:52:24 -0400 Subject: BatchOptimization now supports server list. --- BatchOptimization.lua | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/BatchOptimization.lua b/BatchOptimization.lua index b05c9ab..f5e0b53 100644 --- a/BatchOptimization.lua +++ b/BatchOptimization.lua @@ -14,6 +14,8 @@ function Batch:__init(...) help='a criterion to estimate the error', req=true}, {arg='parallelize', type='number', help='parallelize onto N cores (experimental!)', default=1}, + {arg='servers', type='number', + help='server list, to parallelize over the network (experimental!)'}, {arg='verbose', type='number', help='verbose level during training [0-2]', default=0} ) @@ -21,7 +23,7 @@ function Batch:__init(...) 
self.gradParameters = nnx.flattenParameters(nnx.getGradParameters(self.module)) self.evalCounter = 0 self.sampleCounter = 0 - if self.parallelize > 1 then + if self.parallelize > 1 or self.servers then self:setup_mapreduce() end end @@ -229,7 +231,6 @@ function Batch:setup_mapreduce () xerror('install parallel for Lua to enable parallel computing (luarocks install parallel)', 'nn.BatchOptimization') end - local P = self.parallelize -- (1) define code for workers local worker_code = [[ @@ -300,8 +301,14 @@ function Batch:setup_mapreduce () ]] -- (2) startup all workers - for t = 1,P do - parallel.fork() + if self.servers then + self.parallelize = 0 + for _,server in ipairs(servers) do + self.parallelize = self.parallelize + server[1] + end + parallel.nfork(self.servers) + else + parallel.nfork(self.parallelize) end parallel.children:exec(worker_code) -- cgit v1.2.3 From cc256ca1c3302aebedf219c36560a524bc955df4 Mon Sep 17 00:00:00 2001 From: Clement Farabet Date: Sat, 3 Sep 2011 22:53:25 -0400 Subject: Fixed typoe --- BatchOptimization.lua | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/BatchOptimization.lua b/BatchOptimization.lua index f5e0b53..e6c94a3 100644 --- a/BatchOptimization.lua +++ b/BatchOptimization.lua @@ -303,7 +303,7 @@ function Batch:setup_mapreduce () -- (2) startup all workers if self.servers then self.parallelize = 0 - for _,server in ipairs(servers) do + for _,server in ipairs(self.servers) do self.parallelize = self.parallelize + server[1] end parallel.nfork(self.servers) -- cgit v1.2.3 From 1e19ab7a53cd1c956f41752e615916b601214360 Mon Sep 17 00:00:00 2001 From: Clement Farabet Date: Sat, 3 Sep 2011 23:51:06 -0400 Subject: Re-thought map-reduce. --- BatchOptimization.lua | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/BatchOptimization.lua b/BatchOptimization.lua index e6c94a3..8bf3e9a 100644 --- a/BatchOptimization.lua +++ b/BatchOptimization.lua @@ -25,12 +25,13 @@ function Batch:__init(...) 
self.sampleCounter = 0 if self.parallelize > 1 or self.servers then self:setup_mapreduce() + self.P = self.parallelize end end function Batch:forward(inputs, targets, options) options = options or {} - if self.parallelize > 1 then + if self.P > 1 then return self:forward_mapreduce(inputs, targets, options) else return self:forward_sequential(inputs, targets, options) @@ -99,7 +100,7 @@ end function Batch:forward_mapreduce(inputs, targets, options) -- parameters - local P = self.parallelize + local P = self.P -- transmit user hooks, if defined if not self.hooksets then @@ -149,6 +150,7 @@ function Batch:forward_mapreduce(inputs, targets, options) -- (0c) send mini-batch to all workers for t = 1,P do + parallel.children[t]:join() parallel.children[t]:send(inputss[t]) parallel.children[t]:send(targetss[t]) parallel.children[t]:send(optionss[t]) @@ -183,6 +185,7 @@ function Batch:forward_mapreduce(inputs, targets, options) self.evaluate_map = function() -- transmit new parameters to all workers + parallel.children:join() parallel.children:send(self.parameters) -- then wait for all workers to return their partial gradParameters + outputs gradParametersPartial = parallel.children:receive() @@ -215,7 +218,7 @@ function Batch:forward_mapreduce(inputs, targets, options) -- (3) reset workers so they're ready for next mini-batch -- only do this when we have an optimization hook - parallel.children:send('break') + parallel.children:join('break') end -- (4) update sample counter @@ -238,6 +241,7 @@ function Batch:setup_mapreduce () require 'nnx' -- retrieve module + criterion at startup + parallel.yield() module = parallel.parent:receive() criterion = parallel.parent:receive() @@ -256,18 +260,21 @@ function Batch:setup_mapreduce () -- outter loop: mini-batches while true do + -- sync + if parallel.yield() == 'break' then break end + -- receive new mini-batch inputs = parallel.parent:receive() - if type(inputs) == 'string' and inputs == 'break' then break end targets = parallel.parent:receive() options = parallel.parent:receive() + -- inner loop: evaluations while true do - -- receive new set of parameters - newParameters = parallel.parent:receive() + -- sync + if parallel.yield() == 'break' then break end - if type(newParameters) == 'string' and newParameters == 'break' then break end - parameters:copy(newParameters) + -- receive new set of parameters + parameters:copy(parallel.parent:receive()) -- reset gradients gradParameters:zero() @@ -313,6 +320,7 @@ function Batch:setup_mapreduce () parallel.children:exec(worker_code) -- (3) and send them the module + criterion architecture + parallel.children:join() parallel.children:send(self.module) parallel.children:send(self.criterion) end -- cgit v1.2.3 From 816f1bf40219624d87a9f8ea6bc8d50ae2b7e7c7 Mon Sep 17 00:00:00 2001 From: Clement Farabet Date: Sun, 4 Sep 2011 01:59:01 -0400 Subject: Fixed stupid bug for SGD. --- BatchOptimization.lua | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/BatchOptimization.lua b/BatchOptimization.lua index 8bf3e9a..ae7a6d9 100644 --- a/BatchOptimization.lua +++ b/BatchOptimization.lua @@ -25,8 +25,8 @@ function Batch:__init(...) self.sampleCounter = 0 if self.parallelize > 1 or self.servers then self:setup_mapreduce() - self.P = self.parallelize end + self.P = self.parallelize end function Batch:forward(inputs, targets, options) -- cgit v1.2.3 From d82237992e1f8ec21ff937a7117fa439e2e8c068 Mon Sep 17 00:00:00 2001 From: Clement Farabet Date: Sun, 4 Sep 2011 03:04:22 -0400 Subject: Using new sfork function. 
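Worker startup now goes through parallel.sfork() followed by a single exec
on all children, replacing the nfork()/server-list branch. A condensed
sketch of the new startup path (same worker_code string and module/criterion
fields as in the diff below):

    -- fork N local workers in one call, then load the worker script into each
    parallel.sfork(self.parallelize)
    parallel.children:exec(worker_code)
    -- hand every child its copy of the model and criterion
    parallel.children:join()
    parallel.children:send(self.module)
    parallel.children:send(self.criterion)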
--- BatchOptimization.lua | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/BatchOptimization.lua b/BatchOptimization.lua index ae7a6d9..ebf2144 100644 --- a/BatchOptimization.lua +++ b/BatchOptimization.lua @@ -14,8 +14,6 @@ function Batch:__init(...) help='a criterion to estimate the error', req=true}, {arg='parallelize', type='number', help='parallelize onto N cores (experimental!)', default=1}, - {arg='servers', type='number', - help='server list, to parallelize over the network (experimental!)'}, {arg='verbose', type='number', help='verbose level during training [0-2]', default=0} ) @@ -23,7 +21,7 @@ function Batch:__init(...) self.gradParameters = nnx.flattenParameters(nnx.getGradParameters(self.module)) self.evalCounter = 0 self.sampleCounter = 0 - if self.parallelize > 1 or self.servers then + if self.parallelize > 1 then self:setup_mapreduce() end self.P = self.parallelize @@ -308,15 +306,7 @@ function Batch:setup_mapreduce () ]] -- (2) startup all workers - if self.servers then - self.parallelize = 0 - for _,server in ipairs(self.servers) do - self.parallelize = self.parallelize + server[1] - end - parallel.nfork(self.servers) - else - parallel.nfork(self.parallelize) - end + parallel.sfork(self.parallelize) parallel.children:exec(worker_code) -- (3) and send them the module + criterion architecture -- cgit v1.2.3 From ecd615e257bc819a85eb1d97d9ebac3a6de989f0 Mon Sep 17 00:00:00 2001 From: Clement Farabet Date: Sun, 4 Sep 2011 17:58:37 -0400 Subject: Added serializing methods for confusion matrix. --- ConfusionMatrix.lua | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/ConfusionMatrix.lua b/ConfusionMatrix.lua index 96c9aa4..ed3f000 100644 --- a/ConfusionMatrix.lua +++ b/ConfusionMatrix.lua @@ -11,7 +11,7 @@ function ConfusionMatrix:__init(nclasses, classes) self.nclasses = nclasses self.totalValid = 0 self.averageValid = 0 - self.classes = classes + self.classes = classes or {} end function ConfusionMatrix:add(prediction, target) @@ -74,7 +74,7 @@ function ConfusionMatrix:__tostring__() for p = 1,nclasses do str = str .. '' .. string.format('%8d', self.mat[t][p]) end - if self.classes then + if self.classes and self.classes[1] then if t == nclasses then str = str .. ']] ' .. pclass .. '% \t[class: ' .. (self.classes[t] or '') .. ']\n' else @@ -92,3 +92,21 @@ function ConfusionMatrix:__tostring__() str = str .. ' + global correct: ' .. (self.totalValid*100) .. '%' return str end + +function ConfusionMatrix:write(file) + file:writeObject(self.mat) + file:writeObject(self.valids) + file:writeInt(self.nclasses) + file:writeInt(self.totalValid) + file:writeInt(self.averageValid) + file:writeObject(self.classes) +end + +function ConfusionMatrix:read(file) + self.mat = file:readObject() + self.valids = file:readObject() + self.nclasses = file:readInt() + self.totalValid = file:readInt() + self.averageValid = file:readInt() + self.classes = file:readObject() +end -- cgit v1.2.3 From 69327936c2ce1d4e521dd1d1eec8c5afbe5e0cc3 Mon Sep 17 00:00:00 2001 From: Clement Farabet Date: Mon, 5 Sep 2011 00:22:39 -0400 Subject: Protected map-reduce setup. 
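The fork/exec/send sequence is now wrapped in a pcall, so a failure at any
point during worker startup is caught, printed, and followed by a
parallel.close() on whatever children were already forked. A minimal sketch
of the pattern, assuming setup() wraps the startup sequence shown in the
diff below:

    local ok, err = pcall(setup)
    if not ok then
       -- report the failure and tear down any children that did get forked
       print(err)
       parallel.close()
    end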
--- BatchOptimization.lua | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/BatchOptimization.lua b/BatchOptimization.lua index ebf2144..9673768 100644 --- a/BatchOptimization.lua +++ b/BatchOptimization.lua @@ -305,12 +305,16 @@ function Batch:setup_mapreduce () end ]] - -- (2) startup all workers - parallel.sfork(self.parallelize) - parallel.children:exec(worker_code) - - -- (3) and send them the module + criterion architecture - parallel.children:join() - parallel.children:send(self.module) - parallel.children:send(self.criterion) + local setup = function() + -- (2) startup all workers + parallel.sfork(self.parallelize) + parallel.children:exec(worker_code) + + -- (3) and send them the module + criterion architecture + parallel.children:join() + parallel.children:send(self.module) + parallel.children:send(self.criterion) + end + local ok,err = pcall(setup) + if not ok then print(err) parallel.close() end end -- cgit v1.2.3 From bfab10dd8ab7ad2561979470a544524ceb79e4fc Mon Sep 17 00:00:00 2001 From: Clement Farabet Date: Mon, 5 Sep 2011 00:24:02 -0400 Subject: Actual error when something goes wrong in mapreduce setup. --- BatchOptimization.lua | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/BatchOptimization.lua b/BatchOptimization.lua index 9673768..5ccd8fe 100644 --- a/BatchOptimization.lua +++ b/BatchOptimization.lua @@ -316,5 +316,5 @@ function Batch:setup_mapreduce () parallel.children:send(self.criterion) end local ok,err = pcall(setup) - if not ok then print(err) parallel.close() end + if not ok then error(err) parallel.close() end end -- cgit v1.2.3 From 3804abd138d4898ca5576e17cac504ff6fa8d459 Mon Sep 17 00:00:00 2001 From: Clement Farabet Date: Mon, 5 Sep 2011 00:25:25 -0400 Subject: Switch close/error. --- BatchOptimization.lua | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/BatchOptimization.lua b/BatchOptimization.lua index 5ccd8fe..f5feb3a 100644 --- a/BatchOptimization.lua +++ b/BatchOptimization.lua @@ -316,5 +316,5 @@ function Batch:setup_mapreduce () parallel.children:send(self.criterion) end local ok,err = pcall(setup) - if not ok then error(err) parallel.close() end + if not ok then parallel.close() error(err) end end -- cgit v1.2.3
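The last two patches tighten that failure path: the caught error is re-raised
with error() instead of printed, and parallel.close() is moved ahead of it,
since error() raises immediately and anything placed after it never runs. A
condensed sketch of the final behaviour, with setup() standing for the
protected fork/exec/send function above:

    local ok, err = pcall(setup)
    if not ok then
       parallel.close()   -- shut the forked workers down first...
       error(err)         -- ...then re-raise; nothing after error() executes
    end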