127 files changed, 10444 insertions, 0 deletions
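
For orientation, a minimal usage sketch (not part of this commit) of how the modules introduced below fit together: a small nn.Sequential network built from nn.Linear and nn.HardTanh, trained for one step against nn.MSECriterion. It assumes the package is loaded with require 'nn' and that Sequential provides the usual zeroGradParameters/updateParameters container methods (the tail of Sequential.lua is truncated in this view).

-- minimal usage sketch (hypothetical example, not part of the diff)
require 'nn'

local mlp = nn.Sequential()
mlp:add(nn.Linear(10, 5))    -- 10 inputs -> 5 hidden units
mlp:add(nn.HardTanh())       -- pointwise nonlinearity
mlp:add(nn.Linear(5, 1))     -- 5 hidden units -> 1 output

local criterion = nn.MSECriterion()
local x, y = torch.rand(10), torch.rand(1)   -- example input and target

mlp:zeroGradParameters()
local out = mlp:forward(x)
local err = criterion:forward(out, y)
mlp:backward(x, criterion:backward(out, y))
mlp:updateParameters(0.01)   -- plain SGD step, learning rate 0.01
print(err)
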
@@ -0,0 +1,15 @@
+local Abs, parent = torch.class('nn.Abs', 'nn.Module')
+
+function Abs:__init()
+   parent.__init(self)
+end
+
+function Abs:updateOutput(input)
+   input.nn.Abs_updateOutput(self, input)
+   return self.output
+end
+
+function Abs:updateGradInput(input, gradOutput)
+   input.nn.Abs_updateGradInput(self, input, gradOutput)
+   return self.gradInput
+end
diff --git a/AbsCriterion.lua b/AbsCriterion.lua
new file mode 100644
index 0000000..be7f6cb
--- /dev/null
+++ b/AbsCriterion.lua
@@ -0,0 +1,14 @@
+local AbsCriterion, parent = torch.class('nn.AbsCriterion', 'nn.Criterion')
+
+function AbsCriterion:__init()
+   parent.__init(self)
+   self.sizeAverage = true
+end
+
+function AbsCriterion:updateOutput(input, target)
+   return input.nn.AbsCriterion_updateOutput(self, input, target)
+end
+
+function AbsCriterion:updateGradInput(input, target)
+   return input.nn.AbsCriterion_updateGradInput(self, input, target)
+end
@@ -0,0 +1,54 @@
+local Add, parent = torch.class('nn.Add', 'nn.Module')
+
+function Add:__init(inputSize,scalar)
+   parent.__init(self)
+
+   local size = inputSize
+   if scalar then size=1 end
+   self.bias = torch.Tensor(size)
+   self.gradBias = torch.Tensor(size)
+
+   -- state
+   self.gradInput:resize(inputSize)
+   self.output:resize(inputSize)
+
+   self:reset()
+end
+
+function Add:reset(stdv)
+   if stdv then
+      stdv = stdv * math.sqrt(3)
+   else
+      stdv = 1./math.sqrt(self.bias:size(1))
+   end
+
+   for i=1,self.bias:size(1) do
+      self.bias[i] = torch.uniform(-stdv, stdv)
+   end
+end
+
+function Add:updateOutput(input)
+   self.output:copy(input);
+   if self.gradBias:size(1)==1 then
+      self.output:add(self.bias[1]);
+   else
+      self.output:add(self.bias);
+   end
+   return self.output
+end
+
+function Add:updateGradInput(input, gradOutput)
+   if self.gradInput then
+      self.gradInput:copy(gradOutput)
+      return self.gradInput
+   end
+end
+
+function Add:accGradParameters(input, gradOutput, scale)
+   scale = scale or 1
+   if self.gradBias:size(1) == 1 then
+      self.gradBias[1] = self.gradBias[1] + scale*gradOutput:sumall();
+   else
+      self.gradBias:add(scale, gradOutput)
+   end
+end
diff --git a/CAddTable.lua b/CAddTable.lua
new file mode 100644
index 0000000..afe3568
--- /dev/null
+++ b/CAddTable.lua
@@ -0,0 +1,24 @@
+
+local CAddTable, parent = torch.class('nn.CAddTable', 'nn.Module')
+
+function CAddTable:__init()
+   parent.__init(self)
+   self.gradInput = {}
+end
+
+function CAddTable:updateOutput(input)
+   self.output:resizeAs(input[1]):copy(input[1])
+   for i=2,#input do
+      self.output:add(input[i])
+   end
+   return self.output
+end
+
+function CAddTable:updateGradInput(input, gradOutput)
+   for i=1,#input do
+      self.gradInput[i] = self.gradInput[i] or torch.Tensor()
+      self.gradInput[i]:resizeAs(input[i])
+      self.gradInput[i]:copy(gradOutput)
+   end
+   return self.gradInput
+end
diff --git a/CDivTable.lua b/CDivTable.lua
new file mode 100644
index 0000000..f91d024
--- /dev/null
+++ b/CDivTable.lua
@@ -0,0 +1,21 @@
+
+local CDivTable, parent = torch.class('nn.CDivTable', 'nn.Module')
+
+function CDivTable:__init()
+   parent.__init(self)
+   self.gradInput = {}
+end
+
+function CDivTable:updateOutput(input)
+   self.output:resizeAs(input[1]):copy(input[1])
+   self.output:cdiv(input[2])
+   return self.output
+end
+
+function CDivTable:updateGradInput(input, gradOutput)
+   self.gradInput[1] = self.gradInput[1] or torch.Tensor()
+   self.gradInput[2] = self.gradInput[2] or torch.Tensor()
+   self.gradInput[1]:resizeAs(input[1]):copy(gradOutput):cdiv(input[2])
self.gradInput[2]:resizeAs(input[2]):zero():addcdiv(-1,self.gradInput[1],input[2]):cmul(input[1]) + return self.gradInput +end diff --git a/CMakeLists.txt b/CMakeLists.txt new file mode 100644 index 0000000..75239ad --- /dev/null +++ b/CMakeLists.txt @@ -0,0 +1,9 @@ +SET(src init.c) + +FILE(GLOB luasrc *.lua) +SET(luasrc ${luasrc} test/test.lua) + +ADD_TORCH_PACKAGE(nn "${src}" "${luasrc}" "Machine Learning") +ADD_TORCH_DOK(dok nn "Machine Learning" "Neural Networks" 3.1) + +TARGET_LINK_LIBRARIES(nn luaT TH) diff --git a/CMul.lua b/CMul.lua new file mode 100644 index 0000000..9b59944 --- /dev/null +++ b/CMul.lua @@ -0,0 +1,36 @@ +local CMul, parent = torch.class('nn.CMul', 'nn.Module') + +function CMul:__init(inputSize) + parent.__init(self) + + self.weight = torch.Tensor(inputSize) + self.gradWeight = torch.Tensor(inputSize) + + -- state + self.gradInput:resize(inputSize) + self.output:resize(inputSize) + + self:reset() +end + +function CMul:reset() + self.weight:fill(1) +end + +function CMul:updateOutput(input) + self.output:copy(input); + self.output:cmul(self.weight); + return self.output +end + +function CMul:updateGradInput(input, gradOutput) + if self.gradInput then + self.gradInput:zero() + self.gradInput:addcmul(1, self.weight, gradOutput) + return self.gradInput + end +end + +function CMul:accGradParameters(input, gradOutput, scale) + self.gradWeight:addcmul(scale or 1, input, gradOutput) +end diff --git a/CMulTable.lua b/CMulTable.lua new file mode 100644 index 0000000..4c058b6 --- /dev/null +++ b/CMulTable.lua @@ -0,0 +1,26 @@ + +local CMulTable, parent = torch.class('nn.CMulTable', 'nn.Module') + +function CMulTable:__init() + parent.__init(self) + self.gradInput = {} +end + +function CMulTable:updateOutput(input) + self.output:resizeAs(input[1]):copy(input[1]) + for i=2,#input do + self.output:cmul(input[i]) + end + return self.output +end + +function CMulTable:updateGradInput(input, gradOutput) + local tout = torch.Tensor():resizeAs(self.output) + for i=1,#input do + self.gradInput[i] = self.gradInput[i] or torch.Tensor() + self.gradInput[i]:resizeAs(input[i]):copy(gradOutput) + tout:copy(self.output):cdiv(input[i]) + self.gradInput[i]:cmul(tout) + end + return self.gradInput +end diff --git a/CSubTable.lua b/CSubTable.lua new file mode 100644 index 0000000..ffc495b --- /dev/null +++ b/CSubTable.lua @@ -0,0 +1,21 @@ + +local CSubTable, parent = torch.class('nn.CSubTable', 'nn.Module') + +function CSubTable:__init() + parent.__init(self) + self.gradInput = {} +end + +function CSubTable:updateOutput(input) + self.output:resizeAs(input[1]):copy(input[1]) + self.output:add(-1,input[2]) + return self.output +end + +function CSubTable:updateGradInput(input, gradOutput) + self.gradInput[1] = self.gradInput[1] or torch.Tensor() + self.gradInput[2] = self.gradInput[2] or torch.Tensor() + self.gradInput[1]:resizeAs(input[1]):copy(gradOutput) + self.gradInput[2]:resizeAs(input[1]):copy(gradOutput):mul(-1) + return self.gradInput +end diff --git a/ClassNLLCriterion.lua b/ClassNLLCriterion.lua new file mode 100644 index 0000000..7ac48f4 --- /dev/null +++ b/ClassNLLCriterion.lua @@ -0,0 +1,44 @@ +local ClassNLLCriterion, parent = torch.class('nn.ClassNLLCriterion', 'nn.Criterion') + +function ClassNLLCriterion:__init() + parent.__init(self) + self.sizeAverage = true +end + +function ClassNLLCriterion:updateOutput(input, target) + if input:dim() == 1 then + self.output = -input[target] + elseif input:dim() == 2 then + local output = 0 + for i=1,target:size(1) do + output = output - 
input[i][target[i]] + end + if self.sizeAverage then + output = output / target:size(1) + end + self.output = output + else + error('matrix or vector expected') + end + return self.output +end + +function ClassNLLCriterion:updateGradInput(input, target) + self.gradInput:resizeAs(input) + self.gradInput:zero() + + if input:dim() == 1 then + self.gradInput[target] = -1 + else + local z = -1 + if self.sizeAverage then + z = z / target:size(1) + end + local gradInput = self.gradInput + for i=1,target:size(1) do + gradInput[i][target[i]] = z + end + end + + return self.gradInput +end diff --git a/Concat.lua b/Concat.lua new file mode 100644 index 0000000..616c394 --- /dev/null +++ b/Concat.lua @@ -0,0 +1,119 @@ +local Concat, parent = torch.class('nn.Concat', 'nn.Module') + +function Concat:__init(dimension) + parent.__init(self) + self.modules = {} + self.size = torch.LongStorage() + self.dimension = dimension +end + +function Concat:add(module) + table.insert(self.modules, module) + return self +end + +function Concat:get(index) + return self.modules[index] +end + +function Concat:updateOutput(input) + for i=1,#self.modules do + local currentOutput = self.modules[i]:updateOutput(input) + + if i == 1 then + self.size:resize(currentOutput:dim()):copy(currentOutput:size()) + else + self.size[self.dimension] = self.size[self.dimension] + currentOutput:size(self.dimension) + end + end + self.output:resize(self.size) + + local offset = 1 + for _,module in ipairs(self.modules) do + local currentOutput = module:updateOutput(input) + self.output:narrow(self.dimension, offset, currentOutput:size(self.dimension)):copy(currentOutput) + offset = offset + currentOutput:size(self.dimension) + end + return self.output +end + +function Concat:updateGradInput(input, gradOutput) + self.gradInput:resizeAs(input) + + local offset = 1 + for i,module in ipairs(self.modules) do + local currentOutput = module.output + local currentGradInput = module:updateGradInput(input, gradOutput:narrow(self.dimension, offset, currentOutput:size(self.dimension))) + + if i==1 then + self.gradInput:copy(currentGradInput) + else + self.gradInput:add(currentGradInput) + end + offset = offset + currentOutput:size(self.dimension) + end + return self.gradInput +end + +function Concat:accGradParameters(input, gradOutput, scale) + scale = scale or 1 + local offset = 1 + for i,module in ipairs(self.modules) do + local currentOutput = module.output + local currentGradInput = module:accGradParameters(input, + gradOutput:narrow(self.dimension, offset, currentOutput:size(self.dimension)), + scale) + offset = offset + currentOutput:size(self.dimension) + end +end + +function Concat:accUpdateGradParameters(input, gradOutput, lr) + local offset = 1 + for i,module in ipairs(self.modules) do + local currentOutput = module.output + local currentGradInput = module:accUpdateGradParameters(input, + gradOutput:narrow(self.dimension, offset, currentOutput:size(self.dimension)), + lr) + offset = offset + currentOutput:size(self.dimension) + end +end + +function Concat:zeroGradParameters() + for _,module in ipairs(self.modules) do + module:zeroGradParameters() + end +end + +function Concat:updateParameters(learningRate) + for _,module in ipairs(self.modules) do + module:updateParameters(learningRate) + end +end + +function Concat:share(mlp,...) 
+ for i=1,#self.modules do + self.modules[i]:share(mlp.modules[i],...); + end +end + +function Concat:parameters() + local function tinsert(to, from) + if type(from) == 'table' then + for i=1,#from do + tinsert(to,from[i]) + end + else + table.insert(to,from) + end + end + local w = {} + local gw = {} + for i=1,#self.modules do + local mw,mgw = self.modules[i]:parameters() + if mw then + tinsert(w,mw) + tinsert(gw,mgw) + end + end + return w,gw +end diff --git a/ConcatTable.lua b/ConcatTable.lua new file mode 100644 index 0000000..730d95e --- /dev/null +++ b/ConcatTable.lua @@ -0,0 +1,72 @@ +local ConcatTable, parent = torch.class('nn.ConcatTable', 'nn.Module') + +function ConcatTable:__init() + parent.__init(self) + self.modules = {} + self.output = {} +end + +function ConcatTable:add(module) + table.insert(self.modules, module) + return self +end + +function ConcatTable:get(index) + return self.modules[index] +end + +function ConcatTable:size() + return #self.modules +end + +function ConcatTable:updateOutput(input) + for i=1,#self.modules do + self.output[i] = self.modules[i]:updateOutput(input) + end + return self.output +end + +function ConcatTable:updateGradInput(input, gradOutput) + for i,module in ipairs(self.modules) do + local currentGradInput = module:updateGradInput(input, gradOutput[i]) + if i == 1 then + self.gradInput:resizeAs(currentGradInput):copy(currentGradInput) + else + self.gradInput:add(currentGradInput) + end + end + return self.gradInput +end + +function ConcatTable:accGradParameters(input, gradOutput, scale) + scale = scale or 1 + for i,module in ipairs(self.modules) do + module:accGradParameters(input, gradOutput[i], scale) + end +end + +function ConcatTable:accUpdateGradParameters(input, gradOutput, lr) + for i,module in ipairs(self.modules) do + module:accUpdateGradParameters(input, gradOutput[i], lr) + end +end + +function ConcatTable:zeroGradParameters() + for _,module in ipairs(self.modules) do + module:zeroGradParameters() + end +end + +function ConcatTable:updateParameters(learningRate) + for _,module in ipairs(self.modules) do + module:updateParameters(learningRate) + end +end + +function ConcatTable:share(mlp,...) 
+ for i=1,#self.modules do + self.modules[i]:share(mlp.modules[i],...); + end +end + + diff --git a/Copy.lua b/Copy.lua new file mode 100644 index 0000000..7b6eeb3 --- /dev/null +++ b/Copy.lua @@ -0,0 +1,33 @@ +local Copy, parent = torch.class('nn.Copy', 'nn.Module') + +function Copy:__init(intype, outtype) + intype = intype or torch.getmetatable(torch.Tensor.__typename) + outtype = outtype or torch.getmetatable(torch.Tensor.__typename) + + parent.__init(self) + self.gradInput = torch.getmetatable(intype).new() + self.output = torch.getmetatable(outtype).new() + + if intype == outtype then + + self.updateOutput = function(self, input) + self.output = input + return input + end + + self.updateGradInput = function(self, input, gradOutput) + self.gradInput = gradOutput + return gradOutput + end + end +end + +function Copy:updateOutput(input) + self.output:resize(input:size()):copy(input) + return self.output +end + +function Copy:updateGradInput(input, gradOutput) + self.gradInput:resize(gradOutput:size()):copy(gradOutput) + return self.gradInput +end diff --git a/CosineDistance.lua b/CosineDistance.lua new file mode 100644 index 0000000..061ff92 --- /dev/null +++ b/CosineDistance.lua @@ -0,0 +1,40 @@ +local CosineDistance, parent = torch.class('nn.CosineDistance', 'nn.Module') + +function CosineDistance:__init() + parent.__init(self) + self.gradInput = {torch.Tensor(), torch.Tensor()} + self.output=torch.Tensor(1) +end + +function CosineDistance:updateOutput(input) + local input1, input2 = input[1], input[2] + self.w1 = input1:dot(input2) + self.w22 = input1:dot(input1) + self.w2 = math.sqrt(self.w22) + self.w32 = input2:dot(input2) + self.w3 = math.sqrt(self.w32) + self.output[1] = self.w1/self.w2/self.w3 + return self.output +end + +function CosineDistance:updateGradInput(input, gradOutput) + local v1 = input[1] + local v2 = input[2] + local gw1 = input[1].new() + local gw2 = input[2].new() + gw1:resizeAs(v1) + gw2:resizeAs(v1) + + gw1:zero() + gw1:add(1/(self.w2*self.w3), v2) + gw1:add(-self.w1/(self.w22*self.w2*self.w3), v1) + + gw2:zero() + gw2:add(1/(self.w2*self.w3), v1) + gw2:add(-self.w1/(self.w32*self.w2*self.w3), v2) + + gw1:mul(gradOutput[1]) + gw2:mul(gradOutput[1]) + self.gradInput = {gw1, gw2} + return self.gradInput +end diff --git a/CosineEmbeddingCriterion.lua b/CosineEmbeddingCriterion.lua new file mode 100644 index 0000000..a9ee2e0 --- /dev/null +++ b/CosineEmbeddingCriterion.lua @@ -0,0 +1,54 @@ +local CosineEmbeddingCriterion, parent = torch.class('nn.CosineEmbeddingCriterion', 'nn.Module') + +function CosineEmbeddingCriterion:__init(margin) + parent.__init(self) + margin=margin or 0 + self.margin = margin + self.gradInput = {torch.Tensor(), torch.Tensor()} +end + +function CosineEmbeddingCriterion:updateOutput(input,y) + local input1, input2 = input[1], input[2] + self.w1 = input1:dot(input2) + self.w22 = input1:dot(input1) + self.w2 = math.sqrt(self.w22) + self.w32 = input2:dot(input2) + self.w3 = math.sqrt(self.w32) + self.output = self.w1/self.w2/self.w3 + if y==-1 then + self.output = math.max(0, self.output - self.margin); + else + self.output = 1 - self.output + end + return self.output +end + +local function mathsign(t) + if t>0 then return 1; end + if t<0 then return -1; end + return 2*torch.random(2)-3; +end + +function CosineEmbeddingCriterion:updateGradInput(input, y) + local v1 = input[1] + local v2 = input[2] + local gw1 = input[1].new() + local gw2 = input[2].new() + gw1:resizeAs(v1) + gw2:resizeAs(v1) + + gw1:zero() + gw1:add(1/(self.w2*self.w3), v2) + 
gw1:add(-self.w1/(self.w22*self.w2*self.w3), v1) + + gw2:zero() + gw2:add(1/(self.w2*self.w3), v1) + gw2:add(-self.w1/(self.w32*self.w2*self.w3), v2) + + if y == 1 then + gw1 = -gw1 + gw2 = -gw2 + end + self.gradInput = {gw1, gw2} + return self.gradInput +end diff --git a/Criterion.lua b/Criterion.lua new file mode 100644 index 0000000..6513414 --- /dev/null +++ b/Criterion.lua @@ -0,0 +1,51 @@ +local Criterion = torch.class('nn.Criterion') + +function Criterion:__init() + self.gradInput = torch.Tensor() + self.output = 0 +end + +function Criterion:updateOutput(input, target) +end + +function Criterion:forward(input, target) + return self:updateOutput(input, target) +end + +function Criterion:backward(input, target) + return self:updateGradInput(input, target) +end + +function Criterion:updateGradInput(input, target) +end + +function Criterion:clone() + local f = torch.MemoryFile("rw"):binary() + f:writeObject(self) + f:seek(1) + local clone = f:readObject() + f:close() + return clone +end + +function Criterion:type(type) + -- find all tensors and convert them + for key,param in pairs(self) do + if torch.typename(param) and torch.typename(param):find('torch%..+Tensor') then + self[key] = param:type(type) + end + end + return self +end + +function Criterion:float() + return self:type('torch.FloatTensor') +end + +function Criterion:double() + return self:type('torch.DoubleTensor') +end + +function Criterion:cuda() + return self:type('torch.CudaTensor') +end diff --git a/CriterionTable.lua b/CriterionTable.lua new file mode 100644 index 0000000..e5538f7 --- /dev/null +++ b/CriterionTable.lua @@ -0,0 +1,16 @@ +local CriterionTable, parent = torch.class('nn.CriterionTable', 'nn.Module') + +function CriterionTable:__init(criterion) + self.criterion = criterion + self.gradInput = {criterion.gradInput} +end + +function CriterionTable:updateOutput(input) + self.output = self.criterion:updateOutput(unpack(input)) + return self.output +end + +function CriterionTable:updateGradInput(input, gradOutput) + self.criterion:updateGradInput(unpack(input)) + return self.gradInput +end diff --git a/DotProduct.lua b/DotProduct.lua new file mode 100644 index 0000000..d16d295 --- /dev/null +++ b/DotProduct.lua @@ -0,0 +1,29 @@ +local DotProduct, parent = torch.class('nn.DotProduct', 'nn.Module') + +function DotProduct:__init() + parent.__init(self) + self.gradInput = {torch.Tensor(), torch.Tensor()} + self.output=torch.Tensor(1) +end + +function DotProduct:updateOutput(input,y) + self.output[1] = input[1]:dot(input[2]) + return self.output +end + +function DotProduct:updateGradInput(input, gradOutput) + local v1 = input[1] + local v2 = input[2] + local gw1=self.gradInput[1]; + local gw2=self.gradInput[2]; + gw1:resizeAs(v1) + gw2:resizeAs(v1) + + gw1:copy( v2) + gw1:mul(gradOutput[1]) + + gw2:copy( v1) + gw2:mul(gradOutput[1]) + + return self.gradInput +end diff --git a/Euclidean.lua b/Euclidean.lua new file mode 100644 index 0000000..808b7ab --- /dev/null +++ b/Euclidean.lua @@ -0,0 +1,64 @@ +local Euclidean, parent = torch.class('nn.Euclidean', 'nn.Module') + +function Euclidean:__init(inputSize,outputSize) + parent.__init(self) + + self.weight = torch.Tensor(inputSize,outputSize) + self.gradWeight = torch.Tensor(inputSize,outputSize) + + -- state + self.gradInput:resize(inputSize) + self.output:resize(outputSize) + self.temp = torch.Tensor(inputSize) + + self:reset() +end + +function Euclidean:reset(stdv) + if stdv then + stdv = stdv * math.sqrt(3) + else + stdv = 1./math.sqrt(self.weight:size(1)) + end + + 
for i=1,self.weight:size(2) do + self.weight:select(2, i):apply(function() + return torch.uniform(-stdv, stdv) + end) + end +end + +function Euclidean:updateOutput(input) + self.output:zero() + for o = 1,self.weight:size(2) do + self.output[o] = input:dist(self.weight:select(2,o)) + end + return self.output +end + +function Euclidean:updateGradInput(input, gradOutput) + self:updateOutput(input) + if self.gradInput then + self.gradInput:zero() + for o = 1,self.weight:size(2) do + if self.output[o] ~= 0 then + self.temp:copy(input):add(-1,self.weight:select(2,o)) + self.temp:mul(gradOutput[o]/self.output[o]) + self.gradInput:add(self.temp) + end + end + return self.gradInput + end +end + +function Euclidean:accGradParameters(input, gradOutput, scale) + self:updateOutput(input) + scale = scale or 1 + for o = 1,self.weight:size(2) do + if self.output[o] ~= 0 then + self.temp:copy(self.weight:select(2,o)):add(-1,input) + self.temp:mul(gradOutput[o]/self.output[o]) + self.gradWeight:select(2,o):add(self.temp) + end + end +end @@ -0,0 +1,9 @@ +local Exp = torch.class('nn.Exp', 'nn.Module') + +function Exp:updateOutput(input) + return input.nn.Exp_updateOutput(self, input) +end + +function Exp:updateGradInput(input, gradOutput) + return input.nn.Exp_updateGradInput(self, input, gradOutput) +end diff --git a/HardShrink.lua b/HardShrink.lua new file mode 100644 index 0000000..7dfeaca --- /dev/null +++ b/HardShrink.lua @@ -0,0 +1,16 @@ +local HardShrink, parent = torch.class('nn.HardShrink', 'nn.Module') + +function HardShrink:__init(lam) + parent.__init(self) + self.lambda = lam or 0.5 +end + +function HardShrink:updateOutput(input) + input.nn.HardShrink_updateOutput(self, input) + return self.output +end + +function HardShrink:updateGradInput(input, gradOutput) + input.nn.HardShrink_updateGradInput(self, input, gradOutput) + return self.gradInput +end diff --git a/HardTanh.lua b/HardTanh.lua new file mode 100644 index 0000000..3391479 --- /dev/null +++ b/HardTanh.lua @@ -0,0 +1,9 @@ +local HardTanh = torch.class('nn.HardTanh', 'nn.Module') + +function HardTanh:updateOutput(input) + return input.nn.HardTanh_updateOutput(self, input) +end + +function HardTanh:updateGradInput(input, gradOutput) + return input.nn.HardTanh_updateGradInput(self, input, gradOutput) +end diff --git a/HingeEmbeddingCriterion.lua b/HingeEmbeddingCriterion.lua new file mode 100644 index 0000000..e88ef82 --- /dev/null +++ b/HingeEmbeddingCriterion.lua @@ -0,0 +1,26 @@ +local HingeEmbeddingCriterion, parent = + torch.class('nn.HingeEmbeddingCriterion', 'nn.Module') + +function HingeEmbeddingCriterion:__init(margin) + parent.__init(self) + margin=margin or 1 + self.margin = margin + self.gradInput = torch.Tensor(1) +end + +function HingeEmbeddingCriterion:updateOutput(input,y) + self.output=input[1] + if y==-1 then + self.output = math.max(0,self.margin - self.output); + end + return self.output +end + +function HingeEmbeddingCriterion:updateGradInput(input, y) + self.gradInput[1]=y + local dist = input[1] + if y == -1 and dist > self.margin then + self.gradInput[1]=0; + end + return self.gradInput +end diff --git a/Identity.lua b/Identity.lua new file mode 100644 index 0000000..79b5c08 --- /dev/null +++ b/Identity.lua @@ -0,0 +1,12 @@ +local Identity, parent = torch.class('nn.Identity', 'nn.Module') + +function Identity:updateOutput(input) + self.output = input + return self.output +end + + +function Identity:updateGradInput(input, gradOutput) + self.gradInput = gradOutput + return self.gradInput +end diff --git a/Jacobian.lua 
b/Jacobian.lua new file mode 100644 index 0000000..04330ac --- /dev/null +++ b/Jacobian.lua @@ -0,0 +1,239 @@ +nn.Jacobian = {} + +function nn.Jacobian.backward (module, input, param, dparam) + local doparam = 0 + if param then + doparam = 1 + end + param = param or input + -- output deriv + module:forward(input) + local dout = module.output.new():resizeAs(module.output) + -- 1D view + local sdout = module.output.new(dout:storage(),1,dout:nElement()) + -- jacobian matrix to calculate + local jacobian = torch.Tensor(param:nElement(),dout:nElement()):zero() + + for i=1,sdout:nElement() do + dout:zero() + sdout[i] = 1 + module:zeroGradParameters() + local din = module:updateGradInput(input, dout) + module:accGradParameters(input, dout) + if doparam == 1 then + jacobian:select(2,i):copy(dparam) + else + jacobian:select(2,i):copy(din) + end + end + return jacobian +end + +function nn.Jacobian.backwardUpdate (module, input, param) + + -- output deriv + module:forward(input) + local dout = module.output.new():resizeAs(module.output) + -- 1D view + local sdout = module.output.new(dout:storage(),1,dout:nElement()) + -- jacobian matrix to calculate + local jacobian = torch.Tensor(param:nElement(),dout:nElement()):zero() + + -- original param + local origparam = param:clone() + + for i=1,sdout:nElement() do + param:copy(origparam) + dout:zero() + sdout[i] = 1 + local din = module:updateGradInput(input, dout) + module:accUpdateGradParameters(input, dout, 1) + jacobian:select(2,i):copy(param) + end + + param:copy(origparam) + + return jacobian +end + +function nn.Jacobian.forward(module, input, param) + param = param or input + -- perturbation amount + local small = 1e-6 + -- 1D view of input + local tst = param:storage() + local sin = param.new(tst,1,tst:size()) + -- jacobian matrix to calculate + local jacobian = torch.Tensor():resize(param:nElement(),module:forward(input):nElement()) + + local outa = torch.Tensor(jacobian:size(2)) + local outb = torch.Tensor(jacobian:size(2)) + + for i=1,sin:nElement() do + sin[i] = sin[i] - small + outa:copy(module:forward(input)) + sin[i] = sin[i] + 2*small + outb:copy(module:forward(input)) + sin[i] = sin[i] - small + + outb:add(-1,outa):div(2*small) + jacobian:select(1,i):copy(outb) + end + + return jacobian +end + +function nn.Jacobian.forwardUpdate(module, input, param) + -- perturbation amount + local small = 1e-6 + -- 1D view of input + local tst = param:storage() + local sin = param.new(tst,1,tst:size()) + -- jacobian matrix to calculate + local jacobian = torch.Tensor():resize(param:nElement(),module:forward(input):nElement()) + + local outa = torch.Tensor(jacobian:size(2)) + local outb = torch.Tensor(jacobian:size(2)) + + for i=1,sin:nElement() do + sin[i] = sin[i] - small + outa:copy(module:forward(input)) + sin[i] = sin[i] + 2*small + outb:copy(module:forward(input)) + sin[i] = sin[i] - small + + outb:add(-1,outa):div(2*small) + jacobian:select(1,i):copy(outb) + jacobian:select(1,i):mul(-1) + jacobian:select(1,i):add(sin[i]) + end + return jacobian +end + +function nn.Jacobian.testJacobian (module, input, minval, maxval) + minval = minval or -2 + maxval = maxval or 2 + local inrange = maxval - minval + input:copy(torch.rand(input:nElement()):mul(inrange):add(minval)) + local jac_fprop = nn.Jacobian.forward(module,input) + local jac_bprop = nn.Jacobian.backward(module,input) + local error = jac_fprop-jac_bprop + return error:abs():maxall() +end + +function nn.Jacobian.testJacobianParameters (module, input, param, dparam, minval, maxval) + minval = minval 
or -2 + maxval = maxval or 2 + local inrange = maxval - minval + input:copy(torch.rand(input:nElement()):mul(inrange):add(minval)) + param:copy(torch.rand(param:nElement()):mul(inrange):add(minval)) + local jac_bprop = nn.Jacobian.backward(module, input, param, dparam) + local jac_fprop = nn.Jacobian.forward(module, input, param) + local error = jac_fprop - jac_bprop + return error:abs():maxall() +end + +function nn.Jacobian.testJacobianUpdateParameters (module, input, param, minval, maxval) + minval = minval or -2 + maxval = maxval or 2 + local inrange = maxval - minval + input:copy(torch.rand(input:nElement()):mul(inrange):add(minval)) + param:copy(torch.rand(param:nElement()):mul(inrange):add(minval)) + local params_bprop = nn.Jacobian.backwardUpdate(module, input, param) + local params_fprop = nn.Jacobian.forwardUpdate(module, input, param) + + local error = params_fprop - params_bprop + return error:abs():maxall() +end + +function nn.Jacobian.testIO(module,input, minval, maxval) + minval = minval or -2 + maxval = maxval or 2 + local inrange = maxval - minval + + -- run module + module:forward(input) + local go = module.output:clone():copy(torch.rand(module.output:nElement()):mul(inrange):add(minval)) + module:updateGradInput(input,go) + module:accGradParameters(input,go) + + local fo = module.output:clone() + local bo = module.gradInput:clone() + + -- write module + local f = torch.DiskFile('tmp.bin','w'):binary() + f:writeObject(module) + f:close() + -- read module + local m = torch.DiskFile('tmp.bin'):binary():readObject() + m:forward(input) + m:updateGradInput(input,go) + m:accGradParameters(input,go) + -- cleanup + os.remove('tmp.bin') + + local fo2 = m.output:clone() + local bo2 = m.gradInput:clone() + + local errf = fo - fo2 + local errb = bo - bo2 + return errf:abs():maxall(), errb:abs():maxall() +end + +function nn.Jacobian.testAllUpdate(module, input, weight, gradWeight) + local gradOutput + local lr = torch.uniform(0.1, 1) + local errors = {} + + -- accGradParameters + local maccgp = module:clone() + local weightc = maccgp[weight]:clone() + maccgp:forward(input) + gradOutput = torch.rand(maccgp.output:size()) + maccgp:zeroGradParameters() + maccgp:updateGradInput(input, gradOutput) + maccgp:accGradParameters(input, gradOutput) + maccgp:updateParameters(lr) + errors["accGradParameters"] = (weightc-maccgp[gradWeight]*lr-maccgp[weight]):norm() + + -- accUpdateGradParameters + local maccugp = module:clone() + maccugp:forward(input) + maccugp:updateGradInput(input, gradOutput) + maccugp:accUpdateGradParameters(input, gradOutput, lr) + errors["accUpdateGradParameters"] = (maccugp[weight]-maccgp[weight]):norm() + + -- shared, accGradParameters + local macsh1 = module:clone() + local macsh2 = module:clone() + macsh2:share(macsh1, weight) + macsh1:forward(input) + macsh2:forward(input) + macsh1:zeroGradParameters() + macsh2:zeroGradParameters() + macsh1:updateGradInput(input, gradOutput) + macsh2:updateGradInput(input, gradOutput) + macsh1:accGradParameters(input, gradOutput) + macsh2:accGradParameters(input, gradOutput) + macsh1:updateParameters(lr) + macsh2:updateParameters(lr) + local err = (weightc-maccgp[gradWeight]*(lr*2)-macsh1[weight]):norm() + err = err + (weightc-maccgp[gradWeight]*(lr*2)-macsh2[weight]):norm() + errors["accGradParameters [shared]"] = err + + -- shared, accUpdateGradParameters + local macshu1 = module:clone() + local macshu2 = module:clone() + macshu2:share(macshu1, weight) + macshu1:forward(input) + macshu2:forward(input) + macshu1:updateGradInput(input, 
gradOutput) + macshu2:updateGradInput(input, gradOutput) + macshu1:accUpdateGradParameters(input, gradOutput, lr) + macshu2:accUpdateGradParameters(input, gradOutput, lr) + local err = (weightc-maccgp[gradWeight]*(lr*2)-macshu1[weight]):norm() + err = err + (weightc-maccgp[gradWeight]*(lr*2)-macshu2[weight]):norm() + errors["accUpdateGradParameters [shared]"] = err + + return errors +end diff --git a/JoinTable.lua b/JoinTable.lua new file mode 100644 index 0000000..dc20246 --- /dev/null +++ b/JoinTable.lua @@ -0,0 +1,50 @@ +local JoinTable, parent = torch.class('nn.JoinTable', 'nn.Module') + +function JoinTable:__init(dimension) + parent.__init(self) + self.size = torch.LongStorage() + self.dimension = dimension + self.gradInput = {} +end + +function JoinTable:updateOutput(input) + for i=1,#input do + local currentOutput = input[i] + if i == 1 then + self.size:resize(currentOutput:dim()):copy(currentOutput:size()) + else + self.size[self.dimension] = self.size[self.dimension] + + currentOutput:size(self.dimension) + end + end + self.output:resize(self.size) + + local offset = 1 + for i=1,#input do + local currentOutput = input[i] + self.output:narrow(self.dimension, offset, + currentOutput:size(self.dimension)):copy(currentOutput) + offset = offset + currentOutput:size(self.dimension) + end + return self.output + +end + +function JoinTable:updateGradInput(input, gradOutput) + for i=1,#input do + if self.gradInput[i] == nil then + self.gradInput[i] = input[i].new() + end + self.gradInput[i]:resizeAs(input[i]) + end + + local offset = 1 + for i=1,#input do + local currentOutput = input[i] + local currentGradInput = gradOutput:narrow(self.dimension, offset, + currentOutput:size(self.dimension)) + self.gradInput[i]:copy(currentGradInput) + offset = offset + currentOutput:size(self.dimension) + end + return self.gradInput +end diff --git a/L1HingeEmbeddingCriterion.lua b/L1HingeEmbeddingCriterion.lua new file mode 100644 index 0000000..5aa1ae7 --- /dev/null +++ b/L1HingeEmbeddingCriterion.lua @@ -0,0 +1,41 @@ +local L1HingeEmbeddingCriterion, parent = torch.class('nn.L1HingeEmbeddingCriterion', 'nn.Module') + +function L1HingeEmbeddingCriterion:__init(margin) + parent.__init(self) + margin=margin or 1 + self.margin = margin + self.gradInput = {torch.Tensor(), torch.Tensor()} +end + +function L1HingeEmbeddingCriterion:updateOutput(input,y) + self.output=input[1]:dist(input[2],1); + if y==-1 then + self.output = math.max(0,self.margin - self.output); + end + return self.output +end + + +local function mathsign(t) + if t>0 then return 1; end + if t<0 then return -1; end + return 2*torch.random(2)-3; +end + +function L1HingeEmbeddingCriterion:updateGradInput(input, y) + self.gradInput[1]:resizeAs(input[1]) + self.gradInput[2]:resizeAs(input[2]) + self.gradInput[1]:copy(input[1]) + self.gradInput[1]:add(-1, input[2]) + local dist = self.gradInput[1]:norm(1); + self.gradInput[1]:apply(mathsign) -- L1 gradient + if y == -1 then -- just to avoid a mul by 1 + if dist > self.margin then + self.gradInput[1]:zero() + else + self.gradInput[1]:mul(-1) + end + end + self.gradInput[2]:zero():add(-1, self.gradInput[1]) + return self.gradInput +end diff --git a/Linear.lua b/Linear.lua new file mode 100644 index 0000000..953af78 --- /dev/null +++ b/Linear.lua @@ -0,0 +1,82 @@ +local Linear, parent = torch.class('nn.Linear', 'nn.Module') + +function Linear:__init(inputSize, outputSize) + parent.__init(self) + + self.weight = torch.Tensor(outputSize, inputSize) + self.bias = torch.Tensor(outputSize) + 
self.gradWeight = torch.Tensor(outputSize, inputSize) + self.gradBias = torch.Tensor(outputSize) + + self:reset() +end + +function Linear:reset(stdv) + if stdv then + stdv = stdv * math.sqrt(3) + else + stdv = 1./math.sqrt(self.weight:size(2)) + end + + -- we do this so the initialization is exactly + -- the same than in previous torch versions + for i=1,self.weight:size(1) do + self.weight:select(1, i):apply(function() + return torch.uniform(-stdv, stdv) + end) + self.bias[i] = torch.uniform(-stdv, stdv) + end +end + +function Linear:updateOutput(input) + if input:dim() == 1 then + self.output:resize(self.bias:size(1)) + self.output:copy(self.bias) + self.output:addmv(1, self.weight, input) + elseif input:dim() == 2 then + local nframe = input:size(1) + local nunit = self.bias:size(1) + + self.output:resize(nframe, nunit) + self.output:zero():addr(1, input.new(nframe):fill(1), self.bias) + self.output:addmm(1, input, self.weight:t()) + else + error('input must be vector or matrix') + end + + return self.output +end + +function Linear:updateGradInput(input, gradOutput) + if self.gradInput then + + if input:dim() == 1 then + self.gradInput:resizeAs(input) + self.gradInput:addmv(0, 1, self.weight:t(), gradOutput) + elseif input:dim() == 2 then + self.gradInput:resizeAs(input) + self.gradInput:addmm(0, 1, gradOutput, self.weight) + end + + return self.gradInput + end +end + +function Linear:accGradParameters(input, gradOutput, scale) + scale = scale or 1 + + if input:dim() == 1 then + self.gradWeight:addr(scale, gradOutput, input) + self.gradBias:add(scale, gradOutput) + elseif input:dim() == 2 then + local nframe = input:size(1) + local nunit = self.bias:size(1) + + self.gradWeight:addmm(scale, gradOutput:t(), input) + self.gradBias:addmv(scale, gradOutput:t(), input.new(nframe):fill(1)) + end + +end + +-- we do not need to accumulate parameters when sharing +Linear.sharedAccUpdateGradParameters = Linear.accUpdateGradParameters diff --git a/LogSigmoid.lua b/LogSigmoid.lua new file mode 100644 index 0000000..7485ae6 --- /dev/null +++ b/LogSigmoid.lua @@ -0,0 +1,14 @@ +local LogSigmoid, parent = torch.class('nn.LogSigmoid', 'nn.Module') + +function LogSigmoid:__init() + parent.__init(self) + self.buffer = torch.Tensor() +end + +function LogSigmoid:updateOutput(input) + return input.nn.LogSigmoid_updateOutput(self, input) +end + +function LogSigmoid:updateGradInput(input, gradOutput) + return input.nn.LogSigmoid_updateGradInput(self, input, gradOutput) +end diff --git a/LogSoftMax.lua b/LogSoftMax.lua new file mode 100644 index 0000000..8d2947e --- /dev/null +++ b/LogSoftMax.lua @@ -0,0 +1,9 @@ +local LogSoftMax = torch.class('nn.LogSoftMax', 'nn.Module') + +function LogSoftMax:updateOutput(input) + return input.nn.LogSoftMax_updateOutput(self, input) +end + +function LogSoftMax:updateGradInput(input, gradOutput) + return input.nn.LogSoftMax_updateGradInput(self, input, gradOutput) +end diff --git a/LookupTable.lua b/LookupTable.lua new file mode 100644 index 0000000..115f19c --- /dev/null +++ b/LookupTable.lua @@ -0,0 +1,76 @@ +local LookupTable, parent = torch.class('nn.LookupTable', 'nn.Module') + +LookupTable.__version = 2 + +function LookupTable:__init(nIndex, ...) + parent.__init(self) + + if select('#', ...) == 1 and type(select(1, ...)) ~= "number" then + local size = select(1, ...) + self.size = torch.LongStorage(#size + 1) + for i=1,#size do + self.size[i+1] = size[i] + end + else + self.size = torch.LongStorage(select('#', ...)+1) + for i=1,select('#',...) 
do + self.size[i+1] = select(i, ...) + end + end + + self.size[1] = nIndex + self.weight = torch.Tensor(self.size) + self.gradWeight = torch.Tensor(self.size):zero() + self.inputs = {} + + self:reset() +end + +function LookupTable:reset(stdv) + stdv = stdv or 1 + self.weight:apply(function() + return torch.normal(0, stdv) + end) +end + +function LookupTable:updateOutput(input) + local nIndex = input:size(1) + self.size[1] = nIndex + self.output:resize(self.size) + + for i=1,nIndex do + self.output:select(1, i):copy(self.weight:select(1, input[i])) + end + + return self.output +end + +function LookupTable:zeroGradParameters() + for k,_ in pairs(self.inputs) do + self.gradWeight:select(1, k):zero() + end + self.inputs = {} +end + +function LookupTable:accGradParameters(input, gradOutput, scale) + for i=1,input:size(1) do + local k = input[i] + self.inputs[k] = true + self.gradWeight:select(1, k):add(scale, gradOutput:select(1, i)) + end +end + +function LookupTable:accUpdateGradParameters(input, gradOutput, lr) + for i=1,input:size(1) do + self.weight:select(1, input[i]):add(-lr, gradOutput:select(1, i)) + end +end + +function LookupTable:updateParameters(learningRate) + for k,_ in pairs(self.inputs) do + self.weight:select(1, k):add(-learningRate, self.gradWeight:select(1, k)) + end +end + +-- we do not need to accumulate parameters when sharing +LookupTable.sharedAccUpdateGradParameters = LookupTable.accUpdateGradParameters diff --git a/MSECriterion.lua b/MSECriterion.lua new file mode 100644 index 0000000..655c74f --- /dev/null +++ b/MSECriterion.lua @@ -0,0 +1,14 @@ +local MSECriterion, parent = torch.class('nn.MSECriterion', 'nn.Criterion') + +function MSECriterion:__init() + parent.__init(self) + self.sizeAverage = true +end + +function MSECriterion:updateOutput(input, target) + return input.nn.MSECriterion_updateOutput(self, input, target) +end + +function MSECriterion:updateGradInput(input, target) + return input.nn.MSECriterion_updateGradInput(self, input, target) +end diff --git a/MarginCriterion.lua b/MarginCriterion.lua new file mode 100644 index 0000000..deb903e --- /dev/null +++ b/MarginCriterion.lua @@ -0,0 +1,23 @@ +local MarginCriterion, parent = + torch.class('nn.MarginCriterion', 'nn.Module') + +function MarginCriterion:__init(margin) + parent.__init(self) + margin=margin or 1 + self.margin = margin + self.gradInput = torch.Tensor(1) +end + +function MarginCriterion:updateOutput(input,y) + self.output=math.max(0, self.margin- y* input[1]) + return self.output +end + +function MarginCriterion:updateGradInput(input, y) + if (y*input[1])<self.margin then + self.gradInput[1]=-y + else + self.gradInput[1]=0; + end + return self.gradInput +end diff --git a/MarginRankingCriterion.lua b/MarginRankingCriterion.lua new file mode 100644 index 0000000..a365ade --- /dev/null +++ b/MarginRankingCriterion.lua @@ -0,0 +1,25 @@ +local MarginRankingCriterion, parent = torch.class('nn.MarginRankingCriterion', 'nn.Module') + +function MarginRankingCriterion:__init(margin) + parent.__init(self) + margin=margin or 1 + self.margin = margin + self.gradInput = {torch.Tensor(1), torch.Tensor(1)} +end + +function MarginRankingCriterion:updateOutput(input,y) + self.output=math.max(0, -y*(input[1][1]-input[2][1]) + self.margin ) + return self.output +end + +function MarginRankingCriterion:updateGradInput(input, y) + local dist = -y*(input[1][1]-input[2][1]) + self.margin + if dist < 0 then + self.gradInput[1][1]=0; + self.gradInput[2][1]=0; + else + self.gradInput[1][1]=-y + self.gradInput[2][1]=y + 
end + return self.gradInput +end @@ -0,0 +1,16 @@ +local Max, parent = torch.class('nn.Max', 'nn.Module') + +function Max:__init(dimension) + parent.__init(self) + dimension = dimension or 1 + self.dimension = dimension + self.indices = torch.Tensor() +end + +function Max:updateOutput(input) + return input.nn.Max_updateOutput(self, input) +end + +function Max:updateGradInput(input, gradOutput) + return input.nn.Max_updateGradInput(self, input, gradOutput) +end diff --git a/Mean.lua b/Mean.lua new file mode 100644 index 0000000..55e7609 --- /dev/null +++ b/Mean.lua @@ -0,0 +1,26 @@ +local Mean, parent = torch.class('nn.Mean', 'nn.Module') + +function Mean:__init(dimension) + parent.__init(self) + dimension = dimension or 1 + self.dimension = dimension +end + +function Mean:updateOutput(input) + input.torch.mean(self.output, input, self.dimension) + self.output = self.output:select(self.dimension, 1) + return self.output +end + +function Mean:updateGradInput(input, gradOutput) + local size = gradOutput:size():totable() + local stride = gradOutput:stride():totable() + table.insert(size, self.dimension, input:size(self.dimension)) + table.insert(stride, self.dimension, 0) + + self.gradInput:resizeAs(gradOutput):copy(gradOutput) + self.gradInput:mul(1/input:size(self.dimension)) + self.gradInput:resize(torch.LongStorage(size), torch.LongStorage(stride)) + + return self.gradInput +end @@ -0,0 +1,16 @@ +local Min, parent = torch.class('nn.Min', 'nn.Module') + +function Min:__init(dimension) + parent.__init(self) + dimension = dimension or 1 + self.dimension = dimension + self.indices = torch.Tensor() +end + +function Min:updateOutput(input) + return input.nn.Min_updateOutput(self, input) +end + +function Min:updateGradInput(input, gradOutput) + return input.nn.Min_updateGradInput(self, input, gradOutput) +end diff --git a/Module.lua b/Module.lua new file mode 100644 index 0000000..2ae8115 --- /dev/null +++ b/Module.lua @@ -0,0 +1,211 @@ +local Module = torch.class('nn.Module') + +function Module:__init() + self.gradInput = torch.Tensor() + self.output = torch.Tensor() +end + +function Module:parameters() + if self.weight and self.bias then + return {self.weight, self.bias}, {self.gradWeight, self.gradBias} + elseif self.weight then + return {self.weight}, {self.gradWeight} + elseif self.bias then + return {self.bias}, {self.gradBias} + else + return + end +end + +function Module:updateOutput(input) + return self.output +end + +function Module:forward(input) + return self:updateOutput(input, target) +end + +function Module:backward(input, gradOutput) + self:updateGradInput(input, gradOutput) + self:accGradParameters(input, gradOutput) + return self.gradInput +end + +function Module:backwardUpdate(input, gradOutput, lr) + self:updateGradInput(input, gradOutput) + self:accUpdateGradParameters(input, gradOutput, lr) + return self.gradInput +end + +function Module:updateGradInput(input, gradOutput) + return self.gradInput +end + +function Module:accGradParameters(input, gradOutput, scale) +end + +function Module:accUpdateGradParameters(input, gradOutput, lr) + local gradWeight = self.gradWeight + local gradBias = self.gradBias + self.gradWeight = self.weight + self.gradBias = self.bias + self:accGradParameters(input, gradOutput, -lr) + self.gradWeight = gradWeight + self.gradBias = gradBias +end + +function Module:sharedAccUpdateGradParameters(input, gradOutput, lr) + if self:parameters() then + self:zeroGradParameters() + self:accGradParameters(input, gradOutput, 1) + self:updateParameters(lr) + end 
+end + +function Module:zeroGradParameters() + local _,gradParams = self:parameters() + if gradParams then + for i=1,#gradParams do + gradParams[i]:zero() + end + end +end + +function Module:updateParameters(learningRate) + local params, gradParams = self:parameters() + if params then + for i=1,#params do + params[i]:add(-learningRate, gradParams[i]) + end + end +end + +function Module:share(mlp, ...) + for i,v in ipairs(arg) do + if self[v] ~= nil then + self[v]:set(mlp[v]) + self.accUpdateGradParameters = self.sharedAccUpdateGradParameters + mlp.accUpdateGradParameters = mlp.sharedAccUpdateGradParameters + end + end + return self +end + +function Module:clone(...) + local f = torch.MemoryFile("rw"):binary() + f:writeObject(self) + f:seek(1) + local clone = f:readObject() + f:close() + if select('#',...) > 0 then + clone:share(self,...) + end + return clone +end + +function Module:type(type) + -- find all tensors and convert them + for key,param in pairs(self) do + if torch.typename(param) and torch.typename(param):find('torch%..+Tensor') then + self[key] = param:type(type) + end + end + -- find submodules in classic containers 'modules' + if self.modules then + for _,module in ipairs(self.modules) do + module:type(type) + end + end + return self +end + +function Module:float() + return self:type('torch.FloatTensor') +end + +function Module:double() + return self:type('torch.DoubleTensor') +end + +function Module:cuda() + return self:type('torch.CudaTensor') +end + +function Module:getParameters() + -- get parameters + local parameters,gradParameters = self:parameters() + + -- this function flattens arbitrary lists of parameters, + -- even complex shared ones + local function flatten(parameters) + -- already flat ? + local flat = true + for k = 2,#parameters do + if parameters[k]:storage() ~= parameters[k-1]:storage() then + flat = false + break + end + end + if flat then + local nParameters = 0 + for k,param in ipairs(parameters) do + nParameters = nParameters + param:nElement() + end + local flatParameters = parameters[1].new(parameters[1]:storage()) + if nParameters ~= flatParameters:nElement() then + error('flattenParameters(): weird parameters') + end + return flatParameters + end + -- compute offsets of each parameter + local offsets = {} + local sizes = {} + local strides = {} + local elements = {} + local storageOffsets = {} + local params = {} + local nParameters = 0 + for k,param in ipairs(parameters) do + table.insert(offsets, nParameters+1) + table.insert(sizes, param:size()) + table.insert(strides, param:stride()) + table.insert(elements, param:nElement()) + table.insert(storageOffsets, param:storageOffset()) + local isView = false + for i = 1,k-1 do + if param:storage() == parameters[i]:storage() then + offsets[k] = offsets[i] + if storageOffsets[k] ~= storageOffsets[i] or elements[k] ~= elements[i] then + error('flattenParameters(): cannot flatten shared weights with different structures') + end + isView = true + break + end + end + if not isView then + nParameters = nParameters + param:nElement() + end + end + -- create flat vector + local flatParameters = parameters[1].new(nParameters) + local storage = flatParameters:storage() + -- reallocate all parameters in flat vector + for i = 1,#parameters do + local data = parameters[i]:clone() + parameters[i]:set(storage, offsets[i], elements[i]):resize(sizes[i],strides[i]):copy(data) + data = nil + collectgarbage() + end + -- cleanup + collectgarbage() + -- return flat param + return flatParameters + end + + -- flatten 
parameters and gradients + local flatParameters = flatten(parameters) + local flatGradParameters = flatten(gradParameters) + + -- return new flat vector that contains all discrete parameters + return flatParameters, flatGradParameters +end @@ -0,0 +1,42 @@ +local Mul, parent = torch.class('nn.Mul', 'nn.Module') + +function Mul:__init(inputSize) + parent.__init(self) + + self.weight = torch.Tensor(1) + self.gradWeight = torch.Tensor(1) + + -- state + self.gradInput:resize(inputSize) + self.output:resize(inputSize) + + self:reset() +end + + +function Mul:reset(stdv) + if stdv then + stdv = stdv * math.sqrt(3) + else + stdv = 1./math.sqrt(self.weight:size(1)) + end + + self.weight[1] = torch.uniform(-stdv, stdv); +end + +function Mul:updateOutput(input) + self.output:copy(input); + self.output:mul(self.weight[1]); + return self.output +end + +function Mul:updateGradInput(input, gradOutput) + self.gradInput:zero() + self.gradInput:add(self.weight[1], gradOutput) + return self.gradInput +end + +function Mul:accGradParameters(input, gradOutput, scale) + scale = scale or 1 + self.gradWeight[1] = self.gradWeight[1] + scale*input:dot(gradOutput); +end diff --git a/MultiCriterion.lua b/MultiCriterion.lua new file mode 100644 index 0000000..e83b97e --- /dev/null +++ b/MultiCriterion.lua @@ -0,0 +1,32 @@ +local MultiCriterion, parent = torch.class('nn.MultiCriterion', 'nn.Criterion') + +function MultiCriterion:__init() + parent.__init(self) + self.criterions = {} + self.weights = torch.DoubleStorage() +end + +function MultiCriterion:add(criterion, weight) + weight = weight or 1 + table.insert(self.criterions, criterion) + self.weights:resize(#self.criterions, true) + self.weights[#self.criterions] = weight + return self +end + +function MultiCriterion:updateOutput(input, target) + self.output = 0 + for i=1,#self.criterions do + self.output = self.output + self.weights[i]*self.criterions[i]:updateOutput(input, target) + end + return self.output +end + +function MultiCriterion:updateGradInput(input, target) + self.gradInput:resizeAs(input) + self.gradInput:zero() + for i=1,#self.criterions do + self.gradInput:add(self.weights[i], self.criterions[i]:updateGradInput(input, target)) + end + return self.gradInput +end diff --git a/MultiLabelMarginCriterion.lua b/MultiLabelMarginCriterion.lua new file mode 100644 index 0000000..c435888 --- /dev/null +++ b/MultiLabelMarginCriterion.lua @@ -0,0 +1,14 @@ +local MultiLabelMarginCriterion, parent = torch.class('nn.MultiLabelMarginCriterion', 'nn.Criterion') + +function MultiLabelMarginCriterion:__init() + parent.__init(self) + self.sizeAverage = true +end + +function MultiLabelMarginCriterion:updateOutput(input, target) + return input.nn.MultiLabelMarginCriterion_updateOutput(self, input, target) +end + +function MultiLabelMarginCriterion:updateGradInput(input, target) + return input.nn.MultiLabelMarginCriterion_updateGradInput(self, input, target) +end diff --git a/MultiMarginCriterion.lua b/MultiMarginCriterion.lua new file mode 100644 index 0000000..e8de9d9 --- /dev/null +++ b/MultiMarginCriterion.lua @@ -0,0 +1,14 @@ +local MultiMarginCriterion, parent = torch.class('nn.MultiMarginCriterion', 'nn.Criterion') + +function MultiMarginCriterion:__init() + parent.__init(self) + self.sizeAverage = true +end + +function MultiMarginCriterion:updateOutput(input, target) + return input.nn.MultiMarginCriterion_updateOutput(self, input, target) +end + +function MultiMarginCriterion:updateGradInput(input, target) + return 
input.nn.MultiMarginCriterion_updateGradInput(self, input, target) +end diff --git a/Narrow.lua b/Narrow.lua new file mode 100644 index 0000000..4445983 --- /dev/null +++ b/Narrow.lua @@ -0,0 +1,24 @@ +local Narrow, parent = torch.class('nn.Narrow', 'nn.Module') + +function Narrow:__init(dimension,offset,length) + parent.__init(self) + self.dimension=dimension + self.index=offset + self.length=length or 1 + if not dimension or not offset then + error('nn.Narrow(dimension, offset, length)') + end +end + +function Narrow:updateOutput(input) + local output=input:narrow(self.dimension,self.index,self.length); + self.output:resizeAs(output) + return self.output:copy(output) +end + +function Narrow:updateGradInput(input, gradOutput) + self.gradInput:resizeAs(input) + self.gradInput:zero(); + self.gradInput:narrow(self.dimension,self.index,self.length):copy(gradOutput) + return self.gradInput +end diff --git a/PairwiseDistance.lua b/PairwiseDistance.lua new file mode 100644 index 0000000..638c58f --- /dev/null +++ b/PairwiseDistance.lua @@ -0,0 +1,33 @@ +local PairwiseDistance, parent = torch.class('nn.PairwiseDistance', 'nn.Module') + +function PairwiseDistance:__init(p) + parent.__init(self) + + -- state + self.gradInput = {torch.Tensor(), torch.Tensor()} + self.output = torch.Tensor(1) + self.norm=p +end + +function PairwiseDistance:updateOutput(input) + self.output[1]=input[1]:dist(input[2],self.norm); + return self.output +end + +local function mathsign(x) + if x==0 then return 2*torch.random(2)-3; end + if x>0 then return 1; else return -1; end +end + +function PairwiseDistance:updateGradInput(input, gradOutput) + self.gradInput[1]:resizeAs(input[1]) + self.gradInput[2]:resizeAs(input[2]) + self.gradInput[1]:copy(input[1]) + self.gradInput[1]:add(-1, input[2]) + if self.norm==1 then + self.gradInput[1]:apply(mathsign) + end + self.gradInput[1]:mul(gradOutput[1]); + self.gradInput[2]:zero():add(-1, self.gradInput[1]) + return self.gradInput +end diff --git a/Parallel.lua b/Parallel.lua new file mode 100644 index 0000000..04a8bdb --- /dev/null +++ b/Parallel.lua @@ -0,0 +1,137 @@ +local Parallel, parent = torch.class('nn.Parallel', 'nn.Module') + +function Parallel:__init(inputDimension,outputDimension) + parent.__init(self) + self.modules = {} + self.size = torch.LongStorage() + self.inputDimension = inputDimension + self.outputDimension = outputDimension +end + +function Parallel:add(module) + table.insert(self.modules, module) + return self +end + +function Parallel:get(index) + return self.modules[index] +end + +function Parallel:updateOutput(input) + + local modules=input:size(self.inputDimension) + + for i=1,modules do + local currentOutput = + self.modules[i]:updateOutput(input:select(self.inputDimension,i)) + + if i == 1 then + self.size:resize(currentOutput:dim()):copy(currentOutput:size()) + else + self.size[self.outputDimension] = self.size[self.outputDimension] + + currentOutput:size(self.outputDimension) + end + end + self.output:resize(self.size) + + local offset = 1 + for i=1,modules do + local currentOutput = self.modules[i]:updateOutput(input:select(self.inputDimension,i)) + + self.output:narrow(self.outputDimension, offset, + currentOutput:size(self.outputDimension)):copy(currentOutput) + offset = offset + currentOutput:size(self.outputDimension) + end + return self.output +end + +function Parallel:updateGradInput(input, gradOutput) + local nModule=input:size(self.inputDimension) + self.gradInput:resizeAs(input) + + local offset = 1 + for i=1,nModule do + local 
module=self.modules[i]; + local currentOutput = module.output + local currentGradInput = + module:updateGradInput(input:select(self.inputDimension,i), + gradOutput:narrow(self.outputDimension, + offset, currentOutput:size(self.outputDimension))) + + self.gradInput:select(self.inputDimension,i):copy(currentGradInput) + offset = offset + currentOutput:size(self.outputDimension) + end + return self.gradInput +end + +function Parallel:accGradParameters(input, gradOutput, scale) + local nModule=input:size(self.inputDimension) + + local offset = 1 + for i=1,nModule do + local module = self.modules[i]; + local currentOutput = module.output + local currentGradInput = + module:accGradParameters(input:select(self.inputDimension,i), + gradOutput:narrow(self.outputDimension, + offset, currentOutput:size(self.outputDimension)), scale) + + offset = offset + currentOutput:size(self.outputDimension) + end +end + +function Parallel:accUpdateGradParameters(input, gradOutput, lr) + local nModule=input:size(self.inputDimension) + + local offset = 1 + for i=1,nModule do + local module = self.modules[i]; + local currentOutput = module.output + local currentGradInput = + module:accUpdateGradParameters(input:select(self.inputDimension,i), + gradOutput:narrow(self.outputDimension, + offset, currentOutput:size(self.outputDimension)), lr) + + offset = offset + currentOutput:size(self.outputDimension) + end +end + +function Parallel:zeroGradParameters() + for _,module in ipairs(self.modules) do + module:zeroGradParameters() + end +end + +function Parallel:updateParameters(learningRate) + for _,module in ipairs(self.modules) do + module:updateParameters(learningRate) + end +end + +function Parallel:share(mlp,...) + for i=1,#self.modules do + self.modules[i]:share(mlp.modules[i],...); + end +end + +function Parallel:parameters() + local function tinsert(to, from) + if type(from) == 'table' then + for i=1,#from do + tinsert(to,from[i]) + end + else + table.insert(to,from) + end + end + local w = {} + local gw = {} + for i=1,#self.modules do + local mw,mgw = self.modules[i]:parameters() + if mw then + tinsert(w,mw) + tinsert(gw,mgw) + end + end + return w,gw +end diff --git a/ParallelTable.lua b/ParallelTable.lua new file mode 100644 index 0000000..a97904f --- /dev/null +++ b/ParallelTable.lua @@ -0,0 +1,71 @@ +local ParallelTable, parent = torch.class('nn.ParallelTable', 'nn.Module') + +function ParallelTable:__init() + parent.__init(self) + self.modules = {} + self.output = {} + self.gradInput = {} +end + +function ParallelTable:add(module) + table.insert(self.modules, module) + return self +end + +function ParallelTable:get(index) + return self.modules[index] +end + +function ParallelTable:size() + return #self.modules +end + +function ParallelTable:updateOutput(input) + for i=1,#self.modules do + self.output[i] = self.modules[i]:updateOutput(input[i]) + end + return self.output +end + + +function ParallelTable:updateGradInput(input, gradOutput) + for i,module in ipairs(self.modules) do + self.gradInput[i]= module:updateGradInput(input[i], gradOutput[i]) + end + return self.gradInput +end + +function ParallelTable:accGradParameters(input, gradOutput, scale) + scale = scale or 1 + for i,module in ipairs(self.modules) do + module:accGradParameters(input[i], gradOutput[i], scale) + end +end + +function ParallelTable:accUpdateGradParameters(input, gradOutput, lr) + lr = lr or 1 + for i,module in ipairs(self.modules) do + module:accUpdateGradParameters(input[i], gradOutput[i], lr) + end +end + +function 
ParallelTable:zeroGradParameters() + for _,module in ipairs(self.modules) do + module:zeroGradParameters() + end +end + +function ParallelTable:updateParameters(learningRate) + for _,module in ipairs(self.modules) do + module:updateParameters(learningRate) + end +end + +function ParallelTable:share(mlp,...) + for i=1,#self.modules do + self.modules[i]:share(mlp.modules[i],...); + end +end + + + diff --git a/Power.lua b/Power.lua new file mode 100644 index 0000000..8052b3f --- /dev/null +++ b/Power.lua @@ -0,0 +1,21 @@ +local Power, parent = torch.class('nn.Power','nn.Module') + +function Power:__init(p) + parent.__init(self) + self.pow = p + if not p then + error('nn.Power(power)') + end +end + +function Power:updateOutput(input) + self.output:resizeAs(input):copy(input) + self.output:pow(self.pow) + return self.output +end + +function Power:updateGradInput(input, gradOutput) + self.gradInput:resizeAs(input):copy(gradOutput) + self.gradInput:cmul(self.output):cdiv(input):mul(self.pow) + return self.gradInput +end diff --git a/Replicate.lua b/Replicate.lua new file mode 100644 index 0000000..c30a86a --- /dev/null +++ b/Replicate.lua @@ -0,0 +1,29 @@ +local Replicate, parent = torch.class('nn.Replicate','nn.Module') + +function Replicate:__init(nf) + parent.__init(self) + self.nfeatures = nf +end + +function Replicate:updateOutput(input) + local sz = torch.LongStorage(input:dim()+1) + sz[1] = self.nfeatures + for i = 1,input:dim() do + sz[i+1] = input:size(i) + end + local st = torch.LongStorage(input:dim()+1) + st[1] = 0 + for i = 1,input:dim() do + st[i+1] = input:stride(i) + end + self.output = input.new(input:storage(),input:storageOffset(),sz,st) + return self.output +end + +function Replicate:updateGradInput(input, gradOutput) + self.gradInput:resizeAs(input):zero() + for k = 1,gradOutput:size(1) do + self.gradInput:add(gradOutput[k]) + end + return self.gradInput +end diff --git a/Reshape.lua b/Reshape.lua new file mode 100644 index 0000000..0be793f --- /dev/null +++ b/Reshape.lua @@ -0,0 +1,38 @@ +local Reshape, parent = torch.class('nn.Reshape', 'nn.Module') + +function Reshape:__init(...) + parent.__init(self) + self.size = torch.LongStorage() + self.batchsize = torch.LongStorage() + local n = select('#', ...) + if n == 1 and torch.typename(select(1, ...)) == 'torch.LongStorage' then + self.size:resize(#select(1, ...)):copy(select(1, ...)) + else + self.size:resize(n) + self.batchsize:resize(n+1) + self.nelement = 1 + for i=1,n do + self.size[i] = select(i, ...) + self.batchsize[i+1] = select(i, ...) 
+ self.nelement = self.nelement * self.size[i] + end + end +end + +function Reshape:updateOutput(input) + input = input:contiguous() + local nelement = input:nElement() + if nelement == self.nelement then + self.output:set(input):resize(self.size) + else + self.batchsize[1] = input:size(1) + self.output:set(input):resize(self.batchsize) + end + return self.output +end + +function Reshape:updateGradInput(input, gradOutput) + gradOutput = gradOutput:contiguous() + self.gradInput:set(gradOutput):resizeAs(input) + return self.gradInput +end diff --git a/Select.lua b/Select.lua new file mode 100644 index 0000000..acf8e06 --- /dev/null +++ b/Select.lua @@ -0,0 +1,20 @@ +local Select, parent = torch.class('nn.Select', 'nn.Module') + +function Select:__init(dimension,index) + parent.__init(self) + self.dimension = dimension + self.index = index +end + +function Select:updateOutput(input) + local output = input:select(self.dimension,self.index); + self.output:resizeAs(output) + return self.output:copy(output) +end + +function Select:updateGradInput(input, gradOutput) + self.gradInput:resizeAs(input) + self.gradInput:zero() + self.gradInput:select(self.dimension,self.index):copy(gradOutput) + return self.gradInput +end diff --git a/Sequential.lua b/Sequential.lua new file mode 100644 index 0000000..3e23350 --- /dev/null +++ b/Sequential.lua @@ -0,0 +1,129 @@ +local Sequential, parent = torch.class('nn.Sequential', 'nn.Module') + +function Sequential:__init() + self.modules = {} +end + +function Sequential:add(module) + if #self.modules == 0 then + self.gradInput = module.gradInput + end + table.insert(self.modules, module) + self.output = module.output + return self +end + +function Sequential:size() + return #self.modules +end + +function Sequential:get(index) + return self.modules[index] +end + +function Sequential:updateOutput(input) + local currentOutput = input + for i=1,#self.modules do + currentOutput = self.modules[i]:updateOutput(currentOutput) + end + self.output = currentOutput + return currentOutput +end + +function Sequential:updateGradInput(input, gradOutput) + local currentGradOutput = gradOutput + local currentModule = self.modules[#self.modules] + for i=#self.modules-1,1,-1 do + local previousModule = self.modules[i] + currentGradOutput = currentModule:updateGradInput(previousModule.output, currentGradOutput) + currentModule = previousModule + end + currentGradOutput = currentModule:updateGradInput(input, currentGradOutput) + self.gradInput = currentGradOutput + return currentGradOutput +end + +function Sequential:accGradParameters(input, gradOutput, scale) + scale = scale or 1 + + local currentGradOutput = gradOutput + local currentModule = self.modules[#self.modules] + for i=#self.modules-1,1,-1 do + local previousModule = self.modules[i] + currentModule:accGradParameters(previousModule.output, currentGradOutput, scale) + currentGradOutput = currentModule.gradInput + currentModule = previousModule + end + + currentModule:accGradParameters(input, currentGradOutput, scale) +end + +function Sequential:accUpdateGradParameters(input, gradOutput, lr) + local currentGradOutput = gradOutput + local currentModule = self.modules[#self.modules] + for i=#self.modules-1,1,-1 do + local previousModule = self.modules[i] + currentModule:accUpdateGradParameters(previousModule.output, currentGradOutput, lr) + currentGradOutput = currentModule.gradInput + currentModule = previousModule + end + + currentModule:accUpdateGradParameters(input, currentGradOutput, lr) +end + +function 
Sequential:zeroGradParameters() + for i=1,#self.modules do + self.modules[i]:zeroGradParameters() + end +end + +function Sequential:updateParameters(learningRate) + for i=1,#self.modules do + self.modules[i]:updateParameters(learningRate) + end +end + +function Sequential:share(mlp,...) + for i=1,#self.modules do + self.modules[i]:share(mlp.modules[i],...); + end +end + +function Sequential:parameters() + local function tinsert(to, from) + if type(from) == 'table' then + for i=1,#from do + tinsert(to,from[i]) + end + else + table.insert(to,from) + end + end + local w = {} + local gw = {} + for i=1,#self.modules do + local mw,mgw = self.modules[i]:parameters() + if mw then + tinsert(w,mw) + tinsert(gw,mgw) + end + end + return w,gw +end + +function Sequential:__tostring__() + local tab = ' ' + local line = '\n' + local next = ' -> ' + local str = 'nn.Sequential' + str = str .. ' {' .. line .. tab .. '[input' + for i=1,#self.modules do + str = str .. next .. '(' .. i .. ')' + end + str = str .. next .. 'output]' + for i=1,#self.modules do + str = str .. line .. tab .. '(' .. i .. '): ' .. tostring(self.modules[i]):gsub(line, line .. tab) + end + str = str .. line .. '}' + return str +end diff --git a/Sigmoid.lua b/Sigmoid.lua new file mode 100644 index 0000000..efde004 --- /dev/null +++ b/Sigmoid.lua @@ -0,0 +1,9 @@ +local Sigmoid = torch.class('nn.Sigmoid', 'nn.Module') + +function Sigmoid:updateOutput(input) + return input.nn.Sigmoid_updateOutput(self, input) +end + +function Sigmoid:updateGradInput(input, gradOutput) + return input.nn.Sigmoid_updateGradInput(self, input, gradOutput) +end diff --git a/SoftMax.lua b/SoftMax.lua new file mode 100644 index 0000000..609b353 --- /dev/null +++ b/SoftMax.lua @@ -0,0 +1,9 @@ +local SoftMax, parent = torch.class('nn.SoftMax', 'nn.Module') + +function SoftMax:updateOutput(input) + return input.nn.SoftMax_updateOutput(self, input) +end + +function SoftMax:updateGradInput(input, gradOutput) + return input.nn.SoftMax_updateGradInput(self, input, gradOutput) +end diff --git a/SoftMin.lua b/SoftMin.lua new file mode 100644 index 0000000..90c6c60 --- /dev/null +++ b/SoftMin.lua @@ -0,0 +1,15 @@ +local SoftMin, parent = torch.class('nn.SoftMin', 'nn.Module') + +function SoftMin:updateOutput(input) + self.mininput = self.mininput or input.new() + self.mininput:resizeAs(input):copy(input):mul(-1) + return input.nn.SoftMax_updateOutput(self, self.mininput) +end + +function SoftMin:updateGradInput(input, gradOutput) + self.mininput = self.mininput or input.new() + self.mininput:resizeAs(input):copy(input):mul(-1) + self.gradInput = input.nn.SoftMax_updateGradInput(self, self.mininput, gradOutput) + self.gradInput:mul(-1) + return self.gradInput +end diff --git a/SoftPlus.lua b/SoftPlus.lua new file mode 100644 index 0000000..18d586a --- /dev/null +++ b/SoftPlus.lua @@ -0,0 +1,9 @@ +local SoftPlus = torch.class('nn.SoftPlus', 'nn.Module') + +function SoftPlus:updateOutput(input) + return input.nn.SoftPlus_updateOutput(self, input) +end + +function SoftPlus:updateGradInput(input, gradOutput) + return input.nn.SoftPlus_updateGradInput(self, input, gradOutput) +end diff --git a/SoftShrink.lua b/SoftShrink.lua new file mode 100644 index 0000000..379dc61 --- /dev/null +++ b/SoftShrink.lua @@ -0,0 +1,16 @@ +local SoftShrink, parent = torch.class('nn.SoftShrink', 'nn.Module') + +function SoftShrink:__init(lam) + parent.__init(self) + self.lambda = lam or 0.5 +end + +function SoftShrink:updateOutput(input) + input.nn.SoftShrink_updateOutput(self, input) + return 
self.output +end + +function SoftShrink:updateGradInput(input, gradOutput) + input.nn.SoftShrink_updateGradInput(self, input, gradOutput) + return self.gradInput +end diff --git a/SoftSign.lua b/SoftSign.lua new file mode 100644 index 0000000..480894c --- /dev/null +++ b/SoftSign.lua @@ -0,0 +1,15 @@ +local SoftSign = torch.class('nn.SoftSign', 'nn.Module') + +function SoftSign:updateOutput(input) + self.temp = self.temp or input.new() + self.temp:resizeAs(input):copy(input):abs():add(1) + self.output:resizeAs(input):copy(input):cdiv(self.temp) + return self.output +end + +function SoftSign:updateGradInput(input, gradOutput) + self.tempgrad = self.tempgrad or input.new() + self.tempgrad:resizeAs(self.output):copy(input):abs():add(1):cmul(self.tempgrad) + self.gradInput:resizeAs(input):copy(gradOutput):cdiv(self.tempgrad) + return self.gradInput +end diff --git a/SparseLinear.lua b/SparseLinear.lua new file mode 100644 index 0000000..ec8845e --- /dev/null +++ b/SparseLinear.lua @@ -0,0 +1,42 @@ +local SparseLinear, parent = torch.class('nn.SparseLinear', 'nn.Module') + +function SparseLinear:__init(inputSize, outputSize) + parent.__init(self) + + self.weightDecay = 0 + self.weight = torch.Tensor(outputSize, inputSize) + self.bias = torch.Tensor(outputSize) + self.gradWeight = torch.Tensor(outputSize, inputSize) + self.gradBias = torch.Tensor(outputSize) + self.lastInput = torch.Tensor() + -- state + self.gradInput:resize(inputSize) + self.output:resize(outputSize) + + self:reset() +end + +function SparseLinear:reset(stdv) + if stdv then + stdv = stdv * math.sqrt(3) + else + stdv = 1./math.sqrt(self.weight:size(1)) + end + + -- we do this so the initialization is exactly + -- the same than in previous torch versions + for i=1,self.weight:size(1) do + self.weight:select(1, i):apply(function() + return torch.uniform(-stdv, stdv) + end) + self.bias[i] = torch.uniform(-stdv, stdv) * 0.000001 + end +end + +function SparseLinear:updateOutput(input) + return input.nn.SparseLinear_updateOutput(self, input) +end + +function SparseLinear:accGradParameters(input, gradOutput, scale) + return input.nn.SparseLinear_accGradParameters(self, input, gradOutput, scale) +end diff --git a/SpatialConvolution.lua b/SpatialConvolution.lua new file mode 100644 index 0000000..38d2737 --- /dev/null +++ b/SpatialConvolution.lua @@ -0,0 +1,50 @@ +local SpatialConvolution, parent = torch.class('nn.SpatialConvolution', 'nn.Module') + +function SpatialConvolution:__init(nInputPlane, nOutputPlane, kW, kH, dW, dH) + parent.__init(self) + + dW = dW or 1 + dH = dH or 1 + + self.nInputPlane = nInputPlane + self.nOutputPlane = nOutputPlane + self.kW = kW + self.kH = kH + self.dW = dW + self.dH = dH + + self.weight = torch.Tensor(nOutputPlane, nInputPlane, kH, kW) + self.bias = torch.Tensor(nOutputPlane) + self.gradWeight = torch.Tensor(nOutputPlane, nInputPlane, kH, kW) + self.gradBias = torch.Tensor(nOutputPlane) + + self:reset() +end + +function SpatialConvolution:reset(stdv) + if stdv then + stdv = stdv * math.sqrt(3) + else + stdv = 1/math.sqrt(self.kW*self.kH*self.nInputPlane) + end + self.weight:apply(function() + return torch.uniform(-stdv, stdv) + end) + self.bias:apply(function() + return torch.uniform(-stdv, stdv) + end) +end + +function SpatialConvolution:updateOutput(input) + return input.nn.SpatialConvolution_updateOutput(self, input) +end + +function SpatialConvolution:updateGradInput(input, gradOutput) + if self.gradInput then + return input.nn.SpatialConvolution_updateGradInput(self, input, gradOutput) + end +end 
+ +function SpatialConvolution:accGradParameters(input, gradOutput, scale) + return input.nn.SpatialConvolution_accGradParameters(self, input, gradOutput, scale) +end diff --git a/SpatialConvolutionMap.lua b/SpatialConvolutionMap.lua new file mode 100644 index 0000000..0dbff2f --- /dev/null +++ b/SpatialConvolutionMap.lua @@ -0,0 +1,119 @@ +local SpatialConvolutionMap, parent = torch.class('nn.SpatialConvolutionMap', 'nn.Module') + +nn.tables = nn.tables or {} + +function nn.tables.full(nin, nout) + local ft = torch.Tensor(nin*nout,2) + local p = 1 + for j=1,nout do + for i=1,nin do + ft[p][1] = i + ft[p][2] = j + p = p + 1 + end + end + return ft +end + +function nn.tables.oneToOne(nfeat) + local ft = torch.Tensor(nfeat,2) + for i=1,nfeat do + ft[i][1] = i + ft[i][2] = i + end + return ft +end + +function nn.tables.random(nin, nout, nto) + local nker = nto * nout + local tbl = torch.Tensor(nker, 2) + local fi = torch.randperm(nin) + local frcntr = 1 + local tocntr = 1 + local nfi = math.floor(nin/nto) -- number of distinct nto chunks + local rfi = math.mod(nin,nto) -- number of remaining from maps + local totbl = tbl:select(2,2) + local frtbl = tbl:select(2,1) + local fitbl = fi:narrow(1, 1, (nfi * nto)) -- part of fi that covers distinct chunks + local ufrtbl= frtbl:unfold(1, nto, nto) + local utotbl= totbl:unfold(1, nto, nto) + local ufitbl= fitbl:unfold(1, nto, nto) + + -- start filling frtbl + for i=1,nout do -- fro each unit in target map + ufrtbl:select(1,i):copy(ufitbl:select(1,frcntr)) + frcntr = frcntr + 1 + if frcntr-1 == nfi then -- reset fi + fi:copy(torch.randperm(nin)) + frcntr = 1 + end + end + for tocntr=1,utotbl:size(1) do + utotbl:select(1,tocntr):fill(tocntr) + end + return tbl +end + +function SpatialConvolutionMap:__init(conMatrix, kW, kH, dW, dH) + parent.__init(self) + + dW = dW or 1 + dH = dH or 1 + + self.kW = kW + self.kH = kH + self.dW = dW + self.dH = dH + self.connTable = conMatrix + self.nInputPlane = self.connTable:select(2,1):maxall() + self.nOutputPlane = self.connTable:select(2,2):maxall() + + self.weight = torch.Tensor(self.connTable:size(1), kH, kW) + self.bias = torch.Tensor(self.nOutputPlane) + self.gradWeight = torch.Tensor(self.connTable:size(1), kH, kW) + self.gradBias = torch.Tensor(self.nOutputPlane) + + self:reset() +end + +function SpatialConvolutionMap:reset(stdv) + if stdv then + stdv = stdv * math.sqrt(3) + self.weight:apply(function() + return torch.uniform(-stdv, stdv) + end) + self.bias:apply(function() + return torch.uniform(-stdv, stdv) + end) + else + local ninp = torch.Tensor(self.nOutputPlane):zero() + for i=1,self.connTable:size(1) do ninp[self.connTable[i][2]] = ninp[self.connTable[i][2]]+1 end + for k=1,self.connTable:size(1) do + stdv = 1/math.sqrt(self.kW*self.kH*ninp[self.connTable[k][2]]) + self.weight:select(1,k):apply(function() return torch.uniform(-stdv,stdv) end) + end + for k=1,self.bias:size(1) do + stdv = 1/math.sqrt(self.kW*self.kH*ninp[k]) + self.bias[k] = torch.uniform(-stdv,stdv) + end + end +end + +function SpatialConvolutionMap:updateOutput(input) + input.nn.SpatialConvolutionMap_updateOutput(self, input) + return self.output +end + +function SpatialConvolutionMap:updateGradInput(input, gradOutput) + input.nn.SpatialConvolutionMap_updateGradInput(self, input, gradOutput) + return self.gradInput +end + +function SpatialConvolutionMap:accGradParameters(input, gradOutput, scale) + return input.nn.SpatialConvolutionMap_accGradParameters(self, input, gradOutput, scale) +end + +function 
SpatialConvolutionMap:decayParameters(decay) + self.weight:add(-decay, self.weight) + self.bias:add(-decay, self.bias) +end diff --git a/SpatialLPPooling.lua b/SpatialLPPooling.lua new file mode 100644 index 0000000..9b9c87d --- /dev/null +++ b/SpatialLPPooling.lua @@ -0,0 +1,32 @@ +local SpatialLPPooling, parent = torch.class('nn.SpatialLPPooling', 'nn.Sequential') + +function SpatialLPPooling:__init(nInputPlane, pnorm, kW, kH, dW, dH) + parent.__init(self) + + dW = dW or kW + dH = dH or kH + + self.kW = kW + self.kH = kH + self.dW = dW + self.dH = dH + + self.nInputPlane = nInputPlane + self.learnKernel = learnKernel + + if pnorm == 2 then + self:add(nn.Square()) + else + self:add(nn.Power(pnorm)) + end + self:add(nn.SpatialConvolutionMap(nn.tables.oneToOne(nInputPlane), kW, kH, dW, dH)) + if pnorm == 2 then + self:add(nn.Sqrt()) + else + self:add(nn.Power(1/pnorm)) + end + + self:get(2).bias:zero() + self:get(2).weight:fill(1/(kW*kH)) + self:get(2).accGradParameters = nil +end diff --git a/SpatialMaxPooling.lua b/SpatialMaxPooling.lua new file mode 100644 index 0000000..21197ac --- /dev/null +++ b/SpatialMaxPooling.lua @@ -0,0 +1,34 @@ +local SpatialMaxPooling, parent = torch.class('nn.SpatialMaxPooling', 'nn.Module') + +function SpatialMaxPooling:__init(kW, kH, dW, dH) + parent.__init(self) + + dW = dW or kW + dH = dH or kH + + self.kW = kW + self.kH = kH + self.dW = dW + self.dH = dH + + self.indices = torch.Tensor() +end + +function SpatialMaxPooling:updateOutput(input) + input.nn.SpatialMaxPooling_updateOutput(self, input) + return self.output +end + +function SpatialMaxPooling:updateGradInput(input, gradOutput) + input.nn.SpatialMaxPooling_updateGradInput(self, input, gradOutput) + return self.gradInput +end + +function SpatialMaxPooling:empty() + self.gradInput:resize() + self.gradInput:storage():resize(0) + self.output:resize() + self.output:storage():resize(0) + self.indices:resize() + self.indices:storage():resize(0) +end diff --git a/SpatialSubSampling.lua b/SpatialSubSampling.lua new file mode 100644 index 0000000..48b32b9 --- /dev/null +++ b/SpatialSubSampling.lua @@ -0,0 +1,49 @@ +local SpatialSubSampling, parent = torch.class('nn.SpatialSubSampling', 'nn.Module') + +function SpatialSubSampling:__init(nInputPlane, kW, kH, dW, dH) + parent.__init(self) + + dW = dW or 1 + dH = dH or 1 + + self.nInputPlane = nInputPlane + self.kW = kW + self.kH = kH + self.dW = dW + self.dH = dH + + self.weight = torch.Tensor(nInputPlane) + self.bias = torch.Tensor(nInputPlane) + self.gradWeight = torch.Tensor(nInputPlane) + self.gradBias = torch.Tensor(nInputPlane) + + self:reset() +end + +function SpatialSubSampling:reset(stdv) + if stdv then + stdv = stdv * math.sqrt(3) + else + stdv = 1/math.sqrt(self.kW*self.kH) + end + self.weight:apply(function() + return torch.uniform(-stdv, stdv) + end) + self.bias:apply(function() + return torch.uniform(-stdv, stdv) + end) +end + +function SpatialSubSampling:updateOutput(input) + return input.nn.SpatialSubSampling_updateOutput(self, input) +end + +function SpatialSubSampling:updateGradInput(input, gradOutput) + if self.gradInput then + return input.nn.SpatialSubSampling_updateGradInput(self, input, gradOutput) + end +end + +function SpatialSubSampling:accGradParameters(input, gradOutput, scale) + return input.nn.SpatialSubSampling_accGradParameters(self, input, gradOutput, scale) +end diff --git a/SpatialSubtractiveNormalization.lua b/SpatialSubtractiveNormalization.lua new file mode 100644 index 0000000..4df0fc1 --- /dev/null +++ 
b/SpatialSubtractiveNormalization.lua @@ -0,0 +1,104 @@ +local SpatialSubtractiveNormalization, parent = torch.class('nn.SpatialSubtractiveNormalization','nn.Module') + +function SpatialSubtractiveNormalization:__init(nInputPlane, kernel) + parent.__init(self) + + -- get args + self.nInputPlane = nInputPlane or 1 + self.kernel = kernel or torch.Tensor(9,9):fill(1) + local kdim = self.kernel:nDimension() + + -- check args + if kdim ~= 2 and kdim ~= 1 then + error('<SpatialSubtractiveNormalization> averaging kernel must be 2D or 1D') + end + if (self.kernel:size(1) % 2) == 0 or (kdim == 2 and (self.kernel:size(2) % 2) == 0) then + error('<SpatialSubtractiveNormalization> averaging kernel must have ODD dimensions') + end + + -- normalize kernel + self.kernel:div(self.kernel:sumall() * self.nInputPlane) + + -- padding values + local padH = math.floor(self.kernel:size(1)/2) + local padW = padH + if kdim == 2 then + padW = math.floor(self.kernel:size(2)/2) + end + + -- create convolutional mean extractor + self.meanestimator = nn.Sequential() + self.meanestimator:add(nn.SpatialZeroPadding(padW, padW, padH, padH)) + if kdim == 2 then + self.meanestimator:add(nn.SpatialConvolutionMap(nn.tables.oneToOne(self.nInputPlane), + self.kernel:size(2), self.kernel:size(1))) + else + self.meanestimator:add(nn.SpatialConvolutionMap(nn.tables.oneToOne(self.nInputPlane), + self.kernel:size(1), 1)) + self.meanestimator:add(nn.SpatialConvolutionMap(nn.tables.oneToOne(self.nInputPlane), + 1, self.kernel:size(1))) + end + self.meanestimator:add(nn.Sum(1)) + self.meanestimator:add(nn.Replicate(self.nInputPlane)) + + -- set kernel and bias + if kdim == 2 then + for i = 1,self.nInputPlane do + self.meanestimator.modules[2].weight[i] = self.kernel + end + self.meanestimator.modules[2].bias:zero() + else + for i = 1,self.nInputPlane do + self.meanestimator.modules[2].weight[i]:copy(self.kernel) + self.meanestimator.modules[3].weight[i]:copy(self.kernel) + end + self.meanestimator.modules[2].bias:zero() + self.meanestimator.modules[3].bias:zero() + end + + -- other operation + self.subtractor = nn.CSubTable() + self.divider = nn.CDivTable() + + -- coefficient array, to adjust side effects + self.coef = torch.Tensor(1,1,1) +end + +function SpatialSubtractiveNormalization:updateOutput(input) + -- compute side coefficients + if (input:size(3) ~= self.coef:size(2)) or (input:size(2) ~= self.coef:size(1)) then + local ones = input.new():resizeAs(input):fill(1) + self.coef = self.meanestimator:updateOutput(ones) + self.coef = self.coef:clone() + end + + -- compute mean + self.localsums = self.meanestimator:updateOutput(input) + self.adjustedsums = self.divider:updateOutput{self.localsums, self.coef} + self.output = self.subtractor:updateOutput{input, self.adjustedsums} + + -- done + return self.output +end + +function SpatialSubtractiveNormalization:updateGradInput(input, gradOutput) + -- resize grad + self.gradInput:resizeAs(input):zero() + + -- backprop through all modules + local gradsub = self.subtractor:updateGradInput({input, self.adjustedsums}, gradOutput) + local graddiv = self.divider:updateGradInput({self.localsums, self.coef}, gradsub[2]) + self.gradInput:add(self.meanestimator:updateGradInput(input, graddiv[1])) + self.gradInput:add(gradsub[1]) + + -- done + return self.gradInput +end + +function SpatialSubtractiveNormalization:type(type) + parent.type(self,type) + self.meanestimator:type(type) + self.divider:type(type) + self.subtractor:type(type) + return self +end diff --git a/SpatialZeroPadding.lua 
b/SpatialZeroPadding.lua new file mode 100644 index 0000000..af03e71 --- /dev/null +++ b/SpatialZeroPadding.lua @@ -0,0 +1,53 @@ +local SpatialZeroPadding, parent = torch.class('nn.SpatialZeroPadding', 'nn.Module') + +function SpatialZeroPadding:__init(pad_l, pad_r, pad_t, pad_b) + parent.__init(self) + self.pad_l = pad_l + self.pad_r = pad_r or self.pad_l + self.pad_t = pad_t or self.pad_l + self.pad_b = pad_b or self.pad_l +end + +function SpatialZeroPadding:updateOutput(input) + if input:dim() ~= 3 then error('input must be 3-dimensional') end + local h = input:size(2) + self.pad_t + self.pad_b + local w = input:size(3) + self.pad_l + self.pad_r + if w < 1 or h < 1 then error('input is too small') end + self.output:resize(input:size(1), h, w) + self.output:zero() + -- crop input if necessary + local c_input = input + if self.pad_t < 0 then c_input = c_input:narrow(2, 1 - self.pad_t, c_input:size(2) + self.pad_t) end + if self.pad_b < 0 then c_input = c_input:narrow(2, 1, c_input:size(2) + self.pad_b) end + if self.pad_l < 0 then c_input = c_input:narrow(3, 1 - self.pad_l, c_input:size(3) + self.pad_l) end + if self.pad_r < 0 then c_input = c_input:narrow(3, 1, c_input:size(3) + self.pad_r) end + -- crop outout if necessary + local c_output = self.output + if self.pad_t > 0 then c_output = c_output:narrow(2, 1 + self.pad_t, c_output:size(2) - self.pad_t) end + if self.pad_b > 0 then c_output = c_output:narrow(2, 1, c_output:size(2) - self.pad_b) end + if self.pad_l > 0 then c_output = c_output:narrow(3, 1 + self.pad_l, c_output:size(3) - self.pad_l) end + if self.pad_r > 0 then c_output = c_output:narrow(3, 1, c_output:size(3) - self.pad_r) end + -- copy input to output + c_output:copy(c_input) + return self.output +end + +function SpatialZeroPadding:updateGradInput(input, gradOutput) + if input:dim() ~= 3 then error('input must be 3-dimensional') end + self.gradInput:resizeAs(input):zero() + -- crop gradInput if necessary + local cg_input = self.gradInput + if self.pad_t < 0 then cg_input = cg_input:narrow(2, 1 - self.pad_t, cg_input:size(2) + self.pad_t) end + if self.pad_b < 0 then cg_input = cg_input:narrow(2, 1, cg_input:size(2) + self.pad_b) end + if self.pad_l < 0 then cg_input = cg_input:narrow(3, 1 - self.pad_l, cg_input:size(3) + self.pad_l) end + if self.pad_r < 0 then cg_input = cg_input:narrow(3, 1, cg_input:size(3) + self.pad_r) end + -- crop gradOutout if necessary + local cg_output = gradOutput + if self.pad_t > 0 then cg_output = cg_output:narrow(2, 1 + self.pad_t, cg_output:size(2) - self.pad_t) end + if self.pad_b > 0 then cg_output = cg_output:narrow(2, 1, cg_output:size(2) - self.pad_b) end + if self.pad_l > 0 then cg_output = cg_output:narrow(3, 1 + self.pad_l, cg_output:size(3) - self.pad_l) end + if self.pad_r > 0 then cg_output = cg_output:narrow(3, 1, cg_output:size(3) - self.pad_r) end + -- copy gradOuput to gradInput + cg_input:copy(cg_output) + return self.gradInput +end diff --git a/SplitTable.lua b/SplitTable.lua new file mode 100644 index 0000000..d2c690e --- /dev/null +++ b/SplitTable.lua @@ -0,0 +1,30 @@ +local SplitTable, parent = torch.class('nn.SplitTable', 'nn.Module') + +function SplitTable:__init(dimension) + parent.__init(self) + self.modules = {} + self.dimension = dimension +end + +function SplitTable:updateOutput(input) + local currentOutput= {}; + local slices = input:size(self.dimension) + for i=1,slices do + currentOutput[#currentOutput+1] = input:select(self.dimension,i) + end + self.output = currentOutput + return self.output +end + + 
+function SplitTable:updateGradInput(input, gradOutput) + local slices = input:size(self.dimension) + self.gradInput:resizeAs(input) + + local offset = 1 + for i=1,slices do + local currentGradInput = gradOutput[i]; + self.gradInput:select(self.dimension,i):copy(currentGradInput) + end + return self.gradInput +end diff --git a/Sqrt.lua b/Sqrt.lua new file mode 100644 index 0000000..664d434 --- /dev/null +++ b/Sqrt.lua @@ -0,0 +1,13 @@ +local Sqrt, parent = torch.class('nn.Sqrt','nn.Module') + +function Sqrt:__init(args) + parent.__init(self) +end + +function Sqrt:updateOutput(input) + return input.nn.Sqrt_updateOutput(self,input) +end + +function Sqrt:updateGradInput(input, gradOutput) + return input.nn.Sqrt_updateGradInput(self,input,gradOutput) +end diff --git a/Square.lua b/Square.lua new file mode 100644 index 0000000..c1b80dc --- /dev/null +++ b/Square.lua @@ -0,0 +1,13 @@ +local Square, parent = torch.class('nn.Square','nn.Module') + +function Square:__init(args) + parent.__init(self) +end + +function Square:updateOutput(input) + return input.nn.Square_updateOutput(self, input) +end + +function Square:updateGradInput(input, gradOutput) + return input.nn.Square_updateGradInput(self, input, gradOutput) +end diff --git a/StochasticGradient.lua b/StochasticGradient.lua new file mode 100644 index 0000000..2d5e810 --- /dev/null +++ b/StochasticGradient.lua @@ -0,0 +1,57 @@ +local StochasticGradient = torch.class('nn.StochasticGradient') + +function StochasticGradient:__init(module, criterion) + self.learningRate = 0.01 + self.learningRateDecay = 0 + self.maxIteration = 25 + self.shuffleIndices = true + self.module = module + self.criterion = criterion +end + +function StochasticGradient:train(dataset) + local iteration = 1 + local currentLearningRate = self.learningRate + local module = self.module + local criterion = self.criterion + + local shuffledIndices = torch.randperm(dataset:size(), 'torch.LongTensor') + if not self.shuffleIndices then + for t = 1,dataset:size() do + shuffledIndices[t] = t + end + end + + print("# StochasticGradient: training") + + while true do + local currentError = 0 + for t = 1,dataset:size() do + local example = dataset[shuffledIndices[t]] + local input = example[1] + local target = example[2] + + currentError = currentError + criterion:forward(module:forward(input), target) + + module:updateGradInput(input, criterion:updateGradInput(module.output, target)) + module:accUpdateGradParameters(input, criterion.gradInput, currentLearningRate) + + if self.hookExample then + self.hookExample(self, example) + end + end + + if self.hookIteration then + self.hookIteration(self, iteration) + end + + currentError = currentError / dataset:size() + print("# current error = " .. 
currentError) + iteration = iteration + 1 + currentLearningRate = self.learningRate/(1+iteration*self.learningRateDecay) + if self.maxIteration > 0 and iteration > self.maxIteration then + print("# StochasticGradient: you have reached the maximum number of iterations") + break + end + end +end @@ -0,0 +1,27 @@ +local Sum, parent = torch.class('nn.Sum', 'nn.Module') + +function Sum:__init(dimension) + parent.__init(self) + dimension = dimension or 1 + self.dimension = dimension +end + +function Sum:updateOutput(input) + input.torch.sum(self.output, input, self.dimension) + self.output = self.output:select(self.dimension, 1) + return self.output +end + +function Sum:updateGradInput(input, gradOutput) + local size = gradOutput:size():totable() + local stride = gradOutput:stride():totable() + table.insert(size, self.dimension, input:size(self.dimension)) + table.insert(stride, self.dimension, 0) + + self.gradInput:set(gradOutput:storage(), + 1, + torch.LongStorage(size), + torch.LongStorage(stride)) + + return self.gradInput +end diff --git a/Tanh.lua b/Tanh.lua new file mode 100644 index 0000000..b6cf1bf --- /dev/null +++ b/Tanh.lua @@ -0,0 +1,9 @@ +local Tanh = torch.class('nn.Tanh', 'nn.Module') + +function Tanh:updateOutput(input) + return input.nn.Tanh_updateOutput(self, input) +end + +function Tanh:updateGradInput(input, gradOutput) + return input.nn.Tanh_updateGradInput(self, input, gradOutput) +end diff --git a/TemporalConvolution.lua b/TemporalConvolution.lua new file mode 100644 index 0000000..a3aaa7f --- /dev/null +++ b/TemporalConvolution.lua @@ -0,0 +1,51 @@ +local TemporalConvolution, parent = torch.class('nn.TemporalConvolution', 'nn.Module') + +function TemporalConvolution:__init(inputFrameSize, outputFrameSize, kW, dW) + parent.__init(self) + + dW = dW or 1 + + self.inputFrameSize = inputFrameSize + self.outputFrameSize = outputFrameSize + self.kW = kW + self.dW = dW + + self.weight = torch.Tensor(outputFrameSize, inputFrameSize*kW) + self.bias = torch.Tensor(outputFrameSize) + self.gradWeight = torch.Tensor(outputFrameSize, inputFrameSize*kW) + self.gradBias = torch.Tensor(outputFrameSize) + + self:reset() +end + +function TemporalConvolution:reset(stdv) + if stdv then + stdv = stdv * math.sqrt(3) + else + stdv = 1/math.sqrt(self.kW*self.inputFrameSize) + end + self.weight:apply(function() + return torch.uniform(-stdv, stdv) + end) + self.bias:apply(function() + return torch.uniform(-stdv, stdv) + end) +end + +function TemporalConvolution:updateOutput(input) + return input.nn.TemporalConvolution_updateOutput(self, input) +end + +function TemporalConvolution:updateGradInput(input, gradOutput) + if self.gradInput then + return input.nn.TemporalConvolution_updateGradInput(self, input, gradOutput) + end +end + +function TemporalConvolution:accGradParameters(input, gradOutput, scale) + scale = scale or 1 + input.nn.TemporalConvolution_accGradParameters(self, input, gradOutput, scale) +end + +-- we do not need to accumulate parameters when sharing +TemporalConvolution.sharedAccUpdateGradParameters = TemporalConvolution.accUpdateGradParameters diff --git a/TemporalSubSampling.lua b/TemporalSubSampling.lua new file mode 100644 index 0000000..3d06f6e --- /dev/null +++ b/TemporalSubSampling.lua @@ -0,0 +1,48 @@ +local TemporalSubSampling, parent = torch.class('nn.TemporalSubSampling', 'nn.Module') + +function TemporalSubSampling:__init(inputFrameSize, kW, dW) + parent.__init(self) + + dW = dW or 1 + + self.inputFrameSize = inputFrameSize + self.kW = kW + self.dW = dW + + self.weight = 
torch.Tensor(inputFrameSize) + self.bias = torch.Tensor(inputFrameSize) + self.gradWeight = torch.Tensor(inputFrameSize) + self.gradBias = torch.Tensor(inputFrameSize) + + self:reset() +end + +function TemporalSubSampling:reset(stdv) + if stdv then + stdv = stdv * math.sqrt(3) + else + stdv = 1/math.sqrt(self.kW) + end + + self.weight:apply(function() + return torch.uniform(-stdv, stdv) + end) + + self.bias:apply(function() + return torch.uniform(-stdv, stdv) + end) +end + +function TemporalSubSampling:updateOutput(input) + return input.nn.TemporalSubSampling_updateOutput(self, input) +end + +function TemporalSubSampling:updateGradInput(input, gradOutput) + if self.gradInput then + return input.nn.TemporalSubSampling_updateGradInput(self, input, gradOutput) + end +end + +function TemporalSubSampling:accGradParameters(input, gradOutput, scale) + return input.nn.TemporalSubSampling_accGradParameters(self, input, gradOutput, scale) +end diff --git a/Threshold.lua b/Threshold.lua new file mode 100644 index 0000000..6083957 --- /dev/null +++ b/Threshold.lua @@ -0,0 +1,20 @@ +local Threshold, parent = torch.class('nn.Threshold','nn.Module') + +function Threshold:__init(th,v) + parent.__init(self) + self.threshold = th or 1e-6 + self.val = v or 0 + if (th and type(th) ~= 'number') or (v and type(v) ~= 'number') then + error('nn.Threshold(threshold, value)') + end +end + +function Threshold:updateOutput(input) + input.nn.Threshold_updateOutput(self, input) + return self.output +end + +function Threshold:updateGradInput(input, gradOutput) + input.nn.Threshold_updateGradInput(self, input, gradOutput) + return self.gradInput +end diff --git a/VolumetricConvolution.lua b/VolumetricConvolution.lua new file mode 100644 index 0000000..4262199 --- /dev/null +++ b/VolumetricConvolution.lua @@ -0,0 +1,51 @@ +local VolumetricConvolution, parent = torch.class('nn.VolumetricConvolution', 'nn.Module') + +function VolumetricConvolution:__init(nInputPlane, nOutputPlane, kT, kW, kH, dT, dW, dH) + parent.__init(self) + + dT = dT or 1 + dW = dW or 1 + dH = dH or 1 + + self.nInputPlane = nInputPlane + self.nOutputPlane = nOutputPlane + self.kT = kT + self.kW = kW + self.kH = kH + self.dT = dT + self.dW = dW + self.dH = dH + + self.weight = torch.Tensor(nOutputPlane, nInputPlane, kT, kH, kW) + self.bias = torch.Tensor(nOutputPlane) + self.gradWeight = torch.Tensor(nOutputPlane, nInputPlane, kT, kH, kW) + self.gradBias = torch.Tensor(nOutputPlane) + + self:reset() +end + +function VolumetricConvolution:reset(stdv) + if stdv then + stdv = stdv * math.sqrt(3) + else + stdv = 1/math.sqrt(self.kT*self.kW*self.kH*self.nInputPlane) + end + self.weight:apply(function() + return torch.uniform(-stdv, stdv) + end) + self.bias:apply(function() + return torch.uniform(-stdv, stdv) + end) +end + +function VolumetricConvolution:updateOutput(input) + return input.nn.VolumetricConvolution_updateOutput(self, input) +end + +function VolumetricConvolution:updateGradInput(input, gradOutput) + return input.nn.VolumetricConvolution_updateGradInput(self, input, gradOutput) +end + +function VolumetricConvolution:accGradParameters(input, gradOutput, scale) + return input.nn.VolumetricConvolution_accGradParameters(self, input, gradOutput, scale) +end diff --git a/WeightedEuclidean.lua b/WeightedEuclidean.lua new file mode 100644 index 0000000..2761228 --- /dev/null +++ b/WeightedEuclidean.lua @@ -0,0 +1,85 @@ +local WeightedEuclidean, parent = torch.class('nn.WeightedEuclidean', 'nn.Module') + +function 
WeightedEuclidean:__init(inputSize,outputSize) + parent.__init(self) + + self.templates = torch.Tensor(inputSize,outputSize) + self.gradTemplates = torch.Tensor(inputSize,outputSize) + + self.diagCov = torch.Tensor(inputSize,outputSize) + self.gradDiagCov = torch.Tensor(inputSize,outputSize) + + self.gradInput:resize(inputSize) + self.output:resize(outputSize) + self.temp = torch.Tensor(inputSize) + + -- for compat with Torch's modules (it's bad we have to do that) + do + self.weight = self.templates + self.gradWeight = self.gradTemplates + self.bias = self.diagCov + self.gradBias = self.gradDiagCov + end + + self:reset() +end + +function WeightedEuclidean:reset(stdv) + if stdv then + stdv = stdv * math.sqrt(3) + else + stdv = 1./math.sqrt(self.templates:size(1)) + end + + for i=1,self.templates:size(2) do + self.templates:select(2, i):apply(function() + return torch.uniform(-stdv, stdv) + end) + end + + self.diagCov:fill(1) +end + +function WeightedEuclidean:updateOutput(input) + self.output:zero() + for o = 1,self.templates:size(2) do + self.temp:copy(input):add(-1,self.templates:select(2,o)) + self.temp:cmul(self.temp) + self.temp:cmul(self.diagCov:select(2,o)):cmul(self.diagCov:select(2,o)) + self.output[o] = math.sqrt(self.temp:sumall()) + end + return self.output +end + +function WeightedEuclidean:updateGradInput(input, gradOutput) + self:forward(input) + self.gradInput:zero() + for o = 1,self.templates:size(2) do + if self.output[o] ~= 0 then + self.temp:copy(input):add(-1,self.templates:select(2,o)) + self.temp:cmul(self.diagCov:select(2,o)):cmul(self.diagCov:select(2,o)) + self.temp:mul(gradOutput[o]/self.output[o]) + self.gradInput:add(self.temp) + end + end + return self.gradInput +end + +function WeightedEuclidean:accGradParameters(input, gradOutput, scale) + self:forward(input) + scale = scale or 1 + for o = 1,self.templates:size(2) do + if self.output[o] ~= 0 then + self.temp:copy(self.templates:select(2,o)):add(-1,input) + self.temp:cmul(self.diagCov:select(2,o)):cmul(self.diagCov:select(2,o)) + self.temp:mul(gradOutput[o]/self.output[o]) + self.gradTemplates:select(2,o):add(self.temp) + + self.temp:copy(self.templates:select(2,o)):add(-1,input) + self.temp:cmul(self.temp) + self.temp:cmul(self.diagCov:select(2,o)) + self.temp:mul(gradOutput[o]/self.output[o]) + self.gradDiagCov:select(2,o):add(self.temp) + end + end +end diff --git a/dok/abs.png b/dok/abs.png Binary files differnew file mode 100644 index 0000000..fa7f470 --- /dev/null +++ b/dok/abs.png diff --git a/dok/exp.png b/dok/exp.png Binary files differnew file mode 100644 index 0000000..07d28d4 --- /dev/null +++ b/dok/exp.png diff --git a/dok/hshrink.png b/dok/hshrink.png Binary files differnew file mode 100644 index 0000000..7f96292 --- /dev/null +++ b/dok/hshrink.png diff --git a/dok/htanh.png b/dok/htanh.png Binary files differnew file mode 100644 index 0000000..c8e6084 --- /dev/null +++ b/dok/htanh.png diff --git a/dok/index.dok b/dok/index.dok new file mode 100644 index 0000000..ded5265 --- /dev/null +++ b/dok/index.dok @@ -0,0 +1,3053 @@ +====== Neural Network Package ======= +{{anchor:nn.dok}} + +This package provides an easy way to build and train simple or complex +neural networks. 
+
+A network is built out of [[#nn.Modules|Modules]] and there
+are several sub-classes of ''Module'' available: container classes like
+[[#nn.Sequential|Sequential]], [[#nn.Parallel|Parallel]] and
+[[#nn.Concat|Concat]], which can contain simple layers like
+[[#nn.Linear|Linear]], [[#nn.Mean|Mean]], [[#nn.Max|Max]] and
+[[#nn.Reshape|Reshape]], as well as convolutional layers, and transfer
+functions like [[#nn.Tanh|Tanh]].
+
+Loss functions are implemented as sub-classes of
+[[#nn.Criterions|Criterion]]. They are helpful to train a neural network on
+classical tasks. Common criterions are the Mean Squared Error
+criterion implemented in [[#nn.MSECriterion|MSECriterion]] and the
+cross-entropy criterion implemented in
+[[#nn.ClassNLLCriterion|ClassNLLCriterion]].
+
+Finally, the [[#nn.StochasticGradient|StochasticGradient]] class provides a
+high-level way to train the neural network of choice, even though it is
+easy to [[#nn.DoItYourself|train a neural network yourself]] with a simple for loop.
+
+For those who want to implement their own modules, we suggest using
+the ''nn.Jacobian'' class for testing the derivatives of their class,
+together with the [[..:torch:tester|torch.Tester]] class. The sources
+of the ''nn'' package contain many examples of such tests.
+
+
+====== Detailed Overview of the Neural Network Package ======
+{{anchor:nn.overview.dok}}
+
+**Module**
+
+A neural network is called a [[#nn.Module|Module]] (or simply
+//module// in this documentation) in Torch. ''Module'' is an abstract
+class which defines four main methods:
+ * [[#nn.Module.forward|forward(input)]] which computes the output of the module given the ''input'' [[..:torch:tensor|Tensor]].
+ * [[#nn.Module.backward|backward(input, gradOutput)]] which computes the gradients of the module with respect to its own parameters, and its own inputs.
+ * [[#nn.Module.zeroGradParameters|zeroGradParameters()]] which zeroes the gradient with respect to the parameters of the module.
+ * [[#nn.Module.updateParameters|updateParameters(learningRate)]] which updates the parameters after one has computed the gradients with ''backward()''.
+
+It also declares two members:
+ * [[#nn.Module.output|output]] which is the output returned by ''forward()''.
+ * [[#nn.Module.gradInput|gradInput]] which contains the gradients with respect to the input of the module, computed in a ''backward()''.
+
+Two other perhaps less used but handy methods are also defined:
+ * [[#nn.Module.share|share(mlp,s1,s2,...,sn)]] which makes this module share the parameters ''s1'',...,''sn'' of the module ''mlp''. This is useful if you want to have modules that share the same weights.
+ * [[#nn.Module.clone|clone(...)]] which produces a deep copy of (i.e. not just a pointer to) this Module, including the current state of its parameters (if any).
+
+Some important remarks:
+ * ''output'' contains only valid values after a [[#nn.Module.forward|forward(input)]].
+ * ''gradInput'' contains only valid values after a [[#nn.Module.backward|backward(input, gradOutput)]].
+ * [[#nn.Module.backward|backward(input, gradOutput)]] uses certain computations obtained during [[#nn.Module.forward|forward(input)]]. You //must// call ''forward()'' before calling a ''backward()'', on the //same// ''input'', or your gradients are going to be incorrect!
+
+
+**Plug and play**
+
+Building a simple neural network can be achieved by constructing an available layer.
+A linear neural network (perceptron!)
is built in only one line:
+<file lua>
+mlp = nn.Linear(10,1) -- perceptron with 10 inputs
+</file>
+
+More complex neural networks are easily built using the container classes
+[[#nn.Sequential|Sequential]] and [[#nn.Concat|Concat]]. ''Sequential'' plugs
+layers together in a feed-forward fully connected manner. ''Concat'' concatenates
+several modules into one layer: they take the same inputs, and their outputs are
+concatenated.
+
+Creating a one hidden-layer multi-layer perceptron is thus just as easy as:
+<file lua>
+mlp = nn.Sequential()
+mlp:add( nn.Linear(10, 25) ) -- 10 input, 25 hidden units
+mlp:add( nn.Tanh() ) -- some hyperbolic tangent transfer function
+mlp:add( nn.Linear(25, 1) ) -- 1 output
+</file>
+
+Of course, ''Sequential'' and ''Concat'' can contain other
+''Sequential'' or ''Concat'' modules, allowing you to try the craziest neural
+networks you ever dreamt of! See the [[#nn.Modules|complete list of
+available modules]].
+
+**Training a neural network**
+
+Once you have built your neural network, you have to choose a particular
+[[#nn.Criterions|Criterion]] to train it. A criterion is a class which
+describes the cost to be minimized during training.
+
+You can then train the neural network by using the
+[[#nn.StochasticGradient|StochasticGradient]] class.
+
+<file lua>
+ criterion = nn.MSECriterion() -- Mean Squared Error criterion
+ trainer = nn.StochasticGradient(mlp, criterion)
+ trainer:train(dataset) -- train using some examples
+</file>
+
+''StochasticGradient'' expects as a ''dataset'' an object which implements
+the operator ''dataset[index]'' and the method
+''dataset:size()''. The ''size()'' method returns the number of
+examples and ''dataset[i]'' has to return the i-th example.
+
+An ''example'' has to be an object which implements the operator
+''example[field]'', where ''field'' might take the value ''1'' (input
+features) or ''2'' (corresponding label which will be given to the
+criterion). The input is usually a Tensor (except if you use special
+kinds of modules, like [[#nn.TableLayers|table layers]]). The
+label type depends on the criterion. For example, the
+[[#nn.MSECriterion|MSECriterion]] expects a Tensor, but the
+[[#nn.ClassNLLCriterion|ClassNLLCriterion]] expects an integer number (the
+class).
+
+Such a dataset is easily constructed by using Lua tables, but it could
+be any ''C'' object for example, as long as the required operators/methods
+are implemented. [[#nn.DoItStochasticGradient|See an example]].
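+
+For instance, here is a minimal in-memory sketch of such a dataset (the
+sizes and the random contents are arbitrary, for illustration only; the
+example linked above is more complete):
+<file lua>
+require "lab"
+dataset = {}
+function dataset:size() return 100 end -- number of examples
+for i = 1,dataset:size() do
+ dataset[i] = {lab.randn(10), lab.randn(1)} -- {input, target} pairs
+end
+-- such an object can be given directly to trainer:train(dataset)
+</file>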
+
+Since ''StochasticGradient'' is written in ''Lua'', it is extremely easy
+to cut-and-paste it and create a variant adapted to your needs
+(if the constraints of ''StochasticGradient'' do not satisfy you).
+
+**Low Level Training of a Neural Network**
+
+If you want to program the equivalent of ''StochasticGradient'' by hand, you
+essentially need to control the forward and backward passes through
+the network yourself. For example, here is the code fragment one
+would need to make a gradient step given an input ''x'', a desired
+output ''y'', a network ''mlp'', a given criterion ''criterion''
+and learning rate ''learningRate'':
+
+<file lua>
+function gradUpdate(mlp, x, y, criterion, learningRate)
+ local pred = mlp:forward(x)
+ local err = criterion:forward(pred, y)
+ local gradCriterion = criterion:backward(pred, y)
+ mlp:zeroGradParameters()
+ mlp:backward(x, gradCriterion)
+ mlp:updateParameters(learningRate)
+end
+</file>
+If you wish to use your own criterion you can simply replace
+''gradCriterion'' with the gradient vector of your criterion of choice.
+
+
+====== Modules ======
+{{anchor:nn.Modules}}
+
+Modules are the bricks used to build neural networks. A [[#nn.Module|Module]] is a neural network
+by itself, but it can be combined with other networks using [[#nn.Containers|container classes]] to create
+complex neural networks.
+
+===== Module =====
+{{anchor:nn.Module}}
+
+''Module'' is an abstract class which defines the fundamental methods necessary
+for training a neural network. Modules are [[..:torch:file#torch.file.serialization|serializable]].
+
+Modules contain two state variables: [[#nn.ModuleOutput|output]] and
+[[#nn.ModuleGradInput|gradInput]].
+
+==== [output] forward(input) ====
+{{anchor:nn.Module.forward}}
+
+Takes an ''input'' object, and computes the corresponding ''output'' of the
+module. In general ''input'' and ''output'' are
+[[..:torch:tensor|Tensors]]. However, some special sub-classes
+like [[#nn.TableLayers|table layers]] might expect something else. Please
+refer to each module specification for further information.
+
+After a ''forward()'', the [[#nn.ModuleOutput|output]] state variable should
+have been updated to the new value.
+
+It is not advised to override this function. Instead, one should
+implement the [[#nn.Module.updateOutput|updateOutput(input)]]
+function. The ''forward'' method in the abstract parent class
+[[#nn.Module|Module]] will call ''updateOutput(input)''.
+
+==== [gradInput] backward(input, gradOutput) ====
+{{anchor:nn.Module.backward}}
+
+Performs a //backpropagation step// through the module, with respect to the
+given ''input''. In general this method makes the assumption that
+[[#nn.Module.forward|forward(input)]] has been called before, //with the same input//.
+This is necessary for optimization reasons. If you do not respect
+this rule, ''backward()'' will compute incorrect gradients.
+
+In general ''input'', ''gradOutput'' and ''gradInput'' are
+[[..:torch:tensor|Tensors]]. However, some special sub-classes
+like [[#nn.TableLayers|table layers]] might expect something else. Please
+refer to each module specification for further information.
+
+A //backpropagation step// consists of computing two kinds of gradients
+at ''input'' given ''gradOutput'' (gradients with respect to the
+output of the module). This function simply performs this task using
+two function calls:
+
+ - A function call to [[#nn.Module.updateGradInput|updateGradInput(input, gradOutput)]].
+ - A function call to [[#nn.Module.accGradParameters|accGradParameters(input, gradOutput)]].
+
+It is not advised to override this function in custom classes. It
+is better to override the
+[[#nn.Module.updateGradInput|updateGradInput(input, gradOutput)]] and
+[[#nn.Module.accGradParameters|accGradParameters(input, gradOutput)]]
+functions.
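+
+For instance, here is a minimal sketch of a custom module with no
+parameters (the module name ''nn.Scale'' and its behaviour are made up
+for illustration; it is not part of the ''nn'' sources). It only needs
+to override ''updateOutput()'' and ''updateGradInput()'':
+<file lua>
+local Scale, parent = torch.class('nn.Scale', 'nn.Module')
+
+function Scale:__init(factor)
+ parent.__init(self)
+ self.factor = factor or 1
+end
+
+function Scale:updateOutput(input)
+ -- output = factor * input
+ self.output:resizeAs(input):copy(input):mul(self.factor)
+ return self.output
+end
+
+function Scale:updateGradInput(input, gradOutput)
+ -- d(output)/d(input) is just the factor, so gradInput = factor * gradOutput
+ self.gradInput:resizeAs(gradOutput):copy(gradOutput):mul(self.factor)
+ return self.gradInput
+end
+</file>
+Since the module has no parameters, there is nothing to do in
+''accGradParameters()'' and the default implementation can be kept.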
+
+==== updateOutput(input) ====
+{{anchor:nn.Module.updateOutput}}
+
+Computes the output using the current parameter set of the class and
+input. This function returns the result, which is stored in the
+[[#nn.Module.output|output]] field.
+
+==== updateGradInput(input, gradOutput) ====
+{{anchor:nn.Module.updateGradInput}}
+
+Computes the gradient of the module with respect to its own
+input. This is returned in ''gradInput''. Also, the
+[[#nn.Module.gradInput|gradInput]] state variable is updated
+accordingly.
+
+==== accGradParameters(input, gradOutput) ====
+{{anchor:nn.Module.accGradParameters}}
+
+Computes the gradient of the module with respect to its
+own parameters. Many modules do not perform this step as they do not
+have any parameters. The state variable name for the parameters is
+module dependent. The module is expected to //accumulate// the
+gradients with respect to the parameters in some variable.
+
+Zeroing this accumulation is achieved with
+[[#nn.Module.zeroGradParameters|zeroGradParameters()]] and updating
+the parameters according to this accumulation is done with
+[[#nn.Module.updateParameters|updateParameters()]].
+
+==== zeroGradParameters() ====
+{{anchor:nn.Module.zeroGradParameters}}
+
+If the module has parameters, this will zero the accumulation of the
+gradients with respect to these parameters, accumulated through
+[[#nn.Module.accGradParameters|accGradParameters(input, gradOutput)]]
+calls. Otherwise, it does nothing.
+
+==== updateParameters(learningRate) ====
+{{anchor:nn.Module.updateParameters}}
+
+If the module has parameters, this will update these parameters, according
+to the accumulation of the gradients with respect to these parameters,
+accumulated through [[#nn.Module.backward|backward()]] calls.
+
+The update is basically:
+<file lua>
+parameters = parameters - learningRate * gradients_wrt_parameters
+</file>
+If the module does not have parameters, it does nothing.
+
+==== accUpdateGradParameters(input, gradOutput, learningRate) ====
+{{anchor:nn.Module.accUpdateGradParameters}}
+
+This is a convenience function that performs two operations at
+once: it calculates and accumulates the gradients with respect to the
+weights after multiplying them with the negative of the learning rate
+''learningRate''. Performing these two operations at once is more
+efficient and it might be advantageous in certain
+situations.
+
+Keep in mind that this function uses a simple trick to achieve its
+goal and it might not be valid for a custom module.
+
+<file lua>
+function Module:accUpdateGradParameters(input, gradOutput, lr)
+ local gradWeight = self.gradWeight
+ local gradBias = self.gradBias
+ self.gradWeight = self.weight
+ self.gradBias = self.bias
+ self:accGradParameters(input, gradOutput, -lr)
+ self.gradWeight = gradWeight
+ self.gradBias = gradBias
+end
+</file>
+
+As can be seen, the gradients are accumulated directly into the
+weights. This assumption may not be true for a module that computes a
+nonlinear operation.
+
+==== share(mlp,s1,s2,...,sn) ====
+{{anchor:nn.Module.share}}
+
+This function modifies the parameters named
+''s1'',...,''sn'' of the module (if they exist), so that they are shared with (pointers
+to) the parameters with the same names in the given module ''mlp''.
+
+The parameters have to be Tensors. This function is typically used if
+you want to have modules that share the same weights or biases.
+
+Note that if this function is called on a [[#nn.Containers|Container]]
+module, it will share the parameters of all the contained modules as
+well.
+
+Example:
+<file lua>
+
+-- make an mlp
+mlp1=nn.Sequential();
+mlp1:add(nn.Linear(100,10));
+
+-- make a second mlp
+mlp2=nn.Sequential();
+mlp2:add(nn.Linear(100,10));
+
+-- the second mlp shares the bias of the first
+mlp2:share(mlp1,'bias');
+
+-- we change the bias of the first
+mlp1:get(1).bias[1]=99;
+
+-- and see that the second one's bias has also changed..
+print(mlp2:get(1).bias[1])
+
+</file>
+
+
+==== clone(mlp,...) ====
+{{anchor:nn.Module.clone}}
+
+Creates a deep copy of (i.e. not just a pointer to) the module,
+including the current state of its parameters (e.g. weights, biases,
+etc., if any).
+
+If arguments are provided to the ''clone(...)'' function, it also calls
+[[#nn.Module.share|share(...)]] with those arguments on the cloned
+module after creating it, hence making a deep copy of this module with
+some shared parameters.
+
+Example:
+<file lua>
+-- make an mlp
+mlp1=nn.Sequential();
+mlp1:add(nn.Linear(100,10));
+
+-- make a copy that shares the weights and biases
+mlp2=mlp1:clone('weight','bias');
+
+-- we change the bias of the first mlp
+mlp1:get(1).bias[1]=99;
+
+-- and see that the second one's bias has also changed..
+print(mlp2:get(1).bias[1])
+
+</file>
+
+==== type(type) ====
+{{anchor:nn.Module.type}}
+
+This function converts all the parameters of a module to the given
+''type''. The ''type'' can be one of the types defined for
+[[..:torch:tensor|torch.Tensor]].
+
+==== float() ====
+{{anchor:nn.Module.float}}
+
+Convenience method for calling [[#nn.Module.type|module:type('torch.FloatTensor')]].
+
+==== double() ====
+{{anchor:nn.Module.double}}
+
+Convenience method for calling [[#nn.Module.type|module:type('torch.DoubleTensor')]].
+
+==== cuda() ====
+{{anchor:nn.Module.cuda}}
+
+Convenience method for calling [[#nn.Module.type|module:type('torch.CudaTensor')]].
+
+==== State Variables ====
+{{anchor:nn.statevars.dok}}
+
+These state variables are useful objects if one wants to check the guts of
+a ''Module''. The object pointer is //never// supposed to change. However, its
+contents (including its size, if it is a Tensor) are supposed to change.
+
+In general, state variables are
+[[..:torch:tensor|Tensors]]. However, some special sub-classes
+like [[#nn.TableLayers|table layers]] contain something else. Please
+refer to each module specification for further information.
+
+=== output ===
+{{anchor:nn.Module.output}}
+
+This contains the output of the module, computed with the last call of
+[[#nn.Module.forward|forward(input)]].
+
+=== gradInput ===
+{{anchor:nn.Module.gradInput}}
+
+This contains the gradients with respect to the inputs of the module, computed with the last call of
+[[#nn.Module.updateGradInput|updateGradInput(input, gradOutput)]].
+
+==== Parameters and gradients w.r.t. parameters ====
+
+Some modules contain parameters (the ones that we actually want to
+train!). The names of these parameters, and of the gradients w.r.t. these
+parameters, are module dependent.
+
+==== [{weights}, {gradWeights}] parameters() ====
+{{anchor:nn.Module.parameters}}
+
+This function should return two tables: one for the learnable
+parameters ''{weights}'' and another for the gradients of the energy
+w.r.t. the learnable parameters ''{gradWeights}''.
+
+For custom modules, it is a good idea to also override this
+function. By default none of the built-in functions/modules use this
+function call, but it is especially useful when one wants to obtain a
+global view of the whole network.
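+
+For instance, the returned tables can be used to write a manual update
+loop over every parameter tensor of a network (a minimal sketch, not part
+of the ''nn'' sources; the architecture and learning rate are arbitrary):
+<file lua>
+mlp = nn.Sequential()
+mlp:add(nn.Linear(10,5))
+mlp:add(nn.Tanh())
+mlp:add(nn.Linear(5,1))
+
+local weights, gradWeights = mlp:parameters()
+for i = 1,#weights do
+ -- same update as mlp:updateParameters(0.01), written by hand
+ weights[i]:add(-0.01, gradWeights[i])
+end
+</file>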
By default none of the built-in functions/modules use this +function call, but it is especialy useful when one wants to obtain a +global view of the whole network. + +===== Containers ===== +{{anchor:nn.Containers}} + +==== Concat ==== +{{anchor:nn.Concat}} + +<file lua> +module = nn.Concat(dim) +</file> +Concat concatenates the output of one layer of "parallel" modules along the +provided dimension ''dim'': they take the same inputs, and their output is +concatenated. +<file lua> +mlp=nn.Concat(1); +mlp:add(nn.Linear(5,3)) +mlp:add(nn.Linear(5,7)) +require "lab" +print(mlp:forward(lab.randn(5))) +</file> +which gives the output: +<file lua> + 0.7486 + 0.1349 + 0.7924 +-0.0371 +-0.4794 + 0.3044 +-0.0835 +-0.7928 + 0.7856 +-0.1815 +[torch.Tensor of dimension 10] +</file> + + +==== Sequential ==== +{{anchor:nn.Sequential}} + +Sequential provides a means to plug layers together +in a feed-forward fully connected manner. + +E.g. +creating a one hidden-layer multi-layer perceptron is thus just as easy as: +<file lua> +mlp = nn.Sequential() +mlp:add( nn.Linear(10, 25) ) -- 10 input, 25 hidden units +mlp:add( nn.Tanh() ) -- some hyperbolic tangent transfer function +mlp:add( nn.Linear(25, 1) ) -- 1 output + +require "lab" +print(mlp:forward(lab.randn(10))) +</file> +which gives the output: +<file lua> +-0.1815 +[torch.Tensor of dimension 1] +</file> + +==== Parallel ==== +{{anchor:nn.Parallel}} + +''module'' = ''Parallel(inputDimension,outputDimension)'' + +Creates a container module that applies its ''ith'' child module to the ''ith'' slice of the input Tensor by using [[..:torch:tensor#torch.tensor.select|select]] +on dimension ''inputDimension''. It concatenates the results of its contained modules together along dimension ''outputDimension''. + +Example: +<file lua> + require "lab" + mlp=nn.Parallel(2,1); -- iterate over dimension 2 of input + mlp:add(nn.Linear(10,3)); -- apply to first slice + mlp:add(nn.Linear(10,2)) -- apply to first second slice + print(mlp:forward(lab.randn(10,2))) +</file> +gives the output: +<file lua> +-0.5300 +-1.1015 + 0.7764 + 0.2819 +-0.6026 +[torch.Tensor of dimension 5] +</file> + +A more complicated example: +<file lua> +require "lab" + +mlp=nn.Sequential(); +c=nn.Parallel(1,2) +for i=1,10 do + local t=nn.Sequential() + t:add(nn.Linear(3,2)) + t:add(nn.Reshape(2,1)) + c:add(t) +end +mlp:add(c) + +pred=mlp:forward(lab.randn(10,3)) +print(pred) + +for i=1,10000 do -- Train for a few iterations + x=lab.randn(10,3); + y=lab.ones(2,10); + pred=mlp:forward(x) + + criterion= nn.MSECriterion() + local err=criterion:forward(pred,y) + local gradCriterion = criterion:backward(pred,y); + mlp:zeroGradParameters(); + mlp:backward(x, gradCriterion); + mlp:updateParameters(0.01); + print(err) +end +</file> +===== Simple layers ===== +{{anchor:nn.simplelayers.dok}} +==== Linear ==== +{{anchor:nn.Linear}} + +''module'' = ''Linear(inputDimension,outputDimension)'' + +Applies a linear transformation to the incoming data, i.e. //y= +Ax+b//. The ''input'' tensor given in ''forward(input)'' must be +either a vector (1D tensor) or matrix (2D tensor). If the input is a +matrix, then each row is assumed to be an input sample of given batch. 
+
+You can create a layer in the following way:
+<file lua>
+ module= nn.Linear(10,5)  -- 10 inputs, 5 outputs
+</file>
+Usually this would be added to a network of some kind, e.g.:
+<file lua>
+ mlp = nn.Sequential();
+ mlp:add(module)
+</file>
+The weights and biases (//A// and //b//) can be viewed with:
+<file lua>
+ print(module.weight)
+ print(module.bias)
+</file>
+The gradients for these weights can be seen with:
+<file lua>
+ print(module.gradWeight)
+ print(module.gradBias)
+</file>
+As usual with ''nn'' modules,
+applying the linear transformation is performed with:
+<file lua>
+ x=torch.Tensor(10) -- 10 inputs
+ y=module:forward(x)
+</file>
+
+==== SparseLinear ====
+{{anchor:nn.SparseLinear}}
+
+''module'' = ''SparseLinear(inputDimension,outputDimension)''
+
+Applies a linear transformation to the incoming sparse data, i.e.
+//y= Ax+b//. The ''input'' tensor given in ''forward(input)'' must
+be a sparse vector represented as a 2D tensor of the form
+torch.Tensor(N, 2) where the pairs represent indices and values.
+The SparseLinear layer is useful when the number of input
+dimensions is very large and the input data is sparse.
+
+You can create a sparse linear layer in the following way:
+
+<file lua>
+ module= nn.SparseLinear(10000,2)  -- 10000 inputs, 2 outputs
+</file>
+The sparse linear module may be used as part of a larger network,
+and apart from the form of the input,
+[[#nn.SparseLinear|SparseLinear]]
+operates in exactly the same way as the [[#nn.Linear|Linear]] layer.
+
+A sparse input vector may be created as follows:
+<file lua>
+
+ x=lab.new({1, 0.1},{2, 0.3},{10, 0.3},{31, 0.2})
+
+ print(x)
+
+  1.0000   0.1000
+  2.0000   0.3000
+ 10.0000   0.3000
+ 31.0000   0.2000
+[torch.Tensor of dimension 4x2]
+
+</file>
+
+The first column contains indices, the second column contains the
+values of a vector in which all other elements are zero. The
+indices should not exceed the stated dimensions of the input to the
+layer (10000 in the example).
+
+==== Abs ====
+{{anchor:nn.Abs}}
+
+''module'' = ''Abs()''
+
+''output = abs(input)''.
+
+<file lua>
+m=nn.Abs()
+ii=lab.linspace(-5,5)
+oo=m:forward(ii)
+go=lab.ones(100)
+gi=m:backward(ii,go)
+gnuplot.plot({'f(x)',ii,oo,'+-'},{'df/dx',ii,gi,'+-'})
+gnuplot.grid(true)
+</file>
+
+{{abs.png?400}}
+
+==== Add ====
+{{anchor:nn.Add}}
+
+''module'' = ''Add(inputDimension,scalar)''
+
+Applies a bias term to the incoming data, i.e. //y_i = x_i + b_i//,
+or, if ''scalar'' = ''true'', a single bias term is used: //y_i = x_i + b//.
+
+Example:
+<file lua>
+y=torch.Tensor(5);
+mlp=nn.Sequential()
+mlp:add(nn.Add(5))
+
+function gradUpdate(mlp, x, y, criterion, learningRate)
+   local pred = mlp:forward(x)
+   local err = criterion:forward(pred, y)
+   local gradCriterion = criterion:backward(pred, y)
+   mlp:zeroGradParameters()
+   mlp:backward(x, gradCriterion)
+   mlp:updateParameters(learningRate)
+   return err
+end
+
+for i=1,10000 do
+ x=lab.rand(5)
+ y:copy(x);
+ for i=1,5 do y[i]=y[i]+i; end
+ err=gradUpdate(mlp,x,y,nn.MSECriterion(),0.01)
+end
+print(mlp:get(1).bias)
+</file>
+gives the output:
+<file lua>
+ 1.0000
+ 2.0000
+ 3.0000
+ 4.0000
+ 5.0000
+[torch.Tensor of dimension 5]
+</file>
+i.e. the network successfully learns that the input //x// has been shifted
+to produce the output //y//.
+
+
+==== Mul ====
+{{anchor:nn.Mul}}
+
+''module'' = ''Mul(inputDimension)''
+
+Applies a //single// scaling factor to the incoming data, i.e.
+//y= w x//, where //w// is a scalar.
+
+Example:
+<file lua>
+y=torch.Tensor(5);
+mlp=nn.Sequential()
+mlp:add(nn.Mul(5))
+
+function gradUpdate(mlp, x, y, criterion, learningRate)
+   local pred = mlp:forward(x)
+   local err = criterion:forward(pred,y)
+   local gradCriterion = criterion:backward(pred,y);
+   mlp:zeroGradParameters();
+   mlp:backward(x, gradCriterion);
+   mlp:updateParameters(learningRate);
+   return err
+end
+
+
+for i=1,10000 do
+ x=lab.rand(5)
+ y:copy(x); y:mul(math.pi);
+ err=gradUpdate(mlp,x,y,nn.MSECriterion(),0.01)
+end
+print(mlp:get(1).weight)
+</file>
+gives the output:
+<file lua>
+ 3.1416
+[torch.Tensor of dimension 1]
+</file>
+i.e. the network successfully learns that the input ''x'' has been scaled by
+pi.
+
+==== CMul ====
+{{anchor:nn.CMul}}
+
+''module'' = ''CMul(inputDimension)''
+
+Applies a component-wise multiplication to the incoming data, i.e.
+''y_i = w_i * x_i''.
+
+Example:
+<file lua>
+mlp=nn.Sequential()
+mlp:add(nn.CMul(5))
+
+y=torch.Tensor(5);
+sc=torch.Tensor(5); for i=1,5 do sc[i]=i; end -- scale input with this
+
+function gradUpdate(mlp,x,y,criterion,learningRate)
+   local pred = mlp:forward(x)
+   local err = criterion:forward(pred,y)
+   local gradCriterion = criterion:backward(pred,y);
+   mlp:zeroGradParameters();
+   mlp:backward(x, gradCriterion);
+   mlp:updateParameters(learningRate);
+   return err
+end
+
+for i=1,10000 do
+ x=lab.rand(5)
+ y:copy(x); y:cmul(sc);
+ err=gradUpdate(mlp,x,y,nn.MSECriterion(),0.01)
+end
+print(mlp:get(1).weight)
+</file>
+gives the output:
+<file lua>
+ 1.0000
+ 2.0000
+ 3.0000
+ 4.0000
+ 5.0000
+[torch.Tensor of dimension 5]
+</file>
+i.e. the network successfully learns that the input //x// has been scaled by
+those scaling factors to produce the output //y//.
+
+
+==== Max ====
+{{anchor:nn.Max}}
+
+''module'' = ''Max(dimension)''
+
+Applies a max operation over dimension ''dimension''.
+Hence, if an ''nxpxq'' Tensor was given as input, and ''dimension'' = ''2''
+then an ''nxq'' matrix would be output.
+
+
+==== Min ====
+{{anchor:nn.Min}}
+
+''module'' = ''Min(dimension)''
+
+Applies a min operation over dimension ''dimension''.
+Hence, if an ''nxpxq'' Tensor was given as input, and ''dimension'' = ''2''
+then an ''nxq'' matrix would be output.
+
+
+==== Mean ====
+{{anchor:nn.Mean}}
+
+''module'' = ''Mean(dimension)''
+
+Applies a mean operation over dimension ''dimension''.
+Hence, if an ''nxpxq'' Tensor was given as input, and ''dimension'' = ''2''
+then an ''nxq'' matrix would be output.
+
+==== Sum ====
+{{anchor:nn.Sum}}
+
+''module'' = ''Sum(dimension)''
+
+Applies a sum operation over dimension ''dimension''.
+Hence, if an ''nxpxq'' Tensor was given as input, and ''dimension'' = ''2''
+then an ''nxq'' matrix would be output.
+
+
+==== Euclidean ====
+{{anchor:nn.Euclidean}}
+
+''module'' = ''Euclidean(inputDimension,outputDimension)''
+
+Outputs the Euclidean distance of the input to ''outputDimension'' centers,
+i.e. this layer has the weights ''c_i'', ''i'' = ''1'',..,''outputDimension'', where
+''c_i'' are vectors of dimension ''inputDimension''. Output dimension ''i'' is
+''|| c_i - x ||^2'', where ''x'' is the input.
+
+==== WeightedEuclidean ====
+{{anchor:nn.WeightedEuclidean}}
+
+''module'' = ''WeightedEuclidean(inputDimension,outputDimension)''
+
+This module is similar to [[#nn.Euclidean|Euclidean]], but
+additionally learns a separate diagonal covariance matrix across the
+features of the input space for each center.
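+
+For a quick check of the shapes involved in [[#nn.Euclidean|Euclidean]] (and analogously
+WeightedEuclidean), here is a minimal sketch; it assumes the ''lab'' package is loaded,
+as in the other examples in this document:
+<file lua>
+m = nn.Euclidean(5,3)   -- 3 centers, each of dimension 5
+x = lab.rand(5)         -- a random input vector
+print(m:forward(x))     -- 3 values: one distance per center
+print(m.weight)         -- the centers are stored in the weight tensor
+</file>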
+ + +==== Copy ==== +{{anchor:nn.Copy}} + +''module'' = ''Copy(inputType,outputType)'' + +This layer copies the input to output with type casting from input +type from ''inputType'' to ''outputType''. + + +==== Narrow ==== +{{anchor:nn.Narrow}} + +''module'' = ''Narrow(dimension, offset, length)'' + +Narrow is application of +[[..:torch:tensor:#torch.Tensor.narrow|narrow]] operation in a +module. + +==== Replicate ==== +{{anchor:nn.Replicate}} + +''module'' = ''Replicate(nFeature)'' + +This class creates an output where the input is replicated +''nFeature'' times along its first dimension. There is no memory +allocation or memory copy in this module. It sets the +[[..:torch:tensor#torch.Tensor.stride|stride]] along the first +dimension to zero. + +<file lua> +torch> x=lab.linspace(1,5,5) +torch> =x + 1 + 2 + 3 + 4 + 5 +[torch.DoubleTensor of dimension 5] + +torch> m=nn.Replicate(3) +torch> o=m:forward(x) +torch> =o + 1 2 3 4 5 + 1 2 3 4 5 + 1 2 3 4 5 +[torch.DoubleTensor of dimension 3x5] + +torch> x:fill(13) +torch> =x + 13 + 13 + 13 + 13 + 13 +[torch.DoubleTensor of dimension 5] + +torch> =o + 13 13 13 13 13 + 13 13 13 13 13 + 13 13 13 13 13 +[torch.DoubleTensor of dimension 3x5] + +</file> + + +==== Reshape ==== +{{anchor:nn.Reshape}} + +''module'' = ''Reshape(dimension1, dimension2, ..)'' + +Reshapes an ''nxpxqx..'' Tensor into a ''dimension1xdimension2x...'' Tensor, +taking the elements column-wise. + +Example: +<file lua> +> x=torch.Tensor(4,4) +> for i=1,4 do +> for j=1,4 do +> x[i][j]=(i-1)*4+j; +> end +> end +> print(x) + + 1 2 3 4 + 5 6 7 8 + 9 10 11 12 + 13 14 15 16 +[torch.Tensor of dimension 4x4] + +> print(nn.Reshape(2,8):forward(x)) + + 1 9 2 10 3 11 4 12 + 5 13 6 14 7 15 8 16 +[torch.Tensor of dimension 2x8] + +> print(nn.Reshape(8,2):forward(x)) + + 1 3 + 5 7 + 9 11 + 13 15 + 2 4 + 6 8 + 10 12 + 14 16 +[torch.Tensor of dimension 8x2] + +> print(nn.Reshape(16):forward(x)) + + 1 + 5 + 9 + 13 + 2 + 6 + 10 + 14 + 3 + 7 + 11 + 15 + 4 + 8 + 12 + 16 +[torch.Tensor of dimension 16] + + +</file> + + +==== Select ==== +{{anchor:nn.Select}} + +Selects a dimension and index of a ''nxpxqx..'' Tensor. + +Example: +<file lua> +mlp=nn.Sequential(); +mlp:add(nn.Select(1,3)) + +require "lab" +x=lab.randn(10,5) +print(x) +print(mlp:forward(x)) +</file> +gives the output: +<file lua> + 0.9720 -0.0836 0.0831 -0.2059 -0.0871 + 0.8750 -2.0432 -0.1295 -2.3932 0.8168 + 0.0369 1.1633 0.6483 1.2862 0.6596 + 0.1667 -0.5704 -0.7303 0.3697 -2.2941 + 0.4794 2.0636 0.3502 0.3560 -0.5500 +-0.1898 -1.1547 0.1145 -1.1399 0.1711 +-1.5130 1.4445 0.2356 -0.5393 -0.6222 +-0.6587 0.4314 1.1916 -1.4509 1.9400 + 0.2733 1.0911 0.7667 0.4002 0.1646 + 0.5804 -0.5333 1.1621 1.5683 -0.1978 +[torch.Tensor of dimension 10x5] + + 0.0369 + 1.1633 + 0.6483 + 1.2862 + 0.6596 +[torch.Tensor of dimension 5] +</file> + +This can be used in conjunction with [[#nn.Concat|Concat]] +to emulate the behavior +of [[#nn.Parallel|Parallel]], or to select various parts of an input Tensor to +perform operations on. 
Here is a fairly complicated example:
+<file lua>
+require "lab"
+
+mlp=nn.Sequential();
+c=nn.Concat(2)
+for i=1,10 do
+   local t=nn.Sequential()
+   t:add(nn.Select(1,i))
+   t:add(nn.Linear(3,2))
+   t:add(nn.Reshape(2,1))
+   c:add(t)
+end
+mlp:add(c)
+
+pred=mlp:forward(lab.randn(10,3))
+print(pred)
+
+for i=1,10000 do     -- Train for a few iterations
+ x=lab.randn(10,3);
+ y=lab.ones(2,10);
+ pred=mlp:forward(x)
+
+ criterion= nn.MSECriterion()
+ err=criterion:forward(pred,y)
+ gradCriterion = criterion:backward(pred,y);
+ mlp:zeroGradParameters();
+ mlp:backward(x, gradCriterion);
+ mlp:updateParameters(0.01);
+ print(err)
+end
+</file>
+
+==== Exp ====
+{{anchor:nn.Exp}}
+
+Applies the ''exp'' function element-wise to the input Tensor,
+thus outputting a Tensor of the same dimension.
+<file lua>
+ii=lab.linspace(-2,2)
+m=nn.Exp()
+oo=m:forward(ii)
+go=lab.ones(100)
+gi=m:backward(ii,go)
+gnuplot.plot({'f(x)',ii,oo,'+-'},{'df/dx',ii,gi,'+-'})
+gnuplot.grid(true)
+</file>
+{{exp.png?400}}
+
+
+==== Square ====
+{{anchor:nn.Square}}
+
+Takes the square of each element.
+
+<file lua>
+ii=lab.linspace(-5,5)
+m=nn.Square()
+oo=m:forward(ii)
+go=lab.ones(100)
+gi=m:backward(ii,go)
+gnuplot.plot({'f(x)',ii,oo,'+-'},{'df/dx',ii,gi,'+-'})
+gnuplot.grid(true)
+</file>
+{{square.png?400}}
+
+==== Sqrt ====
+{{anchor:nn.Sqrt}}
+
+Takes the square root of each element.
+
+<file lua>
+ii=lab.linspace(0,5)
+m=nn.Sqrt()
+oo=m:forward(ii)
+go=lab.ones(100)
+gi=m:backward(ii,go)
+gnuplot.plot({'f(x)',ii,oo,'+-'},{'df/dx',ii,gi,'+-'})
+gnuplot.grid(true)
+</file>
+{{sqrt.png?400}}
+
+==== Power ====
+{{anchor:nn.Power}}
+
+''module'' = ''Power(p)''
+
+Raises each element to its ''pth'' power.
+
+<file lua>
+ii=lab.linspace(0,2)
+m=nn.Power(1.25)
+oo=m:forward(ii)
+go=lab.ones(100)
+gi=m:backward(ii,go)
+gnuplot.plot({'f(x)',ii,oo,'+-'},{'df/dx',ii,gi,'+-'})
+gnuplot.grid(true)
+</file>
+{{power.png?400}}
+
+===== Transfer Function Layers =====
+{{anchor:nn.transfer.dok}}
+
+==== HardTanh ====
+{{anchor:nn.HardTanh}}
+
+Applies the ''HardTanh'' function element-wise to the input Tensor,
+thus outputting a Tensor of the same dimension.
+
+''HardTanh'' is defined as:
+
+  * ''f(x) = 1, if x > 1''
+  * ''f(x) = -1, if x < -1''
+  * ''f(x) = x, otherwise''
+
+<file lua>
+ii=lab.linspace(-2,2)
+m=nn.HardTanh()
+oo=m:forward(ii)
+go=lab.ones(100)
+gi=m:backward(ii,go)
+gnuplot.plot({'f(x)',ii,oo,'+-'},{'df/dx',ii,gi,'+-'})
+gnuplot.grid(true)
+</file>
+{{htanh.png?400}}
+
+
+==== HardShrink ====
+{{anchor:nn.HardShrink}}
+
+''module = nn.HardShrink(lambda)''
+
+Applies the hard shrinkage function element-wise to the input
+[[..:torch:Tensor|Tensor]]. The output is the same size as the input.
+
+The ''HardShrinkage'' operator is defined as:
+
+  * ''f(x) = x, if x > lambda''
+  * ''f(x) = x, if x < -lambda''
+  * ''f(x) = 0, otherwise''
+
+<file lua>
+ii=lab.linspace(-2,2)
+m=nn.HardShrink(0.85)
+oo=m:forward(ii)
+go=lab.ones(100)
+gi=m:backward(ii,go)
+gnuplot.plot({'f(x)',ii,oo,'+-'},{'df/dx',ii,gi,'+-'})
+gnuplot.grid(true)
+</file>
+{{hshrink.png?400}}
+
+==== SoftShrink ====
+{{anchor:nn.SoftShrink}}
+
+''module = nn.SoftShrink(lambda)''
+
+Applies the soft shrinkage function element-wise to the input
+[[..:torch:Tensor|Tensor]]. The output is the same size as the input.
+
+The ''SoftShrinkage'' operator is defined as:
+
+  * ''f(x) = x-lambda, if x > lambda''
+  * ''f(x) = x+lambda, if x < -lambda''
+  * ''f(x) = 0, otherwise''
+
+<file lua>
+ii=lab.linspace(-2,2)
+m=nn.SoftShrink(0.85)
+oo=m:forward(ii)
+go=lab.ones(100)
+gi=m:backward(ii,go)
+gnuplot.plot({'f(x)',ii,oo,'+-'},{'df/dx',ii,gi,'+-'})
+gnuplot.grid(true)
+</file>
+{{sshrink.png?400}}
+
+
+==== SoftMax ====
+{{anchor:nn.SoftMax}}
+
+Applies the ''Softmax'' function to an n-dimensional input Tensor,
+rescaling it so that the elements of the n-dimensional output Tensor
+lie in the range (0,1) and sum to 1.
+
+''Softmax'' is defined as ''f_i(x)'' = ''exp(x_i-shift) / sum_j exp(x_j-shift)'',
+where ''shift'' = ''max_i x_i''.
+
+
+<file lua>
+ii=lab.exp(lab.abs(lab.randn(10)))
+m=nn.SoftMax()
+oo=m:forward(ii)
+gnuplot.plot({'Input',ii,'+-'},{'Output',oo,'+-'})
+gnuplot.grid(true)
+</file>
+{{softmax.png?400}}
+
+==== SoftMin ====
+{{anchor:nn.SoftMin}}
+
+Applies the ''Softmin'' function to an n-dimensional input Tensor,
+rescaling it so that the elements of the n-dimensional output Tensor
+lie in the range (0,1) and sum to 1.
+
+''Softmin'' is defined as ''f_i(x)'' = ''exp(-x_i-shift) / sum_j exp(-x_j-shift)'',
+where ''shift'' = ''max_i (-x_i)''.
+
+
+<file lua>
+ii=lab.exp(lab.abs(lab.randn(10)))
+m=nn.SoftMin()
+oo=m:forward(ii)
+gnuplot.plot({'Input',ii,'+-'},{'Output',oo,'+-'})
+gnuplot.grid(true)
+</file>
+{{softmin.png?400}}
+
+==== SoftPlus ====
+{{anchor:nn.SoftPlus}}
+
+Applies the ''SoftPlus'' function to an n-dimensional input Tensor.
+Can be used to constrain the output of a machine to always be positive.
+
+''SoftPlus'' is defined as ''f_i(x)'' = ''log(1 + exp(x_i))''.
+
+<file lua>
+ii=lab.randn(10)
+m=nn.SoftPlus()
+oo=m:forward(ii)
+go=lab.ones(10)
+gi=m:backward(ii,go)
+gnuplot.plot({'Input',ii,'+-'},{'Output',oo,'+-'},{'gradInput',gi,'+-'})
+gnuplot.grid(true)
+</file>
+{{softplus.png?400}}
+
+==== SoftSign ====
+{{anchor:nn.SoftSign}}
+
+Applies the ''SoftSign'' function to an n-dimensional input Tensor.
+
+''SoftSign'' is defined as ''f_i(x) = x_i / (1+|x_i|)''.
+
+<file lua>
+ii=lab.linspace(-5,5)
+m=nn.SoftSign()
+oo=m:forward(ii)
+go=lab.ones(100)
+gi=m:backward(ii,go)
+gnuplot.plot({'f(x)',ii,oo,'+-'},{'df/dx',ii,gi,'+-'})
+gnuplot.grid(true)
+</file>
+{{softsign.png?400}}
+
+==== LogSigmoid ====
+{{anchor:nn.LogSigmoid}}
+
+Applies the ''LogSigmoid'' function to an n-dimensional input Tensor.
+
+''LogSigmoid'' is defined as ''f_i(x)'' = ''log(1/(1+exp(-x_i)))''.
+
+
+<file lua>
+ii=lab.randn(10)
+m=nn.LogSigmoid()
+oo=m:forward(ii)
+go=lab.ones(10)
+gi=m:backward(ii,go)
+gnuplot.plot({'Input',ii,'+-'},{'Output',oo,'+-'},{'gradInput',gi,'+-'})
+gnuplot.grid(true)
+</file>
+{{logsigmoid.png?400}}
+
+
+==== LogSoftMax ====
+{{anchor:nn.LogSoftMax}}
+
+Applies the ''LogSoftmax'' function to an n-dimensional input Tensor.
+
+''LogSoftmax'' is defined as ''f_i(x)'' = ''log(1/a exp(x_i))'',
+where  ''a'' = ''sum_j exp(x_j)''.
+
+<file lua>
+ii=lab.randn(10)
+m=nn.LogSoftMax()
+oo=m:forward(ii)
+go=lab.ones(10)
+gi=m:backward(ii,go)
+gnuplot.plot({'Input',ii,'+-'},{'Output',oo,'+-'},{'gradInput',gi,'+-'})
+gnuplot.grid(true)
+</file>
+{{logsoftmax.png?400}}
+
+==== Sigmoid ====
+{{anchor:nn.Sigmoid}}
+
+Applies the ''Sigmoid'' function element-wise to the input Tensor,
+thus outputting a Tensor of the same dimension.
+
+''Sigmoid'' is defined as ''f(x)'' = ''1/(1+exp(-x))''.
+ +<file lua> +ii=lab.linspace(-5,5) +m=nn.Sigmoid() +oo=m:forward(ii) +go=lab.ones(100) +gi=m:backward(ii,go) +gnuplot.plot({'f(x)',ii,oo,'+-'},{'df/dx',ii,gi,'+-'}) +gnuplot.grid(true) +</file> +{{sigmoid.png?400}} + +==== Tanh ==== +{{anchor:nn.Tanh}} + +Applies the ''Tanh'' function element-wise to the input Tensor, +thus outputting a Tensor of the same dimension. + +<file lua> +ii=lab.linspace(-3,3) +m=nn.Tanh() +oo=m:forward(ii) +go=lab.ones(100) +gi=m:backward(ii,go) +gnuplot.plot({'f(x)',ii,oo,'+-'},{'df/dx',ii,gi,'+-'}) +gnuplot.grid(true) +</file> +{{tanh.png?400}} + +===== Convolutional layers ===== +{{anchor:nn.convlayers.dok}} + +SpatialConvolution and SpatialSubsampling apply to inputs with +two-dimensional relationships (e.g. images). TemporalConvolution and +TemporalSubsampling apply to sequences with a one-dimensional +relationship (e.g. strings of some kind). + +For spatial convolutional layers, the input is supposed to be 3D. The +first dimension is the number of features, the last two dimenstions +are spatial. + +==== SpatialConvolution ==== +{{anchor:nn.SpatialConvolution}} + +<file lua> +module = nn.SpatialConvolution(nInputPlane, nOutputPlane, kW, kH, [dW], [dH]) +</file> + +Applies a 2D convolution over an input image composed of several input planes. The ''input'' tensor in +''forward(input)'' is expected to be a 3D tensor (''width x height x nInputPlane''). + +The parameters are the following: + * ''nInputPlane'': The number of expected input planes in the image given into ''forward()''. + * ''nOutputPlane'': The number of output planes the convolution layer will produce. + * ''kW'': The kernel width of the convolution + * ''kH'': The kernel height of the convolution + * ''dW'': The step of the convolution in the width dimension. Default is ''1''. + * ''dH'': The step of the convolution in the height dimension. Default is ''1''. + +Note that depending of the size of your kernel, several (of the last) +columns or rows of the input image might be lost. It is up to the user to +add proper padding in images. + +If the input image is a 3D tensor ''nInputPlane x width x height'', the output image size +will be ''nOutputPlane x owidth x oheight'' where +<file lua> +owidth = (width - kW) / dW + 1 +oheight = (height - kH) / dH + 1 . +</file> + +The parameters of the convolution can be found in ''self.weight'' (Tensor of +size ''nOutputPlane x nInputPlane x kH x kW'') and ''self.bias'' (Tensor of +size ''nOutputPlane''). The corresponding gradients can be found in +''self.gradWeight'' and ''self.gradBias''. + +The output value of the layer can be precisely described as: +<file lua> +output[i][j][k] = bias[k] + + sum_l sum_{s=1}^kW sum_{t=1}^kH weight[s][t][l][k] + * input[dW*(i-1)+s)][dH*(j-1)+t][l] +</file> + +==== SpatialConvolutionMap ==== +{{anchor:nn.SpatialConvolutionMap}} + +<file lua> +module = nn.SpatialConvolutionMap(connectionMatrix, kW, kH, [dW], [dH]) +</file> + +This class is a generalization of +[[#nn.SpatialConvolution|nn.SpatialConvolution]]. It uses a geenric +connection table between input and output features. The +[[#nn.SpatialConvolution|nn.SpatialConvolution]] is equivalent to +using a [[#nn.tables.full|full connection table]]. One can specify +different types of connection tables. + +=== Full Connection Table === +{{anchor:nn.tables.full}} + +''table = nn.tables.full(nin,nout)'' + +This is a precomputed table that specifies connections between every +input and output node. 
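+
+For illustration, here is a minimal sketch (under the same conventions as the rest of
+this section) of building a [[#nn.SpatialConvolutionMap|SpatialConvolutionMap]] with a
+full connection table:
+<file lua>
+-- connect all 3 input planes to all 16 output planes
+conn = nn.tables.full(3,16)
+m = nn.SpatialConvolutionMap(conn, 5, 5)  -- 5x5 kernels
+-- as noted above, this behaves like nn.SpatialConvolution(3,16,5,5)
+</file>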
+ +=== One to One Connection Table === +{{anchor:nn.tables.onetoone}} + +''table = nn.tables.oneToOne(n)'' + +This is a precomputed table that specifies a single connection to each +output node from corresponding input node. + +=== Random Connection Table === +{{anchor:nn.tables.random}} + +''table = nn.tables.random(nin,nout, nto)'' + +This table is randomly populated such that each output unit has +''nto'' incoming connections. The algorihtm tries to assign uniform +number of outgoing connections to each input node if possible. + +==== SpatialLPPooling ==== +{{anchor:nn.SpatialLPPooling}} + +<file lua> +module = nn.SpatialLPPooling(nInputPlane, pnorm, kW, kH, [dW], [dH]) +</file> + +Computes the ''p'' norm in a convolutional manner on a set of 2D input planes. + +==== SpatialMaxPooling ==== +{{anchor:nn.SpatialMaxPooling}} + +<file lua> +module = nn.SpatialMaxPooling(kW, kH [, dW, dH]) +</file> + +Applies 2D max-pooling operation in ''kWxkH'' regions by step size +''dWxdH'' steps. The number of output features is equal to the number of +input planes. + +==== SpatialSubSampling ==== +{{anchor:nn.SpatialSubSampling}} + +<file lua> +module = nn.SpatialSubSampling(nInputPlane, kW, kH, [dW], [dH]) +</file> + +Applies a 2D sub-sampling over an input image composed of several input planes. The ''input'' tensor in +''forward(input)'' is expected to be a 3D tensor (''nInputPlane x width x height''). The number of output +planes will be the same as ''nInputPlane''. + +The parameters are the following: + * ''nInputPlane'': The number of expected input planes in the image given into ''forward()''. + * ''kW'': The kernel width of the sub-sampling + * ''kH'': The kernel height of the sub-sampling + * ''dW'': The step of the sub-sampling in the width dimension. Default is ''1''. + * ''dH'': The step of the sub-sampling in the height dimension. Default is ''1''. + +Note that depending of the size of your kernel, several (of the last) +columns or rows of the input image might be lost. It is up to the user to +add proper padding in images. + +If the input image is a 3D tensor ''width x height x nInputPlane'', the output image size +will be ''owidth x oheight x nInputPlane'' where +<file lua> +owidth = (width - kW) / dW + 1 +oheight = (height - kH) / dH + 1 . +</file> + +The parameters of the sub-sampling can be found in ''self.weight'' (Tensor of +size ''nInputPlane'') and ''self.bias'' (Tensor of size ''nInputPlane''). The +corresponding gradients can be found in ''self.gradWeight'' and +''self.gradBias''. + +The output value of the layer can be precisely described as: +<file lua> +output[i][j][k] = bias[k] + + weight[k] sum_{s=1}^kW sum_{t=1}^kH input[dW*(i-1)+s)][dH*(j-1)+t][k] +</file> + +==== SpatialZeroPadding ==== +{{anchor:nn.SpatialZeroPadding}} + +<file lua> +module = nn.SpatialZeroPadding(padLeft, padRight, padTop, padBottom) +</file> + +Each feature map of a given input is padded with specified number of +zeros. If padding values are negative, then input is cropped. + +==== SpatialSubtractiveNormalization ==== +{{anchor:nn.SpatialSubtractiveNormalization}} + +<file lua> +module = nn.SpatialSubtractiveNormalization(ninputplane, kernel) +</file> + +Applies a spatial subtraction operation on a series of 2D inputs using +''kernel'' for computing the weighted average in a neighborhood. The +neighborhood is defined for a local spatial region that is the size as +kernel and across all features. For a an input image, since there is +only one feature, the region is only spatial. 
For an RGB image, the +weighted anerage is taken over RGB channels and a spatial region. + +If the ''kernel'' is 1D, then it will be used for constructing and seperable +2D kernel. The operations will be much more efficient in this case. + +The kernel is generally chosen as a gaussian when it is believed that +the correlation of two pixel locations decrease with increasing +distance. On the feature dimension, a uniform average is used since +the weighting across features is not known. + +For this example we use an external package +[[http://www.github.com/clementfarabet/lua---image/|image]] + +<file lua> +require 'image' +require 'nn' +lena = image.rgb2y(image.lena()) +ker = lab.ones(11) +m=nn.SpatialSubtractiveNormalization(1,ker) +processed = m:forward(lena) +w1=image.display(lena) +w2=image.display(processed) +</file> +{{lena.jpg?300}}{{lenap.jpg?300}} + +==== TemporalConvolution ==== +{{anchor:nn.TemporalConvolution}} + +<file lua> +module = nn.TemporalConvolution(inputFrameSize, outputFrameSize, kW, [dW]) +</file> + +Applies a 1D convolution over an input sequence composed of ''nInputFrame'' frames. The ''input'' tensor in +''forward(input)'' is expected to be a 2D tensor (''nInputFrame x inputFrameSize''). + +The parameters are the following: + * ''inputFrameSize'': The input frame size expected in sequences given into ''forward()''. + * ''outputFrameSize'': The output frame size the convolution layer will produce. + * ''kW'': The kernel width of the convolution + * ''dW'': The step of the convolution. Default is ''1''. + +Note that depending of the size of your kernel, several (of the last) +frames of the sequence might be lost. It is up to the user to add proper padding frames in the input +sequences. + +If the input sequence is a 2D tensor ''inputFrameSize x nInputFrame'', the output sequence will be +''nOutputFrame x outputFrameSize'' where +<file lua> +nOutputFrame = (nInputFrame - kW) / dW + 1 +</file> + +The parameters of the convolution can be found in ''self.weight'' (Tensor of +size ''outputFrameSize x (inputFrameSize x kW) '') and ''self.bias'' (Tensor of +size ''outputFrameSize''). The corresponding gradients can be found in +''self.gradWeight'' and ''self.gradBias''. 
+ +The output value of the layer can be precisely described as: +<file lua> +output[i][t] = bias[i] + + sum_j sum_{k=1}^kW weight[j][k][i] + * input[j][dW*(t-1)+k)] +</file> + +Here is a simple example: + +<file lua> +inp=5; -- dimensionality of one sequence element +outp=1; -- number of derived features for one sequence element +kw=1; -- kernel only operates on one sequence element at once +dw=1; -- we step once and go on to the next sequence element + +mlp=nn.TemporalConvolution(inp,outp,kw,dw) + +require "lab" +x=lab.rand(7,inp) -- a sequence of 7 elements +print(mlp:forward(x)) +</file> +which gives: +<file lua> +-0.9109 +-0.9872 +-0.6808 +-0.9403 +-0.9680 +-0.6901 +-0.6387 +[torch.Tensor of dimension 7x1] +</file> + +This is equivalent to: +<file lua> +weights=lab.reshape(mlp.weight,inp) -- weights applied to all +bias= mlp.bias[1]; +for i=1,x:size(1) do -- for each sequence element + element= x[i]; -- features of ith sequence element + print(element:dot(weights) + bias) +end +</file> +which gives: +<file lua> +-0.91094998687717 +-0.98721705771773 +-0.68075004276185 +-0.94030132495887 +-0.96798754116609 +-0.69008470895581 +-0.63871422284166 +</file> + + +==== TemporalSubSampling ==== +{{anchor:nn.TemporalSubSampling}} + +<file lua> +module = nn.TemporalSubSampling(inputFrameSize, kW, [dW]) +</file> + +Applies a 1D sub-sampling over an input sequence composed of ''nInputFrame'' frames. The ''input'' tensor in +''forward(input)'' is expected to be a 2D tensor (''nInputFrame x inputFrameSize''). The output frame size +will be the same as the input one (''inputFrameSize''). + +The parameters are the following: + * ''inputFrameSize'': The input frame size expected in sequences given into ''forward()''. + * ''kW'': The kernel width of the sub-sampling + * ''dW'': The step of the sub-sampling. Default is ''1''. + +Note that depending of the size of your kernel, several (of the last) +frames of the sequence might be lost. It is up to the user to add proper padding frames in the input +sequences. + +If the input sequence is a 2D tensor ''nInputFrame x inputFrameSize'', the output sequence will be +''inputFrameSize x nOutputFrame'' where +<file lua> +nOutputFrame = (nInputFrame - kW) / dW + 1 +</file> + +The parameters of the sub-sampling can be found in ''self.weight'' (Tensor of +size ''inputFrameSize'') and ''self.bias'' (Tensor of +size ''inputFrameSize''). The corresponding gradients can be found in +''self.gradWeight'' and ''self.gradBias''. + +The output value of the layer can be precisely described as: +<file lua> +output[i][t] = bias[i] + weight[i] * sum_{k=1}^kW input[i][dW*(t-1)+k)] +</file> + +==== LookupTable ==== +{{anchor:nn.LookupTable}} + +<file lua> +module = nn.LookupTable(nIndex, sizes) +</file> +or +<file lua> +module = nn.LookupTable(nIndex, size1, [size2], [size3], ...) +</file> + +This layer is a particular case of a convolution, where the width of the convolution would be ''1''. +When calling ''forward(input)'', it assumes ''input'' is a 1D tensor filled with indices. Indices start +at ''1'' and can go up to ''nIndex''. For each index, it outputs a corresponding ''Tensor'' of size +specified by ''sizes'' (an ''LongStorage'') or ''size1 x size2 x...''. + +The output tensors are concatenated, generating a ''size1 x size2 x ... x sizeN x n'' tensor, where ''n'' +is the size of the ''input'' tensor. 
+ +When only ''size1'' is provided, this is equivalent to do the following matrix-matrix multiplication +in an efficient manner: +<file lua> +M P +</file> +where ''M'' is a 2D matrix ''size1 x nIndex'' containing the parameters of the lookup-table and +''P'' is a 2D matrix, where each column vector ''i'' is a zero vector except at index ''input[i]'' where it is ''1''. + +Example: +<file lua> + -- a lookup table containing 10 tensors of size 3 + module = nn.LookupTable(10, 3) + + input = torch.Tensor(4) + input[1] = 1; input[2] = 2; input[3] = 1; input[4] = 10; + print(module:forward(input)) +</file> + +Outputs something like: +<file lua> +-0.1784 2.2045 -0.1784 -0.2475 +-1.0120 0.0537 -1.0120 -0.2148 +-1.2840 0.8685 -1.2840 -0.2792 +[torch.Tensor of dimension 3x4] +</file> +Note that the first column vector is the same than the 3rd one! + +===== Layers for manipulating tables ===== +{{anchor:nn.TableLayers}} + +This set of modules allows the manipulation of Tables +through the layers of a neural network. +This allows one to build very rich architectures. + +Table-based modules work by supporting forward and backward methods that can accept +tables as inputs. It turns out that the usual [[#nn.Sequential|Sequential]] module can do this, so all that is needed is other child modules that take advantage of such tables. +<file lua> +mlp = nn.Sequential(); +t={x,y,z} +pred=mlp:forward(t) +pred=mlp:forward{x,y,z} -- This is equivalent to the line before +</file> + +==== ConcatTable ==== +{{anchor:nn.ConcatTable}} + +ConcatTable is a container module that applies each member module to +the same input Tensor. + +Example: +<file lua> +mlp= nn.ConcatTable() +mlp:add(nn.Linear(5,2)) +mlp:add(nn.Linear(5,3)) + +require "lab" +pred=mlp:forward(lab.randn(5)); +for i,k in pairs(pred) do print(i,k); end +</file> +which gives the output: +<file lua> +1 +-0.4073 + 0.0110 +[torch.Tensor of dimension 2] + +2 + 0.0027 +-0.0598 +-0.1189 +[torch.Tensor of dimension 3] +</file> + +==== ParallelTable ==== +{{anchor:nn.ParallelTable}} + +ParallelTable is a container module that, in its ''forward'' method, applies the ''ith'' member module to the ''ith'' input, and outputs a table of the set of outputs. + +Example: +<file lua> +mlp= nn.ParallelTable() +mlp:add(nn.Linear(10,2)) +mlp:add(nn.Linear(5,3)) + +require "lab" +x=lab.randn(10) +y=lab.rand(5) + +pred=mlp:forward{x,y} +for i,k in pairs(pred) do print(i,k); end +</file> +which gives the output: +<file lua> +1 + 0.0331 + 0.7003 +[torch.Tensor of dimension 2] + +2 + 0.0677 +-0.1657 +-0.7383 +[torch.Tensor of dimension 3] +</file> + +==== SplitTable ==== +{{anchor:nn.SplitTable}} + +''module'' = ''SplitTable(dimension)'' + +Creates a module that takes a Tensor as input and outputs several tables, splitting the Tensor along dimension ''dimension''. 
+ +Example 1: +<file lua> +require "lab" +mlp=nn.SplitTable(2) +x=lab.randn(4,3) +pred=mlp:forward(x) +for i,k in pairs(pred) do print(i,k); end +</file> +gives the output: +<file lua> +1 + 1.3885 + 1.3295 + 0.4281 +-1.0171 +[torch.Tensor of dimension 4] + +2 +-1.1565 +-0.8556 +-1.0717 +-0.8316 +[torch.Tensor of dimension 4] + +3 +-1.3678 +-0.1709 +-0.0191 +-2.5871 +[torch.Tensor of dimension 4] +</file> + +Example 2: +<file lua> +require "lab" +mlp=nn.SplitTable(1) +pred=mlp:forward(lab.randn(10,3)) +for i,k in pairs(pred) do print(i,k); end +</file> +gives the output: +<file lua> +1 + 1.6114 + 0.9038 + 0.8419 +[torch.Tensor of dimension 3] + +2 + 2.4742 + 0.2208 + 1.6043 +[torch.Tensor of dimension 3] + +3 + 1.3415 + 0.2984 + 0.2260 +[torch.Tensor of dimension 3] + +4 + 2.0889 + 1.2309 + 0.0983 +[torch.Tensor of dimension 3] +</file> + +A more complicated example: +<file lua> +require "lab" + +mlp=nn.Sequential(); --Create a network that takes a Tensor as input +mlp:add(nn.SplitTable(2)) + c=nn.ParallelTable() --The two Tensors go through two different Linear + c:add(nn.Linear(10,3)) --Layers in Parallel + c:add(nn.Linear(10,7)) +mlp:add(c) --Outputing a table with 2 elements + p=nn.ParallelTable() --These tables go through two more linear layers + p:add(nn.Linear(3,2)) -- separately. + p:add(nn.Linear(7,1)) +mlp:add(p) +mlp:add(nn.JoinTable(1)) --Finally, the tables are joined together and output. + +pred=mlp:forward(lab.randn(10,2)) +print(pred) + +for i=1,100 do -- A few steps of training such a network.. + x=lab.ones(10,2); + y=torch.Tensor(3); y:copy(x:select(2,1,1):narrow(1,1,3)) + pred=mlp:forward(x) + + criterion= nn.MSECriterion() + local err=criterion:forward(pred,y) + local gradCriterion = criterion:backward(pred,y); + mlp:zeroGradParameters(); + mlp:backward(x, gradCriterion); + mlp:updateParameters(0.05); + + print(err) +end +</file> + +==== JoinTable ==== +{{anchor:nn.JoinTable}} + +''module'' = ''JoinTable(dimension)'' + +Creates a module that takes a list of Tensors as input and outputs a Tensor by joining them together along dimension ''dimension''. + +Example: +<file lua> +require "lab" +x=lab.randn(5,1) +y=lab.randn(5,1) +z=lab.randn(2,1) + +print(nn.JoinTable(1):forward{x,y}) +print(nn.JoinTable(2):forward{x,y}) +print(nn.JoinTable(1):forward{x,z}) +</file> +gives the output: +<file lua> +1.3965 + 0.5146 +-1.5244 +-0.9540 + 0.4256 + 0.1575 + 0.4491 + 0.6580 + 0.1784 +-1.7362 + + 1.3965 0.1575 + 0.5146 0.4491 +-1.5244 0.6580 +-0.9540 0.1784 + 0.4256 -1.7362 + + 1.3965 + 0.5146 +-1.5244 +-0.9540 + 0.4256 +-1.2660 + 1.0869 +[torch.Tensor of dimension 7x1] +</file> + +A more complicated example: +<file lua> +require "lab" + +mlp=nn.Sequential(); --Create a network that takes a Tensor as input + c=nn.ConcatTable() --The same Tensor goes through two different Linear + c:add(nn.Linear(10,3)) --Layers in Parallel + c:add(nn.Linear(10,7)) +mlp:add(c) --Outputing a table with 2 elements + p=nn.ParallelTable() --These tables go through two more linear layers + p:add(nn.Linear(3,2)) -- separately. + p:add(nn.Linear(7,1)) +mlp:add(p) +mlp:add(nn.JoinTable(1)) --Finally, the tables are joined together and output. + +pred=mlp:forward(lab.randn(10)) +print(pred) + +for i=1,100 do -- A few steps of training such a network.. 
+ x=lab.ones(10); + y=torch.Tensor(3); y:copy(x:narrow(1,1,3)) + pred=mlp:forward(x) + + criterion= nn.MSECriterion() + local err=criterion:forward(pred,y) + local gradCriterion = criterion:backward(pred,y); + mlp:zeroGradParameters(); + mlp:backward(x, gradCriterion); + mlp:updateParameters(0.05); + + print(err) +end +</file> + +==== Identity ==== +{{anchor:nn.Identity}} + +''module'' = ''Identity()'' + +Creates a module that returns whatever is input to it as output. +This is useful when combined with the module +[[#nn.ParallelTable|ParallelTable]] +in case you do not wish to do anything to one of the input Tensors. +Example: +<file lua> +require "lab" +mlp=nn.Identity() +print(mlp:forward(lab.ones(5,2))) +</file> +gives the output: +<file lua> + 1 1 + 1 1 + 1 1 + 1 1 + 1 1 +[torch.Tensor of dimension 5x2] +</file> + +Here is a more useful example, where one can implement a network which also computes a Criterion using this module: +<file lua> +pred_mlp=nn.Sequential(); -- A network that makes predictions given x. +pred_mlp:add(nn.Linear(5,4)) +pred_mlp:add(nn.Linear(4,3)) + +xy_mlp=nn.ParallelTable();-- A network for predictions and for keeping the +xy_mlp:add(pred_mlp) -- true label for comparison with a criterion +xy_mlp:add(nn.Identity()) -- by forwarding both x and y through the network. + +mlp=nn.Sequential(); -- The main network that takes both x and y. +mlp:add(xy_mlp) -- It feeds x and y to parallel networks; +cr=nn.MSECriterion(); +cr_wrap=nn.CriterionTable(cr) +mlp:add(cr_wrap) -- and then applies the criterion. + +for i=1,100 do -- Do a few training iterations + x=lab.ones(5); -- Make input features. + y=torch.Tensor(3); + y:copy(x:narrow(1,1,3)) -- Make output label. + err=mlp:forward{x,y} -- Forward both input and output. + print(err) -- Print error from criterion. + + mlp:zeroGradParameters(); -- Do backprop... + mlp:backward({x, y} ); + mlp:updateParameters(0.05); +end +</file> + +==== PairwiseDistance ==== +{{anchor:nn.PairwiseDistance}} + +''module'' = ''PairwiseDistance(p)'' creates a module that takes a table of two vectors as input and outputs the distance between them using the ''p''-norm. + +Example: +<file lua> +mlp_l1=nn.PairwiseDistance(1) +mlp_l2=nn.PairwiseDistance(2) +x=lab.new(1,2,3) +y=lab.new(4,5,6) +print(mlp_l1:forward({x,y})) +print(mlp_l2:forward({x,y})) +</file> +gives the output: +<file lua> + 9 +[torch.Tensor of dimension 1] + + 5.1962 +[torch.Tensor of dimension 1] +</file> + +A more complicated example: +<file lua> +-- imagine we have one network we are interested in, it is called "p1_mlp" +p1_mlp= nn.Sequential(); p1_mlp:add(nn.Linear(5,2)) + +-- But we want to push examples towards or away from each other +-- so we make another copy of it called p2_mlp +-- this *shares* the same weights via the set command, but has its own set of temporary gradient storage +-- that's why we create it again (so that the gradients of the pair don't wipe each other) +p2_mlp= nn.Sequential(); p2_mlp:add(nn.Linear(5,2)) +p2_mlp:get(1).weight:set(p1_mlp:get(1).weight) +p2_mlp:get(1).bias:set(p1_mlp:get(1).bias) + +-- we make a parallel table that takes a pair of examples as input. 
they both go through the same (cloned) mlp +prl = nn.ParallelTable() +prl:add(p1_mlp) +prl:add(p2_mlp) + +-- now we define our top level network that takes this parallel table and computes the pairwise distance betweem +-- the pair of outputs +mlp= nn.Sequential() +mlp:add(prl) +mlp:add(nn.PairwiseDistance(1)) + +-- and a criterion for pushing together or pulling apart pairs +crit=nn.HingeEmbeddingCriterion(1) + +-- lets make two example vectors +x=lab.rand(5) +y=lab.rand(5) + + +-- Use a typical generic gradient update function +function gradUpdate(mlp, x, y, criterion, learningRate) +local pred = mlp:forward(x) +local err = criterion:forward(pred, y) +local gradCriterion = criterion:backward(pred, y) +mlp:zeroGradParameters() +mlp:backward(x, gradCriterion) +mlp:updateParameters(learningRate) +end + +-- push the pair x and y together, notice how then the distance between them given +-- by print(mlp:forward({x,y})[1]) gets smaller +for i=1,10 do +gradUpdate(mlp,{x,y},1,crit,0.01) +print(mlp:forward({x,y})[1]) +end + + +-- pull apart the pair x and y, notice how then the distance between them given +-- by print(mlp:forward({x,y})[1]) gets larger + +for i=1,10 do +gradUpdate(mlp,{x,y},-1,crit,0.01) +print(mlp:forward({x,y})[1]) +end + +</file> + +==== DotProduct ==== +{{anchor:nn.DotProduct}} + +''module'' = ''DotProduct()'' creates a module that takes a table of two vectors as input and outputs the dot product between them. + +Example: +<file lua> +mlp=nn.DotProduct() +x=lab.new(1,2,3) +y=lab.new(4,5,6) +print(mlp:forward({x,y})) +</file> +gives the output: +<file lua> + 32 +[torch.Tensor of dimension 1] +</file> + + +A more complicated example: +<file lua> + +-- Train a ranking function so that mlp:forward({x,y},{x,z}) returns a number +-- which indicates whether x is better matched with y or z (larger score = better match), or vice versa. + +mlp1=nn.Linear(5,10) +mlp2=mlp1:clone('weight','bias') + +prl=nn.ParallelTable(); +prl:add(mlp1); prl:add(mlp2) + +mlp1=nn.Sequential() +mlp1:add(prl) +mlp1:add(nn.DotProduct()) + +mlp2=mlp1:clone('weight','bias') + +mlp=nn.Sequential() +prla=nn.ParallelTable() +prla:add(mlp1) +prla:add(mlp2) +mlp:add(prla) + +x=lab.rand(5); +y=lab.rand(5) +z=lab.rand(5) + + +print(mlp1:forward{x,x}) +print(mlp1:forward{x,y}) +print(mlp1:forward{y,y}) + + +crit=nn.MarginRankingCriterion(1); + +-- Use a typical generic gradient update function +function gradUpdate(mlp, x, y, criterion, learningRate) + local pred = mlp:forward(x) + local err = criterion:forward(pred, y) + local gradCriterion = criterion:backward(pred, y) + mlp:zeroGradParameters() + mlp:backward(x, gradCriterion) + mlp:updateParameters(learningRate) +end + +inp={{x,y},{x,z}} + +math.randomseed(1) + +-- make the pair x and y have a larger dot product than x and z + +for i=1,100 do + gradUpdate(mlp,inp,1,crit,0.05) + o1=mlp1:forward{x,y}[1]; + o2=mlp2:forward{x,z}[1]; + o=crit:forward(mlp:forward{{x,y},{x,z}},1) + print(o1,o2,o) +end + +print "******************" + +-- make the pair x and z have a larger dot product than x and y + +for i=1,100 do + gradUpdate(mlp,inp,-1,crit,0.05) + o1=mlp1:forward{x,y}[1]; + o2=mlp2:forward{x,z}[1]; + o=crit:forward(mlp:forward{{x,y},{x,z}},-1) + print(o1,o2,o) +end +</file> + + +==== CosineDistance ==== +{{anchor:nn.CosineDistance}} + +''module'' = ''CosineDistance()'' creates a module that takes a table of two vectors as input and outputs the cosine distance between them. 
+ +Example: +<file lua> +mlp=nn.CosineDistance() +x=lab.new(1,2,3) +y=lab.new(4,5,6) +print(mlp:forward({x,y})) +</file> +gives the output: +<file lua> + 0.9746 +[torch.Tensor of dimension 1] +</file> + +A more complicated example: +<file lua> + +-- imagine we have one network we are interested in, it is called "p1_mlp" +p1_mlp= nn.Sequential(); p1_mlp:add(nn.Linear(5,2)) + +-- But we want to push examples towards or away from each other +-- so we make another copy of it called p2_mlp +-- this *shares* the same weights via the set command, but has its own set of temporary gradient storage +-- that's why we create it again (so that the gradients of the pair don't wipe each other) +p2_mlp= p1_mlp:clone('weight','bias') + +-- we make a parallel table that takes a pair of examples as input. they both go through the same (cloned) mlp +prl = nn.ParallelTable() +prl:add(p1_mlp) +prl:add(p2_mlp) + +-- now we define our top level network that takes this parallel table and computes the cosine distance betweem +-- the pair of outputs +mlp= nn.Sequential() +mlp:add(prl) +mlp:add(nn.CosineDistance()) + + +-- lets make two example vectors +x=lab.rand(5) +y=lab.rand(5) + +-- Grad update function.. +function gradUpdate(mlp, x, y, learningRate) +local pred = mlp:forward(x) +if pred[1]*y < 1 then + gradCriterion=lab.new(-y) + mlp:zeroGradParameters() + mlp:backward(x, gradCriterion) + mlp:updateParameters(learningRate) +end +end + +-- push the pair x and y together, the distance should get larger.. +for i=1,1000 do + gradUpdate(mlp,{x,y},1,0.1) + if ((i%100)==0) then print(mlp:forward({x,y})[1]);end +end + + +-- pull apart the pair x and y, the distance should get smaller.. + +for i=1,1000 do + gradUpdate(mlp,{x,y},-1,0.1) + if ((i%100)==0) then print(mlp:forward({x,y})[1]);end +end +</file> + + + +==== CriterionTable ==== +{{anchor:nn.CriterionTable}} + +''module'' = ''CriterionTable(criterion)'' + +Creates a module that wraps a Criterion module so that it can accept a Table of inputs. Typically the table would contain two elements: the input and output ''x'' and ''y'' that the Criterion compares. + +Example: +<file lua> +mlp = nn.CriterionTable(nn.MSECriterion()) +require "lab" +x=lab.randn(5) +y=lab.randn(5) +print(mlp:forward{x,x}) +print(mlp:forward{x,y}) +</file> +gives the output: +<file lua> +0 +1.9028918413199 +</file> + +Here is a more complex example of embedding the criterion into a network: +<file lua> +require "lab" + +function table.print(t) + for i,k in pairs(t) do print(i,k); end +end + +mlp=nn.Sequential(); -- Create an mlp that takes input + main_mlp=nn.Sequential(); -- and output using ParallelTable + main_mlp:add(nn.Linear(5,4)) + main_mlp:add(nn.Linear(4,3)) + cmlp=nn.ParallelTable(); + cmlp:add(main_mlp) + cmlp:add(nn.Identity()) +mlp:add(cmlp) +mlp:add(nn.CriterionTable(nn.MSECriterion())) -- Apply the Criterion + +for i=1,20 do -- Train for a few iterations + x=lab.ones(5); + y=torch.Tensor(3); y:copy(x:narrow(1,1,3)) + err=mlp:forward{x,y} -- Pass in both input and output + print(err) + + mlp:zeroGradParameters(); + mlp:backward({x, y} ); + mlp:updateParameters(0.05); +end +</file> + +==== CAddTable ==== +{{anchor:nn.CAddTable}} + +Takes a table of tensors and outputs summation of all tensors. 
+
+<file lua>
+ii = {lab.ones(5),lab.ones(5)*2,lab.ones(5)*3}
+=ii[1]
+ 1
+ 1
+ 1
+ 1
+ 1
+[torch.DoubleTensor of dimension 5]
+
+=ii[2]
+ 2
+ 2
+ 2
+ 2
+ 2
+[torch.DoubleTensor of dimension 5]
+
+=ii[3]
+ 3
+ 3
+ 3
+ 3
+ 3
+[torch.DoubleTensor of dimension 5]
+
+m=nn.CAddTable()
+=m:forward(ii)
+ 6
+ 6
+ 6
+ 6
+ 6
+[torch.DoubleTensor of dimension 5]
+</file>
+
+
+==== CSubTable ====
+{{anchor:nn.CSubTable}}
+
+Takes a table with two tensors and returns the component-wise
+subtraction between them.
+
+<file lua>
+m=nn.CSubTable()
+=m:forward({lab.ones(5)*2.2,lab.ones(5)})
+ 1.2000
+ 1.2000
+ 1.2000
+ 1.2000
+ 1.2000
+[torch.DoubleTensor of dimension 5]
+</file>
+
+==== CMulTable ====
+{{anchor:nn.CMulTable}}
+
+Takes a table of tensors and outputs the multiplication of all of them.
+
+<file lua>
+ii = {lab.ones(5)*2,lab.ones(5)*3,lab.ones(5)*4}
+m=nn.CMulTable()
+=m:forward(ii)
+ 24
+ 24
+ 24
+ 24
+ 24
+[torch.DoubleTensor of dimension 5]
+
+</file>
+
+==== CDivTable ====
+{{anchor:nn.CDivTable}}
+
+Takes a table with two tensors and returns the component-wise
+division between them.
+
+<file lua>
+m=nn.CDivTable()
+=m:forward({lab.ones(5)*2.2,lab.ones(5)*4.4})
+ 0.5000
+ 0.5000
+ 0.5000
+ 0.5000
+ 0.5000
+[torch.DoubleTensor of dimension 5]
+</file>
+
+====== Criterions ======
+{{anchor:nn.Criterions}}
+
+Criterions are helpful for training a neural network. Given an input and a
+target, they compute a gradient according to a given loss
+function. [[#nn.AbsCriterion|AbsCriterion]] and
+[[#nn.MSECriterion|MSECriterion]] are perfect for regression problems, while
+[[#nn.ClassNLLCriterion|ClassNLLCriterion]] is the criterion of choice when
+dealing with classification.
+
+Criterions are [[..:torch:file#torch.file.serialization|serializable]].
+
+===== Criterion =====
+{{anchor:nn.Criterion}}
+
+This is an abstract class which declares methods defined in all criterions.
+This class is [[..:torch:file#torch.file.serialization|serializable]].
+
+==== [output] forward(input, target) ====
+{{anchor:nn.Criterion.forward}}
+
+Given an ''input'' and a ''target'', computes the loss function associated with the criterion and returns the
+result. In general ''input'' and ''target'' are [[..:torch:tensor|tensors]], but some specific criterions
+might require some other type of object.
+
+The ''output'' returned should in general be a scalar.
+
+The state variable [[#nn.Criterion.output|self.output]] should be updated after a call to ''forward()''.
+
+==== [gradInput] backward(input, target) ====
+{{anchor:nn.Criterion.backward}}
+
+Given an ''input'' and a ''target'', computes the gradients of the loss function associated with the criterion and
+returns the result. In general ''input'', ''target'' and ''gradInput'' are [[..:torch:tensor|tensors]], but some specific criterions
+might require some other type of object.
+
+The state variable [[#nn.Criterion.gradInput|self.gradInput]] should be updated after a call to ''backward()''.
+
+==== State variable: output ====
+{{anchor:nn.Criterion.output}}
+
+State variable which contains the result of the last [[#nn.Criterion.forward|forward(input, target)]] call.
+
+==== State variable: gradInput ====
+{{anchor:nn.Criterion.gradInput}}
+
+State variable which contains the result of the last [[#nn.Criterion.backward|backward(input, target)]] call.
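+
+To make the calling convention concrete, here is a minimal sketch using the
+[[#nn.MSECriterion|MSECriterion]] described below (it assumes ''lab'' is loaded, as in
+the other examples):
+<file lua>
+criterion = nn.MSECriterion()
+input  = lab.randn(5)
+target = lab.randn(5)
+loss = criterion:forward(input, target)   -- a number, also stored in criterion.output
+grad = criterion:backward(input, target)  -- a Tensor, also stored in criterion.gradInput
+print(loss)
+print(grad)
+</file>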
+ +===== AbsCriterion ===== +{{anchor:nn.AbsCriterion}} + +<file lua> +criterion = AbsCriterion() +</file> + +Creates a criterion that +measures the mean absolute value between ''n'' elements in the input ''x'' +and output ''y'': + +''loss(x,y)'' = ''1/n \sum |x_i-y_i|''. + +If ''x'' and ''y'' are ''d''-dimensional Tensors with a total of ''n'' elements, +the sum operation still operates over all the elements, and divides by ''n''. + +The division by ''n'' can be avoided if one sets the internal variable ''sizeAverage'' to ''false'': +<file lua> +criterion = nn.AbsCriterion() +criterion.sizeAverage = false +</file> + +===== ClassNLLCriterion ===== +{{anchor:nn.ClassNLLCriterion}} + +<file lua> +criterion = ClassNLLCriterion() +</file> + +The negative log likelihood criterion. It is useful to train a classication +problem with ''n'' classes. The ''input'' given through a ''forward()'' is +expected to contain //log-probabilities// of each class: ''input'' has to be a +1D tensor of size ''n''. Obtaining log-probabilities in a neural network is +easily achieved by adding a [[#nn.LogSoftMax|LogSoftMax]] layer in the last +layer of your neural network. + +This criterion expect a class index (1 to the number of class) as ''target'' +when calling [[#nn.CriterionForward|forward(input, target)]] and +[[#nn.CriterionBackward|backward(input, target)]]. + +The loss can be described as: +<file lua> +loss(x, class) = forward(x, class) = -x[class] +</file> + +The following is a code fragment showing how to make a gradient step +given an input ''x'', a desired output ''y'' (an integer ''1'' to ''n'', +in this case ''n'' = ''2'' classes), +a network ''mlp'' and a learning rate ''learningRate'': +<file lua> +function gradUpdate(mlp,x,y,learningRate) + local criterion = nn.ClassNLLCriterion() + pred = mlp:forward(x) + local err = criterion:forward(pred, y); + mlp:zeroGradParameters(); + local t = criterion:backward(pred, y); + mlp:backward(x, t); + mlp:updateParameters(learningRate); +end +</file> + +===== MarginCriterion ===== +{{anchor:nn.MarginCriterion}} + +<file lua> +criterion = MarginCriterion() +</file> + +Creates a criterion that optimizes a two-class classification hinge loss (margin-based loss) between input ''x'' (a Tensor of dimension 1) and output ''y'' (which is a scalar, either 1 or -1) : + +<file lua> +loss(x,y) = forward(x,y) = max(0,m- y x). +</file> + +''m'' is the margin, which is by default 1. + +<file lua> +criterion = MarginCriterion(marginValue) +</file> + +sets a different value of ''m''. + + +Example: +<file lua> +require "nn" +require "lab" + +function gradUpdate(mlp, x, y, criterion, learningRate) + local pred = mlp:forward(x) + local err = criterion:forward(pred, y) + local gradCriterion = criterion:backward(pred, y) + mlp:zeroGradParameters() + mlp:backward(x, gradCriterion) + mlp:updateParameters(learningRate) +end + +mlp=nn.Sequential() +mlp:add(nn.Linear(5,1)) + +x1=lab.rand(5) +x2=lab.rand(5) +criterion=nn.MarginCriterion(1) + +for i=1,1000 do + gradUpdate(mlp,x1,1,criterion,0.01) + gradUpdate(mlp,x2,-1,criterion,0.01) +end + +print(mlp:forward(x1)) +print(mlp:forward(x2)) + +print(criterion:forward(mlp:forward(x1),1)) +print(criterion:forward(mlp:forward(x2),-1)) +</file> +gives the output: +<file lua> + 1.0043 +[torch.Tensor of dimension 1] + + +-1.0061 +[torch.Tensor of dimension 1] + +0 +0 +</file> +i.e. the mlp successfully separates the two data points such that they both have a margin of 1, and hence a loss of 0. 
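+
+The loss formula can also be checked directly on a hand-built prediction (a minimal
+sketch; as in the example above, the prediction is a 1-element Tensor and the target a
+number):
+<file lua>
+crit = nn.MarginCriterion(1)     -- margin m = 1
+p = torch.Tensor(1); p[1] = 0.6  -- the prediction
+print(crit:forward(p, 1))        -- max(0, 1 - 1*0.6)    = 0.4
+print(crit:forward(p, -1))       -- max(0, 1 - (-1)*0.6) = 1.6
+</file>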
+ +===== MSECriterion ===== +{{anchor:nn.MSECriterion}} + +<file lua> +criterion = MSECriterion() +</file> + +Creates a criterion that measures the mean squared error between ''n'' elements in the input ''x'' +and output ''y'': + +<file lua> +loss(x,y) = forward(x,y) = 1/n \sum |x_i-y_i|^2 . +</file> + +If ''x'' and ''y'' are ''d''-dimensional Tensors with a total of ''n'' elements, +the sum operation still operates over all the elements, and divides by ''n''. The two tensors must +have the same number of elements (but their sizes might be different...) + +The division by ''n'' can be avoided if one sets the internal variable ''sizeAverage'' to ''false'': +<file lua> +criterion = nn.MSECriterion() +criterion.sizeAverage = false +</file> + +===== MultiCriterion ===== +{{anchor:nn.MultiCriterion}} + +<file lua> +criterion = MultiCriterion() +</file> + +This returns a Criterion which is a weighted sum of other Criterion. +Criterions are added using the method: + +''criterion:add(singleCriterion, weight)'' + +where ''weight'' is a scalar. + + +===== HingeEmbeddingCriterion ===== +{{anchor:nn.HingeEmbeddingCriterion}} + +<file lua> +criterion = HingeEmbeddingCriterion() +</file> + +Creates a criterion that measures the loss given an input +''x'' which is a 1-dimensional vector and a label ''y'' (1 or -1). +This is usually used for measuring whether two inputs are similar +or dissimilar, e.g. using the L1 pairwise distance, +and is typically used for +learning nonlinear embeddings or semi-supervised learning. + +<verbatim> +loss(x,y) = forward(x,y) = x, if y=1 += max(0,margin - x), if y=-1 +</verbatim> + +The ''margin'' has a default value of 1, or can be set in the constructor: +<file lua> +criterion = HingeEmbeddingCriterion(marginValue) +</file> + +Example use: +<file lua> +-- imagine we have one network we are interested in, it is called "p1_mlp" +p1_mlp= nn.Sequential(); p1_mlp:add(nn.Linear(5,2)) + +-- But we want to push examples towards or away from each other +-- so we make another copy of it called p2_mlp +-- this *shares* the same weights via the set command, but has its own set of temporary gradient storage +-- that's why we create it again (so that the gradients of the pair don't wipe each other) +p2_mlp= nn.Sequential(); p2_mlp:add(nn.Linear(5,2)) +p2_mlp:get(1).weight:set(p1_mlp:get(1).weight) +p2_mlp:get(1).bias:set(p1_mlp:get(1).bias) + +-- we make a parallel table that takes a pair of examples as input. 
+-- they both go through the same (cloned) mlp
+prl = nn.ParallelTable()
+prl:add(p1_mlp)
+prl:add(p2_mlp)
+
+-- now we define our top-level network that takes this parallel table
+-- and computes the pairwise distance between the pair of outputs
+mlp = nn.Sequential()
+mlp:add(prl)
+mlp:add(nn.PairwiseDistance(1))
+
+-- and a criterion for pushing together or pulling apart pairs
+crit = nn.HingeEmbeddingCriterion(1)
+
+-- let's make two example vectors
+x = lab.rand(5)
+y = lab.rand(5)
+
+-- use a typical generic gradient update function
+function gradUpdate(mlp, x, y, criterion, learningRate)
+   local pred = mlp:forward(x)
+   local err = criterion:forward(pred, y)
+   local gradCriterion = criterion:backward(pred, y)
+   mlp:zeroGradParameters()
+   mlp:backward(x, gradCriterion)
+   mlp:updateParameters(learningRate)
+end
+
+-- push the pair x and y together; notice how the distance between them,
+-- given by print(mlp:forward({x,y})[1]), gets smaller
+for i = 1,10 do
+   gradUpdate(mlp, {x,y}, 1, crit, 0.01)
+   print(mlp:forward({x,y})[1])
+end
+
+-- pull apart the pair x and y; notice how the distance between them,
+-- given by print(mlp:forward({x,y})[1]), gets larger
+for i = 1,10 do
+   gradUpdate(mlp, {x,y}, -1, crit, 0.01)
+   print(mlp:forward({x,y})[1])
+end
+</file>
+
+===== L1HingeEmbeddingCriterion =====
+{{anchor:nn.L1HingeEmbeddingCriterion}}
+
+<file lua>
+criterion = nn.L1HingeEmbeddingCriterion(margin)
+</file>
+
+Creates a criterion that measures the loss given an input
+''x'' = ''{x1,x2}'', a table of two Tensors, and a label ''y'' (1 or -1).
+This is used for measuring whether two inputs are similar
+or dissimilar, using the L1 distance, and is typically used for
+learning nonlinear embeddings or semi-supervised learning.
+
+<verbatim>
+loss(x,y) = forward(x,y) = ||x1-x2||_1,                   if y =  1
+                         = max(0, margin - ||x1-x2||_1),  if y = -1
+</verbatim>
+
+The ''margin'' has a default value of 1, or can be set in the constructor:
+<file lua>
+criterion = nn.L1HingeEmbeddingCriterion(marginValue)
+</file>
+
+===== CosineEmbeddingCriterion =====
+{{anchor:nn.CosineEmbeddingCriterion}}
+
+<file lua>
+criterion = nn.CosineEmbeddingCriterion(margin)
+</file>
+
+Creates a criterion that measures the loss given an input
+''x'' = ''{x1,x2}'', a table of two Tensors, and a label ''y'' (1 or -1).
+This is used for measuring whether two inputs are similar
+or dissimilar, using the cosine distance, and is typically used for
+learning nonlinear embeddings or semi-supervised learning.
+
+''margin'' should be a number from -1 to 1; 0 to 0.5 is suggested.
+Forward and Backward have to be used alternately. If ''margin'' is missing, the default value is 0.
+
+The loss function is:
+<verbatim>
+loss(x,y) = forward(x,y) = 1 - cos(x1, x2),               if y =  1
+                         = max(0, cos(x1, x2) - margin),  if y = -1
+</verbatim>
+
+===== MarginRankingCriterion =====
+{{anchor:nn.MarginRankingCriterion}}
+
+<file lua>
+criterion = nn.MarginRankingCriterion(margin)
+</file>
+
+Creates a criterion that measures the loss given an input
+''x'' = ''{x1,x2}'', a table of two Tensors of size 1 (they contain only scalars),
+and a label ''y'' (1 or -1).
+
+If ''y'' = ''1'' then it is assumed that the first input should be ranked higher (have a larger value)
+than the second input, and vice-versa for ''y'' = ''-1''.
+
+The loss function is:
+<verbatim>
+loss(x,y) = forward(x,y) = max(0, -y*(x[1]-x[2]) + margin)
+</verbatim>
+
+Example:
+<file lua>
+p1_mlp = nn.Linear(5,2)
+p2_mlp = p1_mlp:clone('weight','bias')
+
+prl = nn.ParallelTable()
+prl:add(p1_mlp)
+prl:add(p2_mlp)
+
+mlp1 = nn.Sequential()
+mlp1:add(prl)
+mlp1:add(nn.DotProduct())
+
+mlp2 = mlp1:clone('weight','bias')
+
+mlpa = nn.Sequential()
+prla = nn.ParallelTable()
+prla:add(mlp1)
+prla:add(mlp2)
+mlpa:add(prla)
+
+crit = nn.MarginRankingCriterion(0.1)
+
+x = lab.randn(5)
+y = lab.randn(5)
+z = lab.randn(5)
+
+-- use a typical generic gradient update function
+function gradUpdate(mlp, x, y, criterion, learningRate)
+   local pred = mlp:forward(x)
+   local err = criterion:forward(pred, y)
+   local gradCriterion = criterion:backward(pred, y)
+   mlp:zeroGradParameters()
+   mlp:backward(x, gradCriterion)
+   mlp:updateParameters(learningRate)
+end
+
+for i = 1,100 do
+   gradUpdate(mlpa, {{x,y},{x,z}}, 1, crit, 0.01)
+   if true then
+      o1 = mlp1:forward{x,y}[1]
+      o2 = mlp2:forward{x,z}[1]
+      o  = crit:forward(mlpa:forward{{x,y},{x,z}}, 1)
+      print(o1, o2, o)
+   end
+end
+
+print "--"
+
+for i = 1,100 do
+   gradUpdate(mlpa, {{x,y},{x,z}}, -1, crit, 0.01)
+   if true then
+      o1 = mlp1:forward{x,y}[1]
+      o2 = mlp2:forward{x,z}[1]
+      o  = crit:forward(mlpa:forward{{x,y},{x,z}}, -1)
+      print(o1, o2, o)
+   end
+end
+</file>
+
+====== Training a neural network ======
+{{anchor:nn.traningneuralnet.dok}}
+
+Training a neural network is easy with a [[#nn.DoItYourself|simple ''for'' loop]].
+While doing your own loop provides great flexibility, you might
+sometimes want a quick way of training neural
+networks. [[#nn.StochasticGradient|StochasticGradient]], a simple class
+which does the job for you, is provided as standard.
+
+===== StochasticGradient =====
+{{anchor:nn.StochasticGradient.dok}}
+
+''StochasticGradient'' is a high-level class for training [[#nn.Module|neural networks]], using a stochastic gradient
+algorithm. This class is [[..:torch:file#torch.file.serialization|serializable]].
+
+==== StochasticGradient(module, criterion) ====
+{{anchor:nn.StochasticGradient}}
+
+Create a ''StochasticGradient'' class, using the given [[#nn.Module|Module]] and [[#nn.Criterion|Criterion]].
+The class contains [[#nn.StochasticGradientParameters|several parameters]] you might want to set after initialization.
+
+==== train(dataset) ====
+{{anchor:nn.StochasticGradientTrain}}
+
+Train the module and criterion given in the
+[[#nn.StochasticGradient|constructor]] over ''dataset'', using the
+internal [[#nn.StochasticGradientParameters|parameters]].
+
+StochasticGradient expects as a ''dataset'' an object which implements the operator
+''dataset[index]'' and implements the method ''dataset:size()''. The ''size()'' method
+returns the number of examples and ''dataset[i]'' has to return the i-th example.
+
+An ''example'' has to be an object which implements the operator
+''example[field]'', where ''field'' might take the value ''1'' (input features)
+or ''2'' (corresponding label which will be given to the criterion).
+The input is usually a Tensor (except if you use special kinds of modules,
+like [[#nn.TableLayers|table layers]]). The label type depends on the criterion.
+For example, the [[#nn.MSECriterion|MSECriterion]] expects a Tensor, but the
+[[#nn.ClassNLLCriterion|ClassNLLCriterion]] expects an integer number (the class).
+
+Such a dataset is easily constructed by using Lua tables, but it could be any object
+(for example a ''C'' object), as long as the required operators/methods are implemented.
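+
+For instance, a minimal sketch of such an object (not taken from the package; the field name ''examples'' is only illustrative) could wrap a plain Lua table of precomputed ''{input, target}'' pairs:
+<file lua>
+-- dataset:size() returns the number of examples,
+-- dataset[i] returns the i-th example as {input, target}
+dataset = {}
+dataset.examples = {}
+function dataset:size() return #self.examples end
+setmetatable(dataset, {__index = function(self, i) return self.examples[i] end})
+
+-- add one (dummy) example: a 2d input and a 1d target
+table.insert(dataset.examples, {torch.Tensor(2):fill(0.5), torch.Tensor(1):fill(1)})
+</file>
+The full XOR example below builds its dataset in the same spirit, storing the pairs directly in the table.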
+[[#nn.DoItStochasticGradient|See an example]].
+
+==== Parameters ====
+{{anchor:nn.StochasticGradientParameters}}
+
+''StochasticGradient'' has several fields which have an impact on a call to [[#nn.StochasticGradientTrain|train()]].
+
+  * ''learningRate'': This is the learning rate used during training. The update of the parameters will be ''parameters = parameters - learningRate * parameters_gradient''. Default value is ''0.01''.
+  * ''learningRateDecay'': The learning rate decay. If non-zero, the learning rate (note: the field ''learningRate'' will not change its value) will be computed after each iteration (pass over the dataset) with: ''current_learning_rate = learningRate / (1 + iteration * learningRateDecay)''.
+  * ''maxIteration'': The maximum number of iterations (passes over the dataset). Default is ''25''.
+  * ''shuffleIndices'': Boolean which says if the examples will be randomly sampled or not. Default is ''true''. If ''false'', the examples will be taken in the order of the dataset.
+  * ''hookExample'': A possible hook function which will be called (if non-nil) during training after each example has been forwarded and backwarded through the network. The function takes ''(self, example)'' as parameters. Default is ''nil''.
+  * ''hookIteration'': A possible hook function which will be called (if non-nil) during training after a complete pass over the dataset. The function takes ''(self, iteration)'' as parameters. Default is ''nil''.
+
+===== Example of training using StochasticGradient =====
+{{anchor:nn.DoItStochasticGradient}}
+
+We show an example here on a classical XOR problem.
+
+**Dataset**
+
+We first need to create a dataset, following the conventions described in
+[[#nn.StochasticGradientTrain|StochasticGradient]].
+<file lua>
+require "lab"
+dataset = {}
+function dataset:size() return 100 end -- 100 examples
+for i = 1,dataset:size() do
+   local input = lab.randn(2)     -- normally distributed example in 2d
+   local output = torch.Tensor(1)
+   if input[1]*input[2] > 0 then  -- calculate label for XOR function
+      output[1] = -1
+   else
+      output[1] = 1
+   end
+   dataset[i] = {input, output}
+end
+</file>
+
+**Neural Network**
+
+We create a simple neural network with one hidden layer.
+<file lua>
+require "nn"
+mlp = nn.Sequential();  -- make a multi-layer perceptron
+inputs = 2; outputs = 1; HUs = 20; -- parameters
+mlp:add(nn.Linear(inputs, HUs))
+mlp:add(nn.Tanh())
+mlp:add(nn.Linear(HUs, outputs))
+</file>
+
+**Training**
+
+We choose the Mean Squared Error criterion and train the beast.
+<file lua>
+criterion = nn.MSECriterion()
+trainer = nn.StochasticGradient(mlp, criterion)
+trainer.learningRate = 0.01
+trainer:train(dataset)
+</file>
+
+**Test the network**
+
+<file lua>
+x = torch.Tensor(2)
+x[1] =  0.5; x[2] =  0.5; print(mlp:forward(x))
+x[1] =  0.5; x[2] = -0.5; print(mlp:forward(x))
+x[1] = -0.5; x[2] =  0.5; print(mlp:forward(x))
+x[1] = -0.5; x[2] = -0.5; print(mlp:forward(x))
+</file>
+
+You should see something like:
+<file lua>
+> x = torch.Tensor(2)
+> x[1] =  0.5; x[2] =  0.5; print(mlp:forward(x))
+
+-0.3490
+[torch.Tensor of dimension 1]
+
+> x[1] =  0.5; x[2] = -0.5; print(mlp:forward(x))
+
+ 1.0561
+[torch.Tensor of dimension 1]
+
+> x[1] = -0.5; x[2] =  0.5; print(mlp:forward(x))
+
+ 0.8640
+[torch.Tensor of dimension 1]
+
+> x[1] = -0.5; x[2] = -0.5; print(mlp:forward(x))
+
+-0.2941
+[torch.Tensor of dimension 1]
+</file>
+
+===== Example of manual training of a neural network =====
+{{anchor:nn.DoItYourself}}
+
+We show an example here on a classical XOR problem.
+ +**Neural Network** + +We create a simple neural network with one hidden layer. +<file lua> +require "nn" +mlp = nn.Sequential(); -- make a multi-layer perceptron +inputs = 2; outputs = 1; HUs = 20; -- parameters +mlp:add(nn.Linear(inputs, HUs)) +mlp:add(nn.Tanh()) +mlp:add(nn.Linear(HUs, outputs)) +</file> + +**Loss function** + +We choose the Mean Squared Error criterion. +<file lua> +criterion = nn.MSECriterion() +</file> + +**Training** + +We create data //on the fly// and feed it to the neural network. + +<file lua> +require "lab" +for i = 1,2500 do + -- random sample + local input= lab.randn(2); -- normally distributed example in 2d + local output= torch.Tensor(1); + if input[1]*input[2] > 0 then -- calculate label for XOR function + output[1] = -1 + else + output[1] = 1 + end + + -- feed it to the neural network and the criterion + criterion:forward(mlp:forward(input), output) + + -- train over this example in 3 steps + -- (1) zero the accumulation of the gradients + mlp:zeroGradParameters() + -- (2) accumulate gradients + mlp:backward(input, criterion:backward(mlp.output, output)) + -- (3) update parameters with a 0.01 learning rate + mlp:updateParameters(0.01) +end +</file> + +**Test the network** + +<file lua> +x = torch.Tensor(2) +x[1] = 0.5; x[2] = 0.5; print(mlp:forward(x)) +x[1] = 0.5; x[2] = -0.5; print(mlp:forward(x)) +x[1] = -0.5; x[2] = 0.5; print(mlp:forward(x)) +x[1] = -0.5; x[2] = -0.5; print(mlp:forward(x)) +</file> + +You should see something like: +<file lua> +> x = torch.Tensor(2) +> x[1] = 0.5; x[2] = 0.5; print(mlp:forward(x)) + +-0.6140 +[torch.Tensor of dimension 1] + +> x[1] = 0.5; x[2] = -0.5; print(mlp:forward(x)) + + 0.8878 +[torch.Tensor of dimension 1] + +> x[1] = -0.5; x[2] = 0.5; print(mlp:forward(x)) + + 0.8548 +[torch.Tensor of dimension 1] + +> x[1] = -0.5; x[2] = -0.5; print(mlp:forward(x)) + +-0.5498 +[torch.Tensor of dimension 1] +</file> diff --git a/dok/lena.jpg b/dok/lena.jpg Binary files differnew file mode 100644 index 0000000..d4a8c36 --- /dev/null +++ b/dok/lena.jpg diff --git a/dok/lenap.jpg b/dok/lenap.jpg Binary files differnew file mode 100644 index 0000000..0e6916d --- /dev/null +++ b/dok/lenap.jpg diff --git a/dok/logsigmoid.png b/dok/logsigmoid.png Binary files differnew file mode 100644 index 0000000..f632ed8 --- /dev/null +++ b/dok/logsigmoid.png diff --git a/dok/logsoftmax.png b/dok/logsoftmax.png Binary files differnew file mode 100644 index 0000000..dec5be5 --- /dev/null +++ b/dok/logsoftmax.png diff --git a/dok/power.png b/dok/power.png Binary files differnew file mode 100644 index 0000000..958eeb4 --- /dev/null +++ b/dok/power.png diff --git a/dok/sigmmoid.png b/dok/sigmmoid.png Binary files differnew file mode 100644 index 0000000..48aad7e --- /dev/null +++ b/dok/sigmmoid.png diff --git a/dok/sigmoid.png b/dok/sigmoid.png Binary files differnew file mode 100644 index 0000000..48aad7e --- /dev/null +++ b/dok/sigmoid.png diff --git a/dok/softmax.png b/dok/softmax.png Binary files differnew file mode 100644 index 0000000..29c5534 --- /dev/null +++ b/dok/softmax.png diff --git a/dok/softmin.png b/dok/softmin.png Binary files differnew file mode 100644 index 0000000..d1807a4 --- /dev/null +++ b/dok/softmin.png diff --git a/dok/softplus.png b/dok/softplus.png Binary files differnew file mode 100644 index 0000000..a5ee028 --- /dev/null +++ b/dok/softplus.png diff --git a/dok/softsign.png b/dok/softsign.png Binary files differnew file mode 100644 index 0000000..0805433 --- /dev/null +++ b/dok/softsign.png diff --git a/dok/sqrt.png 
b/dok/sqrt.png Binary files differnew file mode 100644 index 0000000..29b1d42 --- /dev/null +++ b/dok/sqrt.png diff --git a/dok/square.png b/dok/square.png Binary files differnew file mode 100644 index 0000000..c191eaf --- /dev/null +++ b/dok/square.png diff --git a/dok/sshrink.png b/dok/sshrink.png Binary files differnew file mode 100644 index 0000000..99c5d11 --- /dev/null +++ b/dok/sshrink.png diff --git a/dok/tanh.png b/dok/tanh.png Binary files differnew file mode 100644 index 0000000..d2f77aa --- /dev/null +++ b/dok/tanh.png diff --git a/generic/Abs.c b/generic/Abs.c new file mode 100644 index 0000000..8c65813 --- /dev/null +++ b/generic/Abs.c @@ -0,0 +1,43 @@ +#ifndef TH_GENERIC_FILE +#define TH_GENERIC_FILE "generic/Abs.c" +#else + +static int nn_(Abs_updateOutput)(lua_State *L) +{ + THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id)); + THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_(Tensor_id)); + + THTensor_(resizeAs)(output, input); + + TH_TENSOR_APPLY2(real, output, real, input, \ + *output_data = fabs(*input_data);) + return 1; +} + +static int nn_(Abs_updateGradInput)(lua_State *L) +{ + THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id)); + THTensor *gradOutput = luaT_checkudata(L, 3, torch_(Tensor_id)); + THTensor *gradInput = luaT_getfieldcheckudata(L, 1, "gradInput", torch_(Tensor_id)); + + THTensor_(resizeAs)(gradInput, input); + TH_TENSOR_APPLY3(real, gradInput, real, gradOutput, real, input, \ + real z = *input_data; \ + *gradInput_data = *gradOutput_data * (z >= 0 ? 1 : -1);) + return 1; +} + +static const struct luaL_Reg nn_(Abs__) [] = { + {"Abs_updateOutput", nn_(Abs_updateOutput)}, + {"Abs_updateGradInput", nn_(Abs_updateGradInput)}, + {NULL, NULL} +}; + +static void nn_(Abs_init)(lua_State *L) +{ + luaT_pushmetaclass(L, torch_(Tensor_id)); + luaT_registeratname(L, nn_(Abs__), "nn"); + lua_pop(L,1); +} + +#endif diff --git a/generic/AbsCriterion.c b/generic/AbsCriterion.c new file mode 100644 index 0000000..b9b948d --- /dev/null +++ b/generic/AbsCriterion.c @@ -0,0 +1,54 @@ +#ifndef TH_GENERIC_FILE +#define TH_GENERIC_FILE "generic/AbsCriterion.c" +#else + +static int nn_(AbsCriterion_updateOutput)(lua_State *L) +{ + THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id)); + THTensor *target = luaT_checkudata(L, 3, torch_(Tensor_id)); + int sizeAverage = luaT_getfieldcheckboolean(L, 1, "sizeAverage"); + real sum; + + sum = 0; + TH_TENSOR_APPLY2(real, input, real, target, + sum += fabs(*input_data - *target_data);) + + if(sizeAverage) + sum /= THTensor_(nElement)(input); + + lua_pushnumber(L, sum); + lua_setfield(L, 1, "output"); + + lua_pushnumber(L, sum); + return 1; +} + +static int nn_(AbsCriterion_updateGradInput)(lua_State *L) +{ + THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id)); + THTensor *target = luaT_checkudata(L, 3, torch_(Tensor_id)); + int sizeAverage = luaT_getfieldcheckboolean(L, 1, "sizeAverage"); + THTensor *gradInput = luaT_getfieldcheckudata(L, 1, "gradInput", torch_(Tensor_id)); + real norm = (sizeAverage ? 1./((real)THTensor_(nElement)(input)) : 1.); + + THTensor_(resizeAs)(gradInput, input); + TH_TENSOR_APPLY3(real, gradInput, real, input, real, target, + *gradInput_data = ( (*input_data - *target_data) >= 0 ? 
norm : -norm);) + + return 1; +} + +static const struct luaL_Reg nn_(AbsCriterion__) [] = { + {"AbsCriterion_updateOutput", nn_(AbsCriterion_updateOutput)}, + {"AbsCriterion_updateGradInput", nn_(AbsCriterion_updateGradInput)}, + {NULL, NULL} +}; + +static void nn_(AbsCriterion_init)(lua_State *L) +{ + luaT_pushmetaclass(L, torch_(Tensor_id)); + luaT_registeratname(L, nn_(AbsCriterion__), "nn"); + lua_pop(L,1); +} + +#endif diff --git a/generic/Exp.c b/generic/Exp.c new file mode 100644 index 0000000..b56f379 --- /dev/null +++ b/generic/Exp.c @@ -0,0 +1,43 @@ +#ifndef TH_GENERIC_FILE +#define TH_GENERIC_FILE "generic/Exp.c" +#else + +static int nn_(Exp_updateOutput)(lua_State *L) +{ + THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id)); + THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_(Tensor_id)); + + THTensor_(resizeAs)(output, input); + + TH_TENSOR_APPLY2(real, output, real, input, \ + *output_data = exp(*input_data);) + + return 1; +} + +static int nn_(Exp_updateGradInput)(lua_State *L) +{ + THTensor *gradOutput = luaT_checkudata(L, 3, torch_(Tensor_id)); + THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_(Tensor_id)); + THTensor *gradInput = luaT_getfieldcheckudata(L, 1, "gradInput", torch_(Tensor_id)); + + THTensor_(resizeAs)(gradInput, output); + TH_TENSOR_APPLY3(real, gradInput, real, gradOutput, real, output, \ + *gradInput_data = *gradOutput_data * *output_data;); + return 1; +} + +static const struct luaL_Reg nn_(Exp__) [] = { + {"Exp_updateOutput", nn_(Exp_updateOutput)}, + {"Exp_updateGradInput", nn_(Exp_updateGradInput)}, + {NULL, NULL} +}; + +static void nn_(Exp_init)(lua_State *L) +{ + luaT_pushmetaclass(L, torch_(Tensor_id)); + luaT_registeratname(L, nn_(Exp__), "nn"); + lua_pop(L,1); +} + +#endif diff --git a/generic/HardShrink.c b/generic/HardShrink.c new file mode 100644 index 0000000..be98ddc --- /dev/null +++ b/generic/HardShrink.c @@ -0,0 +1,50 @@ +#ifndef TH_GENERIC_FILE +#define TH_GENERIC_FILE "generic/HardShrink.c" +#else + +static int nn_(HardShrink_updateOutput)(lua_State *L) +{ + THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id)); + real lambda = luaT_getfieldchecknumber(L, 1, "lambda"); + THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_(Tensor_id)); + + THTensor_(resizeAs)(output, input); + + TH_TENSOR_APPLY2(real, output, real, input, \ + if ((*input_data) > lambda) *output_data = *input_data; \ + else if ((*input_data) < -lambda) *output_data = *input_data; \ + else *output_data = 0;); + return 1; +} + +static int nn_(HardShrink_updateGradInput)(lua_State *L) +{ + THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id)); + real lambda = luaT_getfieldchecknumber(L, 1, "lambda"); + THTensor *gradOutput = luaT_checkudata(L, 3, torch_(Tensor_id)); + THTensor *gradInput = luaT_getfieldcheckudata(L, 1, "gradInput", torch_(Tensor_id)); + + THTensor_(resizeAs)(gradInput, input); + TH_TENSOR_APPLY3(real, gradInput, real, gradOutput, real, input, \ + if ((*input_data) > lambda || (*input_data) < -lambda) \ + *gradInput_data = (*gradOutput_data); \ + else \ + *gradInput_data = 0; \ + ); + return 1; +} + +static const struct luaL_Reg nn_(HardShrink__) [] = { + {"HardShrink_updateOutput", nn_(HardShrink_updateOutput)}, + {"HardShrink_updateGradInput", nn_(HardShrink_updateGradInput)}, + {NULL, NULL} +}; + +static void nn_(HardShrink_init)(lua_State *L) +{ + luaT_pushmetaclass(L, torch_(Tensor_id)); + luaT_registeratname(L, nn_(HardShrink__), "nn"); + lua_pop(L,1); +} + +#endif diff --git 
a/generic/HardTanh.c b/generic/HardTanh.c new file mode 100644 index 0000000..3764095 --- /dev/null +++ b/generic/HardTanh.c @@ -0,0 +1,50 @@ +#ifndef TH_GENERIC_FILE +#define TH_GENERIC_FILE "generic/HardTanh.c" +#else + +static int nn_(HardTanh_updateOutput)(lua_State *L) +{ + THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id)); + THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_(Tensor_id)); + + THTensor_(resizeAs)(output, input); + + TH_TENSOR_APPLY2(real, output, real, input, \ + if(*input_data < -1) \ + *output_data = -1; \ + else if(*input_data <= 1) \ + *output_data = *input_data; \ + else \ + *output_data = 1;) + return 1; +} + +static int nn_(HardTanh_updateGradInput)(lua_State *L) +{ + THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id)); + THTensor *gradOutput = luaT_checkudata(L, 3, torch_(Tensor_id)); + THTensor *gradInput = luaT_getfieldcheckudata(L, 1, "gradInput", torch_(Tensor_id)); + + THTensor_(resizeAs)(gradInput, input); + TH_TENSOR_APPLY3(real, gradInput, real, gradOutput, real, input, \ + if(*input_data < -1 || *input_data > 1) \ + *gradInput_data = 0; \ + else \ + *gradInput_data = *gradOutput_data;); + return 1; +} + +static const struct luaL_Reg nn_(HardTanh__) [] = { + {"HardTanh_updateOutput", nn_(HardTanh_updateOutput)}, + {"HardTanh_updateGradInput", nn_(HardTanh_updateGradInput)}, + {NULL, NULL} +}; + +static void nn_(HardTanh_init)(lua_State *L) +{ + luaT_pushmetaclass(L, torch_(Tensor_id)); + luaT_registeratname(L, nn_(HardTanh__), "nn"); + lua_pop(L,1); +} + +#endif diff --git a/generic/LogSigmoid.c b/generic/LogSigmoid.c new file mode 100644 index 0000000..b5bdae4 --- /dev/null +++ b/generic/LogSigmoid.c @@ -0,0 +1,49 @@ +#ifndef TH_GENERIC_FILE +#define TH_GENERIC_FILE "generic/LogSigmoid.c" +#else + +static int nn_(LogSigmoid_updateOutput)(lua_State *L) +{ + THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id)); + THTensor *buffer = luaT_getfieldcheckudata(L, 1, "buffer", torch_(Tensor_id)); + THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_(Tensor_id)); + + THTensor_(resizeAs)(output, input); + THTensor_(resizeAs)(buffer, input); + + TH_TENSOR_APPLY3(real, output, real, input, real, buffer, \ + real z = exp(-*input_data); \ + *buffer_data = z; \ + *output_data = -log(1. + z);) + + return 1; +} + +static int nn_(LogSigmoid_updateGradInput)(lua_State *L) +{ + THTensor *gradOutput = luaT_checkudata(L, 3, torch_(Tensor_id)); + THTensor *buffer = luaT_getfieldcheckudata(L, 1, "buffer", torch_(Tensor_id)); + THTensor *gradInput = luaT_getfieldcheckudata(L, 1, "gradInput", torch_(Tensor_id)); + + THTensor_(resizeAs)(gradInput, buffer); + TH_TENSOR_APPLY3(real, gradInput, real, gradOutput, real, buffer, \ + real z = *buffer_data; \ + *gradInput_data = *gradOutput_data * z / (1. 
+ z);) + + return 1; +} + +static const struct luaL_Reg nn_(LogSigmoid__) [] = { + {"LogSigmoid_updateOutput", nn_(LogSigmoid_updateOutput)}, + {"LogSigmoid_updateGradInput", nn_(LogSigmoid_updateGradInput)}, + {NULL, NULL} +}; + +static void nn_(LogSigmoid_init)(lua_State *L) +{ + luaT_pushmetaclass(L, torch_(Tensor_id)); + luaT_registeratname(L, nn_(LogSigmoid__), "nn"); + lua_pop(L,1); +} + +#endif diff --git a/generic/LogSoftMax.c b/generic/LogSoftMax.c new file mode 100644 index 0000000..5d4dbfc --- /dev/null +++ b/generic/LogSoftMax.c @@ -0,0 +1,111 @@ +#ifndef TH_GENERIC_FILE +#define TH_GENERIC_FILE "generic/LogSoftMax.c" +#else + +static int nn_(LogSoftMax_updateOutput)(lua_State *L) +{ + THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id)); + THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_(Tensor_id)); + real *input_data, *output_data; + long nframe = 0, dim = 0; + long t, d; + + if(input->nDimension == 1) + { + nframe = 1; + dim = input->size[0]; + } + else if(input->nDimension == 2) + { + nframe = input->size[0]; + dim = input->size[1]; + } + else + THArgCheck(0, 2, "vector or matrix expected"); + + input = THTensor_(newContiguous)(input); + THTensor_(resizeAs)(output, input); + + input_data = THTensor_(data)(input); + output_data = THTensor_(data)(output); + for(t = 0; t < nframe; t++) + { + accreal logsum = 0; + real maxInput = -THInf; + + for(d = 0; d < dim; d++) + maxInput = THMax(maxInput, input_data[d]); + + for(d = 0; d < dim; d++) + logsum += THExpMinusApprox(maxInput-input_data[d]); + logsum = maxInput + log(logsum); + + for(d = 0; d < dim; d++) + output_data[d] = input_data[d] - logsum; + + input_data += dim; + output_data += dim; + } + + THTensor_(free)(input); + + return 1; +} + +static int nn_(LogSoftMax_updateGradInput)(lua_State *L) +{ + THTensor *gradOutput = luaT_checkudata(L, 3, torch_(Tensor_id)); + THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_(Tensor_id)); + THTensor *gradInput = luaT_getfieldcheckudata(L, 1, "gradInput", torch_(Tensor_id)); + real *gradInput_data, *gradOutput_data, *output_data; + long nframe = 0, dim = 0; + long t, d; + + if(output->nDimension == 1) + { + nframe = 1; + dim = output->size[0]; + } + else if(output->nDimension == 2) + { + nframe = output->size[0]; + dim = output->size[1]; + } + else + THError("vector or matrix expected"); + + THTensor_(resizeAs)(gradInput, output); + gradInput_data = THTensor_(data)(gradInput); + output_data = THTensor_(data)(output); + gradOutput_data = THTensor_(data)(gradOutput); + for(t = 0; t < nframe; t++) + { + accreal sum = 0; + for(d = 0; d < dim; d++) + sum += gradOutput_data[d]; + + for(d = 0; d < dim; d++) + gradInput_data[d] = gradOutput_data[d] - exp(output_data[d])*sum; + + gradInput_data += dim; + output_data += dim; + gradOutput_data += dim; + } + + return 1; +} + +static const struct luaL_Reg nn_(LogSoftMax__) [] = { + {"LogSoftMax_updateOutput", nn_(LogSoftMax_updateOutput)}, + {"LogSoftMax_updateGradInput", nn_(LogSoftMax_updateGradInput)}, + {NULL, NULL} +}; + +void nn_(LogSoftMax_init)(lua_State *L) +{ + luaT_pushmetaclass(L, torch_(Tensor_id)); + luaT_registeratname(L, nn_(LogSoftMax__), "nn"); + lua_pop(L,1); +} + +#endif diff --git a/generic/MSECriterion.c b/generic/MSECriterion.c new file mode 100644 index 0000000..c53735c --- /dev/null +++ b/generic/MSECriterion.c @@ -0,0 +1,54 @@ +#ifndef TH_GENERIC_FILE +#define TH_GENERIC_FILE "generic/MSECriterion.c" +#else + +static int nn_(MSECriterion_updateOutput)(lua_State *L) +{ + THTensor *input 
= luaT_checkudata(L, 2, torch_(Tensor_id)); + THTensor *target = luaT_checkudata(L, 3, torch_(Tensor_id)); + int sizeAverage = luaT_getfieldcheckboolean(L, 1, "sizeAverage"); + real sum; + + sum = 0; + TH_TENSOR_APPLY2(real, input, real, target, + real z = (*input_data - *target_data); + sum += z*z;) + + if(sizeAverage) + sum /= THTensor_(nElement)(input); + + lua_pushnumber(L, sum); + lua_setfield(L, 1, "output"); + + lua_pushnumber(L, sum); + return 1; +} + +static int nn_(MSECriterion_updateGradInput)(lua_State *L) +{ + THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id)); + THTensor *target = luaT_checkudata(L, 3, torch_(Tensor_id)); + int sizeAverage = luaT_getfieldcheckboolean(L, 1, "sizeAverage"); + THTensor *gradInput = luaT_getfieldcheckudata(L, 1, "gradInput", torch_(Tensor_id)); + real norm = (sizeAverage ? 2./((real)THTensor_(nElement)(input)) : 2.); + + THTensor_(resizeAs)(gradInput, input); + TH_TENSOR_APPLY3(real, gradInput, real, input, real, target, + *gradInput_data = norm * (*input_data - *target_data);) + return 1; +} + +static const struct luaL_Reg nn_(MSECriterion__) [] = { + {"MSECriterion_updateOutput", nn_(MSECriterion_updateOutput)}, + {"MSECriterion_updateGradInput", nn_(MSECriterion_updateGradInput)}, + {NULL, NULL} +}; + +static void nn_(MSECriterion_init)(lua_State *L) +{ + luaT_pushmetaclass(L, torch_(Tensor_id)); + luaT_registeratname(L, nn_(MSECriterion__), "nn"); + lua_pop(L,1); +} + +#endif diff --git a/generic/Max.c b/generic/Max.c new file mode 100644 index 0000000..87f52f1 --- /dev/null +++ b/generic/Max.c @@ -0,0 +1,100 @@ +#ifndef TH_GENERIC_FILE +#define TH_GENERIC_FILE "generic/Max.c" +#else + +static int nn_(Max_updateOutput)(lua_State *L) +{ + THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id)); + int dimension = luaT_getfieldcheckint(L, 1, "dimension")-1; + THTensor *indices = luaT_getfieldcheckudata(L, 1, "indices", torch_(Tensor_id)); + THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_(Tensor_id)); + + THLongStorage *dim; + long i; + + luaL_argcheck(L, dimension >= 0 && dimension < input->nDimension, 2, "dimension out of range"); + + dim = THLongStorage_newWithSize(input->nDimension); + for(i = 0; i < input->nDimension; i++) + dim->data[i] = input->size[i]; + dim->data[dimension] = 1; + THTensor_(resize)(output, dim, NULL); + THTensor_(resize)(indices, dim, NULL); + THLongStorage_free(dim); + + TH_TENSOR_DIM_APPLY3(real, output, real, input, real, indices, dimension, + long theIndex = 0; + real theMax = input_data[0]; + for(i = 1; i < input_size; i++) + { + if(input_data[i*input_stride] > theMax) + { + theIndex = i; + theMax = input_data[i*input_stride]; + } + } + *indices_data = theIndex+1; + *output_data = theMax;) + + THTensor_(select)(output, NULL, dimension, 0); + + return 1; +} + +static int nn_(Max_updateGradInput)(lua_State *L) +{ + THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id)); + THTensor *gradOutput = luaT_checkudata(L, 3, torch_(Tensor_id)); + THTensor *indices = luaT_getfieldcheckudata(L, 1, "indices", torch_(Tensor_id)); + int dimension = luaT_getfieldcheckint(L, 1, "dimension")-1; + THTensor *gradInput = luaT_getfieldcheckudata(L, 1, "gradInput", torch_(Tensor_id)); + + THTensor *gradOutputPlusOneDim; + THLongStorage *dim, *str; + int i, j; + + THTensor_(resizeAs)(gradInput, input); + THTensor_(zero)(gradInput); + + dim = THLongStorage_newWithSize(gradOutput->nDimension+1); + str = THLongStorage_newWithSize(gradOutput->nDimension+1); + for(i = 0, j = 0; j < gradOutput->nDimension+1; j++) 
+ { + if(j == dimension) + { + dim->data[j] = input->size[dimension]; + str->data[j] = 0; + continue; + } + + dim->data[j] = gradOutput->size[i]; + str->data[j] = gradOutput->stride[i]; + i++; + } + + gradOutputPlusOneDim = THTensor_(newWithStorage)(gradOutput->storage, gradOutput->storageOffset, dim, str); + THLongStorage_free(dim); + THLongStorage_free(str); + + TH_TENSOR_DIM_APPLY3(real, gradInput, real, gradOutputPlusOneDim, real, indices, dimension, + gradInput_data[ ((long)(*indices_data)-1)*gradInput_stride ] = *gradOutputPlusOneDim_data;) + + THTensor_(free)(gradOutputPlusOneDim); + + return 1; +} + +static const struct luaL_Reg nn_(Max__) [] = { + {"Max_updateOutput", nn_(Max_updateOutput)}, + {"Max_updateGradInput", nn_(Max_updateGradInput)}, + {NULL, NULL} +}; + +static void nn_(Max_init)(lua_State *L) +{ + luaT_pushmetaclass(L, torch_(Tensor_id)); + luaT_registeratname(L, nn_(Max__), "nn"); + lua_pop(L,1); +} + +#endif diff --git a/generic/Min.c b/generic/Min.c new file mode 100644 index 0000000..d3309df --- /dev/null +++ b/generic/Min.c @@ -0,0 +1,100 @@ +#ifndef TH_GENERIC_FILE +#define TH_GENERIC_FILE "generic/Min.c" +#else + +static int nn_(Min_updateOutput)(lua_State *L) +{ + THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id)); + int dimension = luaT_getfieldcheckint(L, 1, "dimension")-1; + THTensor *indices = luaT_getfieldcheckudata(L, 1, "indices", torch_(Tensor_id)); + THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_(Tensor_id)); + + THLongStorage *dim; + long i; + + luaL_argcheck(L, dimension >= 0 && dimension < input->nDimension, 2, "dimension out of range"); + + dim = THLongStorage_newWithSize(input->nDimension); + for(i = 0; i < input->nDimension; i++) + dim->data[i] = input->size[i]; + dim->data[dimension] = 1; + THTensor_(resize)(output, dim, NULL); + THTensor_(resize)(indices, dim, NULL); + THLongStorage_free(dim); + + TH_TENSOR_DIM_APPLY3(real, output, real, input, real, indices, dimension, + long theIndex = 0; + real theMin = input_data[0]; + for(i = 1; i < input_size; i++) + { + if(input_data[i*input_stride] < theMin) + { + theIndex = i; + theMin = input_data[i*input_stride]; + } + } + *indices_data = theIndex+1; + *output_data = theMin;) + + THTensor_(select)(output, NULL, dimension, 0); + + return 1; +} + +static int nn_(Min_updateGradInput)(lua_State *L) +{ + THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id)); + THTensor *gradOutput = luaT_checkudata(L, 3, torch_(Tensor_id)); + THTensor *indices = luaT_getfieldcheckudata(L, 1, "indices", torch_(Tensor_id)); + int dimension = luaT_getfieldcheckint(L, 1, "dimension")-1; + THTensor *gradInput = luaT_getfieldcheckudata(L, 1, "gradInput", torch_(Tensor_id)); + + THTensor *gradOutputPlusOneDim; + THLongStorage *dim, *str; + int i, j; + + THTensor_(resizeAs)(gradInput, input); + THTensor_(zero)(gradInput); + + dim = THLongStorage_newWithSize(gradOutput->nDimension+1); + str = THLongStorage_newWithSize(gradOutput->nDimension+1); + for(i = 0, j = 0; j < gradOutput->nDimension+1; j++) + { + if(j == dimension) + { + dim->data[j] = input->size[dimension]; + str->data[j] = 0; + continue; + } + + dim->data[j] = gradOutput->size[i]; + str->data[j] = gradOutput->stride[i]; + i++; + } + + gradOutputPlusOneDim = THTensor_(newWithStorage)(gradOutput->storage, gradOutput->storageOffset, dim, str); + THLongStorage_free(dim); + THLongStorage_free(str); + + TH_TENSOR_DIM_APPLY3(real, gradInput, real, gradOutputPlusOneDim, real, indices, dimension, + gradInput_data[ 
((long)(*indices_data)-1)*gradInput_stride ] = *gradOutputPlusOneDim_data;) + + THTensor_(free)(gradOutputPlusOneDim); + + return 1; +} + +static const struct luaL_Reg nn_(Min__) [] = { + {"Min_updateOutput", nn_(Min_updateOutput)}, + {"Min_updateGradInput", nn_(Min_updateGradInput)}, + {NULL, NULL} +}; + +static void nn_(Min_init)(lua_State *L) +{ + luaT_pushmetaclass(L, torch_(Tensor_id)); + luaT_registeratname(L, nn_(Min__), "nn"); + lua_pop(L,1); +} + +#endif diff --git a/generic/MultiLabelMarginCriterion.c b/generic/MultiLabelMarginCriterion.c new file mode 100644 index 0000000..f4c3914 --- /dev/null +++ b/generic/MultiLabelMarginCriterion.c @@ -0,0 +1,185 @@ +#ifndef TH_GENERIC_FILE +#define TH_GENERIC_FILE "generic/MultiLabelMarginCriterion.c" +#else + +static int nn_(MultiLabelMarginCriterion_updateOutput)(lua_State *L) +{ + THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id)); + int sizeAverage = luaT_getfieldcheckboolean(L, 1, "sizeAverage"); + real *input_data, *target_data; + long nframe, dim; + long t, d, dt, ddt; + THTensor *target; + real sum; + + THArgCheck((input->nDimension == 1) || (input->nDimension == 2), 2, "vector or matrix expected"); + + if(input->nDimension == 1) + { + nframe = 1; + dim = input->size[0]; + target = luaT_checkudata(L, 3, torch_(Tensor_id)); + THArgCheck((target->nDimension == 1) && (target->size[0] == dim), 3, "inconsistent target size"); + } + else + { + nframe = input->size[0]; + dim = input->size[1]; + target = luaT_checkudata(L, 3, torch_(Tensor_id)); + THArgCheck((target->nDimension == 2) && (target->size[0] == nframe) && (target->size[1] == dim), 3, "inconsistent target size"); + } + + THArgCheck(THTensor_(minall)(target) >= 0, 3, "target out of range"); + THArgCheck(THTensor_(maxall)(target) <= dim, 3, "target out of range"); + + target = THTensor_(newContiguous)(target); + input = THTensor_(newContiguous)(input); + input_data = THTensor_(data)(input); + target_data = THTensor_(data)(target); + + sum = 0; + for(t = 0; t < nframe; t++) + { + for(dt = 0; dt < dim; dt++) + { + long target_idx = (long)target_data[dt]-1; + real input_target; + if(target_idx < 0) + break; + + input_target = input_data[target_idx]; + for(d = 0; d < dim; d++) + { + int istarget = 0; + for(ddt = 0; ddt < dim; ddt++) + { + if(!target_data[ddt]) + break; + if(((long)target_data[ddt])-1 == d) + istarget = 1; + } + + if(!istarget) + { + real z = 1 - input_target + input_data[d]; + if(z > 0) + sum += z; + } + } + } + input_data += dim; + target_data += dim; + } + + if(sizeAverage) + sum /= dim; + + lua_pushnumber(L, sum); + lua_setfield(L, 1, "output"); + + THTensor_(free)(input); + THTensor_(free)(target); + lua_pushnumber(L, sum); + return 1; +} + +static int nn_(MultiLabelMarginCriterion_updateGradInput)(lua_State *L) +{ + THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id)); + int sizeAverage = luaT_getfieldcheckboolean(L, 1, "sizeAverage"); + THTensor *gradInput = luaT_getfieldcheckudata(L, 1, "gradInput", torch_(Tensor_id)); + real *input_data; + real *gradInput_data; + real *target_data; + long nframe, dim; + long t, d, dt, ddt; + THTensor *target; + real g; + + THArgCheck((input->nDimension == 1) || (input->nDimension == 2), 2, "vector or matrix expected"); + + if(input->nDimension == 1) + { + nframe = 1; + dim = input->size[0]; + target = luaT_checkudata(L, 3, torch_(Tensor_id)); + THArgCheck((target->nDimension == 1) && (target->size[0] == dim), 3, "inconsistent target size"); + } + else + { + nframe = input->size[0]; + dim = input->size[1]; + target 
= luaT_checkudata(L, 3, torch_(Tensor_id)); + THArgCheck((target->nDimension == 2) && (target->size[0] == nframe) && (target->size[1] == dim), 3, "inconsistent target size"); + } + + THArgCheck(THTensor_(minall)(target) >= 0, 3, "target out of range"); + THArgCheck(THTensor_(maxall)(target) <= dim, 3, "target out of range"); + + target = THTensor_(newContiguous)(target); + input = THTensor_(newContiguous)(input); + input_data = THTensor_(data)(input); + target_data = THTensor_(data)(target); + + g = (sizeAverage ? 1./((real)dim) : 1.); + + THTensor_(resizeAs)(gradInput, input); + THTensor_(zero)(gradInput); + gradInput_data = THTensor_(data)(gradInput); + + for(t = 0; t < nframe; t++) + { + for(dt = 0; dt < dim; dt++) + { + long target_idx = (long)target_data[dt]-1; + real input_target; + if(target_idx < 0) + break; + + input_target = input_data[target_idx]; + for(d = 0; d < dim; d++) + { + int istarget = 0; + for(ddt = 0; ddt < dim; ddt++) + { + if(!target_data[ddt]) + break; + if(((long)target_data[ddt])-1 == d) + istarget = 1; + } + + if(!istarget) + { + real z = 1 - input_target + input_data[d]; + if(z > 0) + { + gradInput_data[target_idx] -= g; + gradInput_data[d] += g; + } + } + } + } + input_data += dim; + target_data += dim; + gradInput_data += dim; + } + + THTensor_(free)(input); + THTensor_(free)(target); + return 1; +} + +static const struct luaL_Reg nn_(MultiLabelMarginCriterion__) [] = { + {"MultiLabelMarginCriterion_updateOutput", nn_(MultiLabelMarginCriterion_updateOutput)}, + {"MultiLabelMarginCriterion_updateGradInput", nn_(MultiLabelMarginCriterion_updateGradInput)}, + {NULL, NULL} +}; + +static void nn_(MultiLabelMarginCriterion_init)(lua_State *L) +{ + luaT_pushmetaclass(L, torch_(Tensor_id)); + luaT_registeratname(L, nn_(MultiLabelMarginCriterion__), "nn"); + lua_pop(L,1); +} + +#endif diff --git a/generic/MultiMarginCriterion.c b/generic/MultiMarginCriterion.c new file mode 100644 index 0000000..ca73bc9 --- /dev/null +++ b/generic/MultiMarginCriterion.c @@ -0,0 +1,162 @@ +#ifndef TH_GENERIC_FILE +#define TH_GENERIC_FILE "generic/MultiMarginCriterion.c" +#else + +static int nn_(MultiMarginCriterion_updateOutput)(lua_State *L) +{ + THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id)); + int sizeAverage = luaT_getfieldcheckboolean(L, 1, "sizeAverage"); + real *input_data, *target_data; + long nframe, dim; + long t, d; + real target_; + THTensor *target; + real sum; + + THArgCheck((input->nDimension == 1) || (input->nDimension == 2), 2, "vector or matrix expected"); + + if(input->nDimension == 1) + { + nframe = 1; + dim = input->size[0]; + target_ = luaL_checknumber(L, 3); + target = THTensor_(newWithSize1d)(1); + THTensor_(fill)(target, target_); + } + else + { + nframe = input->size[0]; + dim = input->size[1]; + target = luaT_checkudata(L, 3, torch_(Tensor_id)); + THArgCheck((target->nDimension == 1) && (target->size[0] == nframe), 3, "inconsistent target size"); + target = THTensor_(newContiguous)(target); + } + + for(t = 0; t < nframe; t++) + { + real idx = THTensor_(get1d)(target, t); + THArgCheck((idx >= 1) && (idx <= dim), 3, "target out of range"); + } + + input = THTensor_(newContiguous)(input); + input_data = THTensor_(data)(input); + target_data = THTensor_(data)(target); + + sum = 0; + for(t = 0; t < nframe; t++) + { + long target_idx = (long)(target_data[t]-1); + real input_target = input_data[target_idx]; + for(d = 0; d < dim; d++) + { + real z = 1 - input_target + input_data[d]; + if(d == target_idx) + continue; + + if(z > 0) + sum += z; + } + 
input_data += dim; + } + + if(sizeAverage) + sum /= dim; + + lua_pushnumber(L, sum); + lua_setfield(L, 1, "output"); + + THTensor_(free)(input); + THTensor_(free)(target); + lua_pushnumber(L, sum); + return 1; +} + +static int nn_(MultiMarginCriterion_updateGradInput)(lua_State *L) +{ + THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id)); + int sizeAverage = luaT_getfieldcheckboolean(L, 1, "sizeAverage"); + THTensor *gradInput = luaT_getfieldcheckudata(L, 1, "gradInput", torch_(Tensor_id)); + real *input_data; + real *gradInput_data; + real *target_data; + THTensor *target; + long nframe, dim; + long t, d; + real target_; + real g; + real sum; + + THArgCheck((input->nDimension == 1) || (input->nDimension == 2), 2, "vector or matrix expected"); + + if(input->nDimension == 1) + { + nframe = 1; + dim = input->size[0]; + target_ = luaL_checknumber(L, 3); + target = THTensor_(newWithSize1d)(1); + THTensor_(fill)(target, target_); + } + else + { + nframe = input->size[0]; + dim = input->size[1]; + target = luaT_checkudata(L, 3, torch_(Tensor_id)); + THArgCheck((target->nDimension == 1) && (target->size[0] == nframe), 3, "inconsistent target size"); + target = THTensor_(newContiguous)(target); + } + + g = (sizeAverage ? 1./((real)dim) : 1.); + + input = THTensor_(newContiguous)(input); + input_data = THTensor_(data)(input); + + THTensor_(resizeAs)(gradInput, input); + gradInput_data = THTensor_(data)(gradInput); + + target_data = THTensor_(data)(target); + + for(t = 0; t < nframe; t++) + { + long target_idx = (long)(target_data[t])-1; + real input_target = input_data[target_idx]; + real gradInput_target = 0; + for(d = 0; d < dim; d++) + { + real z = 1 - input_target + input_data[d]; + if(d == target_idx) + continue; + + if(z > 0) + { + gradInput_target -= g; + gradInput_data[d] = g; + } + else + gradInput_data[d] = 0; + } + gradInput_data[target_idx] = gradInput_target; + + input_data += dim; + gradInput_data += dim; + } + + + THTensor_(free)(input); + THTensor_(free)(target); + return 1; +} + +static const struct luaL_Reg nn_(MultiMarginCriterion__) [] = { + {"MultiMarginCriterion_updateOutput", nn_(MultiMarginCriterion_updateOutput)}, + {"MultiMarginCriterion_updateGradInput", nn_(MultiMarginCriterion_updateGradInput)}, + {NULL, NULL} +}; + +static void nn_(MultiMarginCriterion_init)(lua_State *L) +{ + luaT_pushmetaclass(L, torch_(Tensor_id)); + luaT_registeratname(L, nn_(MultiMarginCriterion__), "nn"); + lua_pop(L,1); +} + +#endif diff --git a/generic/Sigmoid.c b/generic/Sigmoid.c new file mode 100644 index 0000000..20348b9 --- /dev/null +++ b/generic/Sigmoid.c @@ -0,0 +1,44 @@ +#ifndef TH_GENERIC_FILE +#define TH_GENERIC_FILE "generic/Sigmoid.c" +#else + +static int nn_(Sigmoid_updateOutput)(lua_State *L) +{ + THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id)); + THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_(Tensor_id)); + + THTensor_(resizeAs)(output, input); + + TH_TENSOR_APPLY2(real, output, real, input, \ + *output_data = 1./(1.+ exp(- *input_data));) + + return 1; +} + +static int nn_(Sigmoid_updateGradInput)(lua_State *L) +{ + THTensor *gradOutput = luaT_checkudata(L, 3, torch_(Tensor_id)); + THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_(Tensor_id)); + THTensor *gradInput = luaT_getfieldcheckudata(L, 1, "gradInput", torch_(Tensor_id)); + + THTensor_(resizeAs)(gradInput, output); + TH_TENSOR_APPLY3(real, gradInput, real, gradOutput, real, output, \ + real z = *output_data; \ + *gradInput_data = *gradOutput_data * (1. 
- z) * z;) + return 1; +} + +static const struct luaL_Reg nn_(Sigmoid__) [] = { + {"Sigmoid_updateOutput", nn_(Sigmoid_updateOutput)}, + {"Sigmoid_updateGradInput", nn_(Sigmoid_updateGradInput)}, + {NULL, NULL} +}; + +static void nn_(Sigmoid_init)(lua_State *L) +{ + luaT_pushmetaclass(L, torch_(Tensor_id)); + luaT_registeratname(L, nn_(Sigmoid__), "nn"); + lua_pop(L,1); +} + +#endif diff --git a/generic/SoftMax.c b/generic/SoftMax.c new file mode 100644 index 0000000..3aaae65 --- /dev/null +++ b/generic/SoftMax.c @@ -0,0 +1,114 @@ +#ifndef TH_GENERIC_FILE +#define TH_GENERIC_FILE "generic/SoftMax.c" +#else + +static int nn_(SoftMax_updateOutput)(lua_State *L) +{ + THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id)); + THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_(Tensor_id)); + real *input_data, *output_data; + long nframe = 0, dim = 0; + long t, d; + + if(input->nDimension == 1) + { + nframe = 1; + dim = input->size[0]; + } + else if(input->nDimension == 2) + { + nframe = input->size[0]; + dim = input->size[1]; + } + else + THArgCheck(0, 2, "vector or matrix expected"); + + input = THTensor_(newContiguous)(input); + THTensor_(resizeAs)(output, input); + + input_data = THTensor_(data)(input); + output_data = THTensor_(data)(output); + for(t = 0; t < nframe; t++) + { + real inputMax = -THInf; + for(d = 0; d < dim; d++) { + if (input_data[d] >= inputMax) inputMax = input_data[d]; + } + + accreal sum = 0; + for(d = 0; d < dim; d++) { + real z = THExpMinusApprox(inputMax - input_data[d]); + output_data[d] = z; + sum += z; + } + + for(d = 0; d < dim; d++) { + output_data[d] *= 1/sum; + } + + input_data += dim; + output_data += dim; + } + + THTensor_(free)(input); + + return 1; +} + +static int nn_(SoftMax_updateGradInput)(lua_State *L) +{ + THTensor *gradOutput = luaT_checkudata(L, 3, torch_(Tensor_id)); + THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_(Tensor_id)); + THTensor *gradInput = luaT_getfieldcheckudata(L, 1, "gradInput", torch_(Tensor_id)); + real *gradInput_data, *gradOutput_data, *output_data; + long nframe = 0, dim = 0; + long t, d; + + if(output->nDimension == 1) + { + nframe = 1; + dim = output->size[0]; + } + else if(output->nDimension == 2) + { + nframe = output->size[0]; + dim = output->size[1]; + } + else + THError("vector or matrix expected"); + + THTensor_(resizeAs)(gradInput, output); + gradInput_data = THTensor_(data)(gradInput); + output_data = THTensor_(data)(output); + gradOutput_data = THTensor_(data)(gradOutput); + for(t = 0; t < nframe; t++) + { + accreal sum = 0; + for(d = 0; d < dim; d++) + sum += (accreal)gradOutput_data[d] * output_data[d]; + + for(d = 0; d < dim; d++) + gradInput_data[d] = output_data[d] * (gradOutput_data[d] - sum); + + gradInput_data += dim; + output_data += dim; + gradOutput_data += dim; + } + + return 1; +} + +static const struct luaL_Reg nn_(SoftMax__) [] = { + {"SoftMax_updateOutput", nn_(SoftMax_updateOutput)}, + {"SoftMax_updateGradInput", nn_(SoftMax_updateGradInput)}, + {NULL, NULL} +}; + +static void nn_(SoftMax_init)(lua_State *L) +{ + luaT_pushmetaclass(L, torch_(Tensor_id)); + luaT_registeratname(L, nn_(SoftMax__), "nn"); + lua_pop(L,1); +} + +#endif diff --git a/generic/SoftPlus.c b/generic/SoftPlus.c new file mode 100644 index 0000000..7a097fb --- /dev/null +++ b/generic/SoftPlus.c @@ -0,0 +1,44 @@ +#ifndef TH_GENERIC_FILE +#define TH_GENERIC_FILE "generic/SoftPlus.c" +#else + +static int nn_(SoftPlus_updateOutput)(lua_State *L) +{ + THTensor *input = luaT_checkudata(L, 2, 
torch_(Tensor_id)); + THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_(Tensor_id)); + + THTensor_(resizeAs)(output, input); + + TH_TENSOR_APPLY2(real, output, real, input, \ + *output_data = log1p(exp(*input_data));) + + return 1; +} + +static int nn_(SoftPlus_updateGradInput)(lua_State *L) +{ + THTensor *gradOutput = luaT_checkudata(L, 3, torch_(Tensor_id)); + THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_(Tensor_id)); + THTensor *gradInput = luaT_getfieldcheckudata(L, 1, "gradInput", torch_(Tensor_id)); + + THTensor_(resizeAs)(gradInput, output); + TH_TENSOR_APPLY3(real, gradInput, real, gradOutput, real, output, \ + real z = exp(*output_data); \ + *gradInput_data = *gradOutput_data * (z - 1.)/z;) + return 1; +} + +static const struct luaL_Reg nn_(SoftPlus__) [] = { + {"SoftPlus_updateOutput", nn_(SoftPlus_updateOutput)}, + {"SoftPlus_updateGradInput", nn_(SoftPlus_updateGradInput)}, + {NULL, NULL} +}; + +static void nn_(SoftPlus_init)(lua_State *L) +{ + luaT_pushmetaclass(L, torch_(Tensor_id)); + luaT_registeratname(L, nn_(SoftPlus__), "nn"); + lua_pop(L,1); +} + +#endif diff --git a/generic/SoftShrink.c b/generic/SoftShrink.c new file mode 100644 index 0000000..0bc4075 --- /dev/null +++ b/generic/SoftShrink.c @@ -0,0 +1,50 @@ +#ifndef TH_GENERIC_FILE +#define TH_GENERIC_FILE "generic/SoftShrink.c" +#else + +static int nn_(SoftShrink_updateOutput)(lua_State *L) +{ + THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id)); + real lambda = luaT_getfieldchecknumber(L, 1, "lambda"); + THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_(Tensor_id)); + + THTensor_(resizeAs)(output, input); + + TH_TENSOR_APPLY2(real, output, real, input, \ + if ((*input_data) > lambda) *output_data = *input_data - lambda; \ + else if ((*input_data) < -lambda) *output_data = *input_data + lambda; \ + else *output_data = 0;); + return 1; +} + +static int nn_(SoftShrink_updateGradInput)(lua_State *L) +{ + THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id)); + real lambda = luaT_getfieldchecknumber(L, 1, "lambda"); + THTensor *gradOutput = luaT_checkudata(L, 3, torch_(Tensor_id)); + THTensor *gradInput = luaT_getfieldcheckudata(L, 1, "gradInput", torch_(Tensor_id)); + + THTensor_(resizeAs)(gradInput, input); + TH_TENSOR_APPLY3(real, gradInput, real, gradOutput, real, input, \ + if ((*input_data) > lambda || (*input_data) < -lambda) \ + *gradInput_data = (*gradOutput_data); \ + else \ + *gradInput_data = 0; \ + ); + return 1; +} + +static const struct luaL_Reg nn_(SoftShrink__) [] = { + {"SoftShrink_updateOutput", nn_(SoftShrink_updateOutput)}, + {"SoftShrink_updateGradInput", nn_(SoftShrink_updateGradInput)}, + {NULL, NULL} +}; + +static void nn_(SoftShrink_init)(lua_State *L) +{ + luaT_pushmetaclass(L, torch_(Tensor_id)); + luaT_registeratname(L, nn_(SoftShrink__), "nn"); + lua_pop(L,1); +} + +#endif diff --git a/generic/SparseLinear.c b/generic/SparseLinear.c new file mode 100644 index 0000000..d29a1aa --- /dev/null +++ b/generic/SparseLinear.c @@ -0,0 +1,130 @@ +#ifndef TH_GENERIC_FILE +#define TH_GENERIC_FILE "generic/SparseLinear.c" +#else + +static int nn_(SparseLinear_updateOutput)(lua_State *L) +{ + long i; + THTensor * input = luaT_checkudata(L, 2, torch_(Tensor_id)); + THTensor * weight = luaT_getfieldcheckudata(L, 1, "weight", torch_(Tensor_id)); + THTensor * bias = luaT_getfieldcheckudata(L, 1, "bias", torch_(Tensor_id)); + THTensor * output = luaT_getfieldcheckudata(L, 1, "output", torch_(Tensor_id)); + long dim = weight->size[0]; /* number 
of weights.. */ + + THTensor_(copy)(output, bias); + for(i = 0; i < input->size[1]; i++) + { + long offset = (long)(THTensor_(get2d)(input, 0, i))-1; + + if(offset >= 0 && offset < dim) /* make sure indices are in bounds.. */ + { + real val = THTensor_(get2d)(input, 1, i); + THBlas_(axpy)(output->size[0], + val, + THTensor_(data)(weight)+offset*weight->stride[0], + weight->stride[1], + THTensor_(data)(output), + output->stride[0]); + } + else + luaL_error(L, "index out of bound"); + } + return 1; +} + +static int nn_(SparseLinear_accGradParameters)(lua_State *L) +{ + long i; + THTensor * input = luaT_checkudata(L, 2, torch_(Tensor_id)); + THTensor * gradOutput = luaT_checkudata(L, 3, torch_(Tensor_id)); + real scale = luaL_optnumber(L, 4, 1); + THTensor * weight = luaT_getfieldcheckudata(L, 1, "weight", torch_(Tensor_id)); + THTensor * output = luaT_getfieldcheckudata(L, 1, "output", torch_(Tensor_id)); + THTensor * gradBias = luaT_getfieldcheckudata(L, 1, "gradBias", torch_(Tensor_id)); + THTensor * gradWeight = luaT_getfieldcheckudata(L, 1, "gradWeight", torch_(Tensor_id)); + THTensor * lastInput = luaT_getfieldcheckudata(L, 1, "lastInput", torch_(Tensor_id)); + real weightDecay = luaT_getfieldchecknumber(L, 1, "weightDecay"); + long dim = gradWeight->size[0]; /* number of weights.. */ + + for(i = 0; i < input->size[1]; i++) + { + long offset = (long)(THTensor_(get2d)(input, 0, i))-1; + + if(offset >= 0 && offset < dim) /* make sure indices are in bounds.. */ + { + real val = scale*THTensor_(get2d)(input, 1, i); + THBlas_(scal)(gradOutput->size[0], + 0, + THTensor_(data)(gradWeight)+offset*gradWeight->stride[0], + gradWeight->stride[1]); /* zero */ + + THBlas_(axpy)(gradOutput->size[0], + val, + THTensor_(data)(gradOutput), + gradOutput->stride[0], + THTensor_(data)(gradWeight)+offset*gradWeight->stride[0], + gradWeight->stride[1]); + } + else + luaL_error(L, "index out of bound"); + } + + THTensor_(cadd)(gradBias, gradBias, 1, gradOutput); + + if(weightDecay != 0) + THTensor_(cadd)(gradWeight, gradWeight, weightDecay, weight); + + THTensor_(resizeAs)(lastInput, input); + THTensor_(copy)(lastInput, input); + + return 0; +} + +int nn_(SparseLinear_updateParameters)(lua_State *L) +{ + long i; + real learningRate = luaL_checknumber(L, 2); + THTensor * weight = luaT_getfieldcheckudata(L, 1, "weight", torch_(Tensor_id)); + THTensor * output = luaT_getfieldcheckudata(L, 1, "output", torch_(Tensor_id)); + THTensor * bias = luaT_getfieldcheckudata(L, 1, "bias", torch_(Tensor_id)); + THTensor * gradBias = luaT_getfieldcheckudata(L, 1, "gradBias", torch_(Tensor_id)); + THTensor * gradWeight = luaT_getfieldcheckudata(L, 1, "gradWeight", torch_(Tensor_id)); + THTensor * lastInput = luaT_getfieldcheckudata(L, 1, "lastInput", torch_(Tensor_id)); + real weightDecay = luaT_getfieldchecknumber(L, 1, "weightDecay"); + + long dim = weight->size[0]; /* number of weights.. */ + THTensor_(cadd)(bias, bias, -learningRate, gradBias); + + for(i = 0; i < lastInput->size[1]; i++) + { + long offset = (long)(THTensor_(get2d)(lastInput, 0, i))-1; + + if(offset >= 0 && offset < dim) /* make sure indices are in bounds.. 
*/ + { + THBlas_(axpy)(bias->size[0], + -learningRate, + THTensor_(data)(gradWeight)+offset*gradWeight->stride[0], + gradWeight->stride[1], + THTensor_(data)(weight)+offset*weight->stride[0], + weight->stride[1]); + } + else + luaL_error(L, "index out of bound"); + } + return 0; +} + +static const struct luaL_Reg nn_(SparseLinear__) [] = { + {"SparseLinear_updateOutput", nn_(SparseLinear_updateOutput)}, + {"SparseLinear_updateParameters", nn_(SparseLinear_updateParameters)}, + {NULL, NULL} +}; + +void nn_(SparseLinear_init)(lua_State *L) +{ + luaT_pushmetaclass(L, torch_(Tensor_id)); + luaT_registeratname(L, nn_(SparseLinear__), "nn"); + lua_pop(L,1); +} + +#endif diff --git a/generic/SpatialConvolution.c b/generic/SpatialConvolution.c new file mode 100644 index 0000000..de0de1d --- /dev/null +++ b/generic/SpatialConvolution.c @@ -0,0 +1,201 @@ +#ifndef TH_GENERIC_FILE +#define TH_GENERIC_FILE "generic/SpatialConvolution.c" +#else + +static void nn_(convolution_updateOutput_)(THTensor *input, THTensor *output, THTensor *weight, THTensor *bias, int dH, int dW) +{ + /* add bias */ + long i; + THTensor *outn = THTensor_(new)(); + for (i=0; i<bias->size[0]; i++) { + THTensor_(select)(outn,output,0,i); + THTensor_(fill)(outn, THTensor_(get1d)(bias, i)); + } + THTensor_(free)(outn); + + /* do convolutions */ + THTensor_(conv2Dmv)(output, 1.0, 1.0, input, weight, dH, dW, "V","X"); +} + +static int nn_(SpatialConvolution_updateOutput)(lua_State *L) +{ + THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id)); + int dW = luaT_getfieldcheckint(L, 1, "dW"); + int dH = luaT_getfieldcheckint(L, 1, "dH"); + + THTensor *weight = luaT_getfieldcheckudata(L, 1, "weight", torch_(Tensor_id)); + THTensor *bias = luaT_getfieldcheckudata(L, 1, "bias", torch_(Tensor_id)); + THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_(Tensor_id)); + + luaL_argcheck(L, input->nDimension == 3 || input->nDimension == 4, 2, "3D or 4D(batch mode) tensor expected"); + + int dimw = 2; + int dimh = 1; + if (input->nDimension == 4) { + dimw++; + dimh++; + } + + long nOutputPlane = weight->size[0]; + long nInputPlane = weight->size[1]; + long kW = weight->size[3]; + long kH = weight->size[2]; + long inputWidth = input->size[dimw]; + long inputHeight = input->size[dimh]; + long outputWidth = (inputWidth - kW) / dW + 1; + long outputHeight = (inputHeight - kH) / dH + 1; + + if (input->nDimension == 3) + { + THTensor_(resize3d)(output, nOutputPlane, outputHeight, outputWidth); +/* printf("\n*************\nstochastic\n"); */ +/* printf("no=%d\n",output->nDimension); */ +/* printf("no=%ld,%ld,%ld\n",nOutputPlane,outputHeight,outputWidth); */ +/* printf("ni=%d\n",input->nDimension); */ + nn_(convolution_updateOutput_)(input,output,weight,bias,dH,dW); +/* printf("stochastic\n");*/ + } + else + { + THTensor_(resize4d)(output, input->size[0], nOutputPlane, outputHeight, outputWidth); + THTensor *outn = THTensor_(new)(); + THTensor *inpn = THTensor_(new)(); + long i; + for (i=0; i<input->size[0]; i++) + { + THTensor_(select)(outn,output,0,i); + THTensor_(select)(inpn,input,0,i); + nn_(convolution_updateOutput_)(inpn,outn,weight,bias,dH,dW); + } + THTensor_(free)(outn); + THTensor_(free)(inpn); + } + +/* /\* add bias *\/ */ +/* long i; */ +/* THTensor *outn = THTensor_(new)(); */ +/* for (i=0; i<bias->size[0]; i++) { */ +/* THTensor_(select)(outn,output,0,i); */ +/* THTensor_(fill)(outn, THTensor_(get1d)(bias, i)); */ +/* } */ +/* THTensor_(free)(outn); */ + +/* /\* do convolutions *\/ */ +/* THTensor_(conv2Dmv)(output, 1.0, 
1.0, input, weight, dH, dW, "vx"); */ + + return 1; +} + + +static int nn_(SpatialConvolution_updateGradInput)(lua_State *L) +{ + THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id)); + THTensor *gradOutput = luaT_checkudata(L, 3, torch_(Tensor_id)); + int dW = luaT_getfieldcheckint(L, 1, "dW"); + int dH = luaT_getfieldcheckint(L, 1, "dH"); + int nOutputPlane = luaT_getfieldcheckint(L, 1, "nOutputPlane"); + + THTensor *weight = luaT_getfieldcheckudata(L, 1, "weight", torch_(Tensor_id)); + THTensor *gradInput = luaT_getfieldcheckudata(L, 1, "gradInput", torch_(Tensor_id)); + + THArgCheck( nOutputPlane == gradOutput->size[input->nDimension == 4 ? 1 : 0], 1, "Number of output features is not equal to nOutputPlane" ); + + long k; + + /* gradient to input */ + THTensor *tweight = THTensor_(newTranspose)(weight,0,1); + + if(input->nDimension == 3) + { + THTensor_(conv2Dmv)(gradInput, 0.0, 1.0, gradOutput, tweight, dH, dW, "F", "C"); + } + else + { + + THTensor_(resizeAs)(gradInput,input); + THTensor *outn = THTensor_(new)(); + THTensor *inpn = THTensor_(new)(); + long i; + for (i=0; i<input->size[0]; i++) + { + THTensor_(select)(outn,gradOutput,0,i); + THTensor_(select)(inpn,gradInput,0,i); + THTensor_(conv2Dmv)(inpn, 0.0, 1.0, outn, tweight, dH, dW, "F", "C"); + } + THTensor_(free)(outn); + THTensor_(free)(inpn); + } + THTensor_(free)(tweight); + + return 1; +} + +static void nn_(convolution_accGradParameters_)(THTensor *input, THTensor *gradOutput, THTensor *gradWeight, THTensor *gradBias, real scale, int dH, int dW) +{ + long k; + + /* gradient to bias */ + real *gradBias_data = THTensor_(data)(gradBias); + THTensor* gradOutSlice = THTensor_(new)(); + for(k = 0; k < gradOutput->size[0]; k++) + { + THTensor_(select)(gradOutSlice, gradOutput, 0, k); + gradBias_data[k] += scale*THTensor_(sumall)(gradOutSlice); + } + THTensor_(free)(gradOutSlice); + + /* gradient to kernels */ + THTensor_(conv2DRevger)(gradWeight, 1.0, scale, input, gradOutput, dH, dW); +} + +static int nn_(SpatialConvolution_accGradParameters)(lua_State *L) +{ + THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id)); + THTensor *gradOutput = luaT_checkudata(L, 3, torch_(Tensor_id)); + real scale = luaL_optnumber(L, 4, 1); + int dW = luaT_getfieldcheckint(L, 1, "dW"); + int dH = luaT_getfieldcheckint(L, 1, "dH"); + int nOutputPlane = luaT_getfieldcheckint(L, 1, "nOutputPlane"); + + THTensor *gradWeight = luaT_getfieldcheckudata(L, 1, "gradWeight", torch_(Tensor_id)); + THTensor *gradBias = luaT_getfieldcheckudata(L, 1, "gradBias", torch_(Tensor_id)); + + THArgCheck( nOutputPlane == gradOutput->size[input->nDimension == 4 ? 
1 : 0], 1, "Number of output features is not equal to nOutputPlane" ); + + if(input->nDimension == 3) + { + nn_(convolution_accGradParameters_)(input,gradOutput,gradWeight,gradBias,scale,dH,dW); + } + else + { + THTensor *outn = THTensor_(new)(); + THTensor *inpn = THTensor_(new)(); + long i; + for (i=0; i<input->size[0]; i++) + { + THTensor_(select)(outn,gradOutput,0,i); + THTensor_(select)(inpn,input,0,i); + nn_(convolution_accGradParameters_)(inpn,outn,gradWeight,gradBias,scale,dH,dW); + } + THTensor_(free)(outn); + THTensor_(free)(inpn); + } + + return 0; +} + +static const struct luaL_Reg nn_(SpatialConvolution__) [] = { + {"SpatialConvolution_updateOutput", nn_(SpatialConvolution_updateOutput)}, + {"SpatialConvolution_updateGradInput", nn_(SpatialConvolution_updateGradInput)}, + {"SpatialConvolution_accGradParameters", nn_(SpatialConvolution_accGradParameters)}, + {NULL, NULL} +}; + +static void nn_(SpatialConvolution_init)(lua_State *L) +{ + luaT_pushmetaclass(L, torch_(Tensor_id)); + luaT_registeratname(L, nn_(SpatialConvolution__), "nn"); + lua_pop(L,1); +} + +#endif diff --git a/generic/SpatialConvolutionMap.c b/generic/SpatialConvolutionMap.c new file mode 100644 index 0000000..ff7d8ca --- /dev/null +++ b/generic/SpatialConvolutionMap.c @@ -0,0 +1,229 @@ +#ifndef TH_GENERIC_FILE +#define TH_GENERIC_FILE "generic/SpatialConvolutionMap.c" +#else + +static int nn_(SpatialConvolutionMap_updateOutput)(lua_State *L) +{ + THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id)); + int kW = luaT_getfieldcheckint(L, 1, "kW"); + int kH = luaT_getfieldcheckint(L, 1, "kH"); + int dW = luaT_getfieldcheckint(L, 1, "dW"); + int dH = luaT_getfieldcheckint(L, 1, "dH"); + int nInputPlane = luaT_getfieldcheckint(L, 1, "nInputPlane"); + int nOutputPlane = luaT_getfieldcheckint(L, 1, "nOutputPlane"); + + THTensor *connTable = luaT_getfieldcheckudata(L, 1, "connTable", torch_(Tensor_id)); + THTensor *weight = luaT_getfieldcheckudata(L, 1, "weight", torch_(Tensor_id)); + THTensor *bias = luaT_getfieldcheckudata(L, 1, "bias", torch_(Tensor_id)); + THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_(Tensor_id)); + + luaL_argcheck(L, input->nDimension == 3, 2, "3D tensor expected"); + luaL_argcheck(L, input->size[0] == nInputPlane, 2, "invalid number of input planes"); + luaL_argcheck(L, input->size[2] >= kW && input->size[1] >= kH, 2, "input image smaller than kernel size"); + + THTensor_(resize3d)(output, nOutputPlane, + (input->size[1] - kH) / dH + 1, + (input->size[2] - kW) / dW + 1); + + // contiguous + input = THTensor_(newContiguous)(input); + output = THTensor_(newContiguous)(output); + + // get raw pointers + real *input_data = THTensor_(data)(input); + real *output_data = THTensor_(data)(output); + real *weight_data = THTensor_(data)(weight); + + // and dims + long input_n = input->size[0]; + long input_h = input->size[1]; + long input_w = input->size[2]; + long output_n = output->size[0]; + long output_h = output->size[1]; + long output_w = output->size[2]; + long weight_n = weight->size[0]; + long weight_h = weight->size[1]; + long weight_w = weight->size[2]; + + // add bias + THTensor *outputPlane = THTensor_(new)(); + int k; + for (k = 0; k < nOutputPlane; k++) { + THTensor_(select)(outputPlane,output,0,k); + THTensor_(fill)(outputPlane, THTensor_(get1d)(bias, k)); + } + THTensor_(free)(outputPlane); + + // convolve all maps + int i,o; + int nweight = connTable->size[0]; + for (k = 0; k < nweight; k++) { + // get offsets for input/output + o = 
(int)THTensor_(get2d)(connTable,k,1)-1; + i = (int)THTensor_(get2d)(connTable,k,0)-1; + + // convolve each map + THTensor_(validXCorr2Dptr)(output_data + o*output_w*output_h, + 1.0, + input_data + i*input_w*input_h, input_h, input_w, + weight_data + k*weight_w*weight_h, weight_h, weight_w, + dH, dW); + } + + // clean up + THTensor_(free)(input); + THTensor_(free)(output); + + return 1; +} + +static int nn_(SpatialConvolutionMap_updateGradInput)(lua_State *L) +{ + THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id)); + THTensor *gradOutput = luaT_checkudata(L, 3, torch_(Tensor_id)); + int kW = luaT_getfieldcheckint(L, 1, "kW"); + int kH = luaT_getfieldcheckint(L, 1, "kH"); + int dW = luaT_getfieldcheckint(L, 1, "dW"); + int dH = luaT_getfieldcheckint(L, 1, "dH"); + int nInputPlane = luaT_getfieldcheckint(L, 1, "nInputPlane"); + int nOutputPlane = luaT_getfieldcheckint(L, 1, "nOutputPlane"); + + THTensor *connTable = luaT_getfieldcheckudata(L, 1, "connTable", torch_(Tensor_id)); + THTensor *weight = luaT_getfieldcheckudata(L, 1, "weight", torch_(Tensor_id)); + THTensor *gradWeight = luaT_getfieldcheckudata(L, 1, "gradWeight", torch_(Tensor_id)); + THTensor *gradBias = luaT_getfieldcheckudata(L, 1, "gradBias", torch_(Tensor_id)); + THTensor *gradInput = luaT_getfieldcheckudata(L, 1, "gradInput", torch_(Tensor_id)); + + // contiguous + gradInput = THTensor_(newContiguous)(gradInput); + gradOutput = THTensor_(newContiguous)(gradOutput); + + // Resize/Zero + THTensor_(resizeAs)(gradInput, input); + THTensor_(zero)(gradInput); + + // get raw pointers + real *gradInput_data = THTensor_(data)(gradInput); + real *gradOutput_data = THTensor_(data)(gradOutput); + real *weight_data = THTensor_(data)(weight); + real *gradWeight_data = THTensor_(data)(gradWeight); + + // and dims + long input_n = input->size[0]; + long input_h = input->size[1]; + long input_w = input->size[2]; + long output_n = gradOutput->size[0]; + long output_h = gradOutput->size[1]; + long output_w = gradOutput->size[2]; + long weight_n = weight->size[0]; + long weight_h = weight->size[1]; + long weight_w = weight->size[2]; + + // updateGradInput all + int k; + int nkernel = connTable->size[0]; + for(k = 0; k < nkernel; k++) + { + int o = (int)THTensor_(get2d)(connTable,k,1)-1; + int i = (int)THTensor_(get2d)(connTable,k,0)-1; + + // gradient to input + THTensor_(fullConv2Dptr)(gradInput_data + i*input_w*input_h, + 1.0, + gradOutput_data + o*output_w*output_h, output_h, output_w, + weight_data + k*weight_w*weight_h, weight_h, weight_w, + dH, dW); + } + + // clean up + THTensor_(free)(gradInput); + THTensor_(free)(gradOutput); + + return 1; +} + +static int nn_(SpatialConvolutionMap_accGradParameters)(lua_State *L) +{ + THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id)); + THTensor *gradOutput = luaT_checkudata(L, 3, torch_(Tensor_id)); + int kW = luaT_getfieldcheckint(L, 1, "kW"); + int kH = luaT_getfieldcheckint(L, 1, "kH"); + int dW = luaT_getfieldcheckint(L, 1, "dW"); + int dH = luaT_getfieldcheckint(L, 1, "dH"); + int nInputPlane = luaT_getfieldcheckint(L, 1, "nInputPlane"); + int nOutputPlane = luaT_getfieldcheckint(L, 1, "nOutputPlane"); + real scale = luaL_optnumber(L, 4, 1); + + THTensor *connTable = luaT_getfieldcheckudata(L, 1, "connTable", torch_(Tensor_id)); + THTensor *weight = luaT_getfieldcheckudata(L, 1, "weight", torch_(Tensor_id)); + THTensor *gradWeight = luaT_getfieldcheckudata(L, 1, "gradWeight", torch_(Tensor_id)); + THTensor *gradBias = luaT_getfieldcheckudata(L, 1, "gradBias", torch_(Tensor_id)); 
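+  /* each row k of connTable holds an (input plane, output plane) pair, as in updateOutput above; the loops below accumulate the bias gradient per output plane and the kernel gradient per table entry */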
+ THTensor *gradInput = luaT_getfieldcheckudata(L, 1, "gradInput", torch_(Tensor_id)); + + // contiguous + input = THTensor_(newContiguous)(input); + gradOutput = THTensor_(newContiguous)(gradOutput); + + // get raw pointers + real *input_data = THTensor_(data)(input); + real *gradOutput_data = THTensor_(data)(gradOutput); + real *weight_data = THTensor_(data)(weight); + real *gradWeight_data = THTensor_(data)(gradWeight); + + // and dims + long input_n = input->size[0]; + long input_h = input->size[1]; + long input_w = input->size[2]; + long output_n = gradOutput->size[0]; + long output_h = gradOutput->size[1]; + long output_w = gradOutput->size[2]; + long weight_n = weight->size[0]; + long weight_h = weight->size[1]; + long weight_w = weight->size[2]; + + // gradients wrt bias + int k; + THTensor *gradOutputPlane = THTensor_(new)(); + real *gradBias_data = THTensor_(data)(gradBias); + for(k = 0; k < nOutputPlane; k++) { + THTensor_(select)(gradOutputPlane, gradOutput, 0, k); + gradBias_data[k] += scale * THTensor_(sumall)(gradOutputPlane); + } + THTensor_(free)(gradOutputPlane); + + // gradients wrt weight + int nkernel = connTable->size[0]; + for(k = 0; k < nkernel; k++) + { + int o = (int)THTensor_(get2d)(connTable,k,1)-1; + int i = (int)THTensor_(get2d)(connTable,k,0)-1; + + // gradient to kernel + THTensor_(validXCorr2DRevptr)(gradWeight_data + k*weight_w*weight_h, + scale, + input_data + i*input_w*input_h, input_h, input_w, + gradOutput_data + o*output_w*output_h, output_h, output_w, + dH, dW); + } + + // clean up + THTensor_(free)(input); + THTensor_(free)(gradOutput); + return 0; +} + +static const struct luaL_Reg nn_(SpatialConvolutionMap__) [] = { + {"SpatialConvolutionMap_updateOutput", nn_(SpatialConvolutionMap_updateOutput)}, + {"SpatialConvolutionMap_updateGradInput", nn_(SpatialConvolutionMap_updateGradInput)}, + {"SpatialConvolutionMap_accGradParameters", nn_(SpatialConvolutionMap_accGradParameters)}, + {NULL, NULL} +}; + +static void nn_(SpatialConvolutionMap_init)(lua_State *L) +{ + luaT_pushmetaclass(L, torch_(Tensor_id)); + luaT_registeratname(L, nn_(SpatialConvolutionMap__), "nn"); + lua_pop(L,1); +} + +#endif diff --git a/generic/SpatialMaxPooling.c b/generic/SpatialMaxPooling.c new file mode 100644 index 0000000..b9fab3b --- /dev/null +++ b/generic/SpatialMaxPooling.c @@ -0,0 +1,163 @@ +#ifndef TH_GENERIC_FILE +#define TH_GENERIC_FILE "generic/SpatialMaxPooling.c" +#else + +static int nn_(SpatialMaxPooling_updateOutput)(lua_State *L) +{ + THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id)); + int kW = luaT_getfieldcheckint(L, 1, "kW"); + int kH = luaT_getfieldcheckint(L, 1, "kH"); + int dW = luaT_getfieldcheckint(L, 1, "dW"); + int dH = luaT_getfieldcheckint(L, 1, "dH"); + THTensor *indices = luaT_getfieldcheckudata(L, 1, "indices", torch_(Tensor_id)); + THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_(Tensor_id)); + + luaL_argcheck(L, input->nDimension == 3, 2, "3D tensor expected"); + luaL_argcheck(L, input->size[2] >= kW && input->size[1] >= kH, 2, "input image smaller than kernel size"); + + // sizes + long nslices = input->size[0]; + long iheight = input->size[1]; + long iwidth = input->size[2]; + long oheight = (iheight - kH) / dH + 1; + long owidth = (iwidth - kW) / dW + 1; + + // get contiguous input + input = THTensor_(newContiguous)(input); + + // resize output + THTensor_(resize3d)(output, nslices, oheight, owidth); + + // indices will contain i,j locatyions for each output point + THTensor_(resize4d)(indices, 2, nslices, oheight, 
owidth); + + // get raw pointers + real *input_data = THTensor_(data)(input); + real *output_data = THTensor_(data)(output); + real *indices_data = THTensor_(data)(indices); + + // compute max pooling for each input slice + long k; + for (k = 0; k < nslices; k++) { + // pointers to slices + real *input_p = input_data + k*iwidth*iheight; + real *output_p = output_data + k*owidth*oheight; + real *indy_p = indices_data + k*owidth*oheight; + real *indx_p = indices_data + (k+nslices)*owidth*oheight; + + // loop over output + int i,j; + for(i = 0; i < oheight; i++) { + for(j = 0; j < owidth; j++) { + // local pointers + real *ip = input_p + i*iwidth*dH + j*dW; + real *op = output_p + i*owidth + j; + real *indyp = indy_p + i*owidth + j; + real *indxp = indx_p + i*owidth + j; + + // compute local max: + long maxindex = -1; + real maxval = -THInf; + long tcntr = 0; + int x,y; + for(y = 0; y < kH; y++) { + for(x = 0; x < kW; x++) { + real val = *(ip + y*iwidth + x); + if (val > maxval) { + maxval = val; + maxindex = tcntr; + } + tcntr++; + } + } + + // set output to local max + *op = maxval; + + // store location of max (x,y) + *indyp = (int)(maxindex / dW)+1; + *indxp = (maxindex % dW) +1; + } + } + } + + // cleanup + THTensor_(free)(input); + + return 1; +} + +static int nn_(SpatialMaxPooling_updateGradInput)(lua_State *L) +{ + THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id)); + THTensor *gradOutput = luaT_checkudata(L, 3, torch_(Tensor_id)); + int kW = luaT_getfieldcheckint(L, 1, "kW"); + int kH = luaT_getfieldcheckint(L, 1, "kH"); + int dW = luaT_getfieldcheckint(L, 1, "dW"); + int dH = luaT_getfieldcheckint(L, 1, "dH"); + THTensor *indices = luaT_getfieldcheckudata(L, 1, "indices", torch_(Tensor_id)); + THTensor *gradInput = luaT_getfieldcheckudata(L, 1, "gradInput", torch_(Tensor_id)); + + // get contiguous gradOutput + gradOutput = THTensor_(newContiguous)(gradOutput); + + // resize + THTensor_(resizeAs)(gradInput, input); + THTensor_(zero)(gradInput); + + // sizes + int ichannels = input->size[0]; + int iheight = input->size[1]; + int iwidth = input->size[2]; + int ochannels = ichannels; + int oheight = gradOutput->size[1]; + int owidth = gradOutput->size[2]; + + // get raw pointers + real *gradInput_data = THTensor_(data)(gradInput); + real *gradOutput_data = THTensor_(data)(gradOutput); + real *indices_data = THTensor_(data)(indices); + + // backprop + long k; + for (k = 0; k < input->size[0]; k++) { + // pointers to slices + real *gradOutput_p = gradOutput_data + k*owidth*oheight; + real *gradInput_p = gradInput_data + k*iwidth*iheight; + real *indy_p = indices_data + k*owidth*oheight; + real *indx_p = indices_data + (k+ochannels)*owidth*oheight; + + // calculate max points + int i,j; + for(i = 0; i < oheight; i++) { + for(j = 0; j < owidth; j++) { + // retrieve position of max + long maxi = *(indy_p + i*owidth + j) - 1 + i*dH; + long maxj = *(indx_p + i*owidth + j) - 1 + j*dW; + + // update gradient + *(gradInput_p + maxi*iwidth + maxj) += *(gradOutput_p + i*owidth + j); + } + } + } + + // cleanup + THTensor_(free)(gradOutput); + + return 1; +} + +static const struct luaL_Reg nn_(SpatialMaxPooling__) [] = { + {"SpatialMaxPooling_updateOutput", nn_(SpatialMaxPooling_updateOutput)}, + {"SpatialMaxPooling_updateGradInput", nn_(SpatialMaxPooling_updateGradInput)}, + {NULL, NULL} +}; + +static void nn_(SpatialMaxPooling_init)(lua_State *L) +{ + luaT_pushmetaclass(L, torch_(Tensor_id)); + luaT_registeratname(L, nn_(SpatialMaxPooling__), "nn"); + lua_pop(L,1); +} + +#endif diff --git 
a/generic/SpatialSubSampling.c b/generic/SpatialSubSampling.c new file mode 100644 index 0000000..705253f --- /dev/null +++ b/generic/SpatialSubSampling.c @@ -0,0 +1,278 @@ +#ifndef TH_GENERIC_FILE +#define TH_GENERIC_FILE "generic/SpatialSubSampling.c" +#else + +static int nn_(SpatialSubSampling_updateOutput)(lua_State *L) +{ + THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id)); + int kW = luaT_getfieldcheckint(L, 1, "kW"); + int kH = luaT_getfieldcheckint(L, 1, "kH"); + int dW = luaT_getfieldcheckint(L, 1, "dW"); + int dH = luaT_getfieldcheckint(L, 1, "dH"); + int nInputPlane = luaT_getfieldcheckint(L, 1, "nInputPlane"); + + THTensor *weight = luaT_getfieldcheckudata(L, 1, "weight", torch_(Tensor_id)); + THTensor *bias = luaT_getfieldcheckudata(L, 1, "bias", torch_(Tensor_id)); + THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_(Tensor_id)); + + real *weight_data = THTensor_(data)(weight); + real *bias_data = THTensor_(data)(bias); + real *output_data; + real *input_data; + + + luaL_argcheck(L, input->nDimension == 3 || input->nDimension == 4, 2, "3D or 4D(batch mode) tensor expected"); + + int dimw = 2; + int dimh = 1; + if (input->nDimension == 4) { + dimw++; + dimh++; + } + + long inputWidth = input->size[dimw]; + long inputHeight = input->size[dimh]; + long outputWidth = (inputWidth - kW) / dW + 1; + long outputHeight = (inputHeight - kH) / dH + 1; + + + luaL_argcheck(L, input->size[dimh-1] == nInputPlane, 2, "invalid number of input planes"); + luaL_argcheck(L, inputWidth >= kW && inputHeight >= kH, 2, "input image smaller than kernel size"); + + input = THTensor_(newContiguous)(input); + input_data = THTensor_(data)(input); + + long nbatch = 1; + if (input->nDimension == 3) + { + THTensor_(resize3d)(output, nInputPlane, outputHeight, outputWidth); + } + else + { + nbatch = input->size[0]; + THTensor_(resize4d)(output, nbatch, nInputPlane, outputHeight, outputWidth); + } + + output_data = THTensor_(data)(output); + + long i, k, p; + + for(p = 0; p < nbatch; p++) + { + //input_data += p*nInputPlane*inputWidth*inputHeight; + //output_data += p*nInputPlane*outputHeight*outputWidth; + for(k = 0; k < nInputPlane; k++) + { + real *ptr_output; + long xx, yy; + + /* Get the good mask for (k,i) (k out, i in) */ + real the_weight = weight_data[k]; + + /* Initialize to the bias */ + real z = bias_data[k]; + for(i = 0; i < outputWidth*outputHeight; i++) + output_data[i] = z; + + /* For all output pixels... */ + ptr_output = output_data; + for(yy = 0; yy < outputHeight; yy++) + { + for(xx = 0; xx < outputWidth; xx++) + { + // Compute the mean of the input image... 
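+          /* (more precisely: the kW x kH window sum; the per-plane weight applied below makes this a learned scaling of the sum rather than a literal mean) */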
+ real *ptr_input = input_data+yy*dH*inputWidth+xx*dW; + real sum = 0; + long kx, ky; + + for(ky = 0; ky < kH; ky++) + { + for(kx = 0; kx < kW; kx++) + sum += ptr_input[kx]; + ptr_input += inputWidth; // next input line + } + + // Update output + *ptr_output++ += the_weight*sum; + } + } + + // Next input/output plane + output_data += outputWidth*outputHeight; + input_data += inputWidth*inputHeight; + } + } + + THTensor_(free)(input); + + return 1; +} + +static int nn_(SpatialSubSampling_updateGradInput)(lua_State *L) +{ + THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id)); + THTensor *gradOutput = luaT_checkudata(L, 3, torch_(Tensor_id)); + int kW = luaT_getfieldcheckint(L, 1, "kW"); + int kH = luaT_getfieldcheckint(L, 1, "kH"); + int dW = luaT_getfieldcheckint(L, 1, "dW"); + int dH = luaT_getfieldcheckint(L, 1, "dH"); + int nInputPlane = luaT_getfieldcheckint(L, 1, "nInputPlane"); + + THTensor *weight = luaT_getfieldcheckudata(L, 1, "weight", torch_(Tensor_id)); + THTensor *gradInput = luaT_getfieldcheckudata(L, 1, "gradInput", torch_(Tensor_id)); + + int dimw = 2; + int dimh = 1; + long nbatch = 1; + if (input->nDimension == 4) { + dimw++; + dimh++; + nbatch = input->size[0]; + } + + long inputWidth = input->size[dimw]; + long inputHeight = input->size[dimh]; + long outputWidth = (inputWidth - kW) / dW + 1; + long outputHeight = (inputHeight - kH) / dH + 1; + + real *weight_data = THTensor_(data)(weight); + real *gradOutput_data = THTensor_(data)(gradOutput); + real *gradInput_data; + + THTensor_(resizeAs)(gradInput, input); + THTensor_(zero)(gradInput); + gradInput_data = THTensor_(data)(gradInput); + gradOutput_data = THTensor_(data)(gradOutput); + + long i, k, p; + + for(p = 0; p < nbatch; p++) + { + //gradInput_data += p*nInputPlane*inputWidth*inputHeight; + //gradOutput_data += p*nInputPlane*outputWidth*outputHeight; + for(k = 0; k < nInputPlane; k++) + { + real the_weight = weight_data[k]; + real *ptr_gradOutput = gradOutput_data; + long xx, yy; + + for(yy = 0; yy < outputHeight; yy++) + { + for(xx = 0; xx < outputWidth; xx++) + { + real *ptr_gradInput = gradInput_data+yy*dH*inputWidth+xx*dW; + real z = *ptr_gradOutput++ * the_weight; + long kx, ky; + + for(ky = 0; ky < kH; ky++) + { + for(kx = 0; kx < kW; kx++) + ptr_gradInput[kx] += z; + ptr_gradInput += inputWidth; + } + } + } + gradOutput_data += outputWidth*outputHeight; + gradInput_data += inputWidth*inputHeight; + } + } + + return 1; +} + +static int nn_(SpatialSubSampling_accGradParameters)(lua_State *L) +{ + THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id)); + THTensor *gradOutput = luaT_checkudata(L, 3, torch_(Tensor_id)); + real scale = luaL_optnumber(L, 4, 1); + int kW = luaT_getfieldcheckint(L, 1, "kW"); + int kH = luaT_getfieldcheckint(L, 1, "kH"); + int dW = luaT_getfieldcheckint(L, 1, "dW"); + int dH = luaT_getfieldcheckint(L, 1, "dH"); + int nInputPlane = luaT_getfieldcheckint(L, 1, "nInputPlane"); + + THTensor *gradWeight = luaT_getfieldcheckudata(L, 1, "gradWeight", torch_(Tensor_id)); + THTensor *gradBias = luaT_getfieldcheckudata(L, 1, "gradBias", torch_(Tensor_id)); + + int dimw = 2; + int dimh = 1; + long nbatch = 1; + if (input->nDimension == 4) { + dimw++; + dimh++; + nbatch = input->size[0]; + } + + long inputWidth = input->size[dimw]; + long inputHeight = input->size[dimh]; + long outputWidth = (inputWidth - kW) / dW + 1; + long outputHeight = (inputHeight - kH) / dH + 1; + + real *gradWeight_data = THTensor_(data)(gradWeight); + real *gradBias_data = THTensor_(data)(gradBias); + real 
*gradOutput_data = THTensor_(data)(gradOutput); + real *input_data; + + input = THTensor_(newContiguous)(input); + input_data = THTensor_(data)(input); + + long i, k, p; + for(p = 0; p < nbatch; p++) + { + //input_data += p*nInputPlane*inputWidth*inputHeight; + //gradOutput_data += p*nInputPlane*inputWidth*inputHeight; + for(k = 0; k < nInputPlane; k++) + { + real *ptr_gradOutput = gradOutput_data; + real sum; + long xx, yy; + + sum = 0; + for(i = 0; i < outputWidth*outputHeight; i++) + sum += gradOutput_data[i]; + gradBias_data[k] += scale*sum; + + sum = 0; + for(yy = 0; yy < outputHeight; yy++) + { + for(xx = 0; xx < outputWidth; xx++) + { + real *ptr_input = input_data+yy*dH*inputWidth+xx*dW; + real z = *ptr_gradOutput++; + long kx, ky; + + for(ky = 0; ky < kH; ky++) + { + for(kx = 0; kx < kW; kx++) + sum += z * ptr_input[kx]; + ptr_input += inputWidth; + } + } + } + gradWeight_data[k] += scale*sum; + gradOutput_data += outputWidth*outputHeight; + input_data += inputWidth*inputHeight; + } + } + + + THTensor_(free)(input); + + return 0; +} + +static const struct luaL_Reg nn_(SpatialSubSampling__) [] = { + {"SpatialSubSampling_updateOutput", nn_(SpatialSubSampling_updateOutput)}, + {"SpatialSubSampling_updateGradInput", nn_(SpatialSubSampling_updateGradInput)}, + {"SpatialSubSampling_accGradParameters", nn_(SpatialSubSampling_accGradParameters)}, + {NULL, NULL} +}; + +static void nn_(SpatialSubSampling_init)(lua_State *L) +{ + luaT_pushmetaclass(L, torch_(Tensor_id)); + luaT_registeratname(L, nn_(SpatialSubSampling__), "nn"); + lua_pop(L,1); +} + +#endif diff --git a/generic/Sqrt.c b/generic/Sqrt.c new file mode 100644 index 0000000..a739e96 --- /dev/null +++ b/generic/Sqrt.c @@ -0,0 +1,46 @@ +#ifndef TH_GENERIC_FILE +#define TH_GENERIC_FILE "generic/Sqrt.c" +#else + +static int nn_(Sqrt_updateOutput)(lua_State *L) +{ + THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id)); + THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_(Tensor_id)); + + THTensor_(resizeAs)(output, input); + + TH_TENSOR_APPLY2(real, output, real, input, \ + *output_data = sqrt(*input_data);); + + return 1; +} + +static int nn_(Sqrt_updateGradInput)(lua_State *L) +{ + THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id)); + THTensor *gradOutput = luaT_checkudata(L, 3, torch_(Tensor_id)); + THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_(Tensor_id)); + THTensor *gradInput = luaT_getfieldcheckudata(L, 1, "gradInput", torch_(Tensor_id)); + + THTensor_(resizeAs)(gradInput, input); + + TH_TENSOR_APPLY3(real, gradInput, real, gradOutput, real, output, \ + *gradInput_data = 0.5 * (*gradOutput_data / *output_data);); + + return 1; +} + +static const struct luaL_Reg nn_(Sqrt__) [] = { + {"Sqrt_updateOutput", nn_(Sqrt_updateOutput)}, + {"Sqrt_updateGradInput", nn_(Sqrt_updateGradInput)}, + {NULL, NULL} +}; + +static void nn_(Sqrt_init)(lua_State *L) +{ + luaT_pushmetaclass(L, torch_(Tensor_id)); + luaT_registeratname(L, nn_(Sqrt__), "nn"); + lua_pop(L,1); +} + +#endif diff --git a/generic/Square.c b/generic/Square.c new file mode 100644 index 0000000..409055d --- /dev/null +++ b/generic/Square.c @@ -0,0 +1,45 @@ +#ifndef TH_GENERIC_FILE +#define TH_GENERIC_FILE "generic/Square.c" +#else + +static int nn_(Square_updateOutput)(lua_State *L) +{ + THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id)); + THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_(Tensor_id)); + + THTensor_(resizeAs)(output, input); + + TH_TENSOR_APPLY2(real, output, real, input, \ + *output_data 
= *input_data * *input_data;); + + return 1; +} + +static int nn_(Square_updateGradInput)(lua_State *L) +{ + THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id)); + THTensor *gradOutput = luaT_checkudata(L, 3, torch_(Tensor_id)); + THTensor *gradInput = luaT_getfieldcheckudata(L, 1, "gradInput", torch_(Tensor_id)); + + THTensor_(resizeAs)(gradInput, input); + + TH_TENSOR_APPLY3(real, gradInput, real, gradOutput, real, input, \ + *gradInput_data = 2.0 * (*gradOutput_data) * (*input_data);); + + return 1; +} + +static const struct luaL_Reg nn_(Square__) [] = { + {"Square_updateOutput", nn_(Square_updateOutput)}, + {"Square_updateGradInput", nn_(Square_updateGradInput)}, + {NULL, NULL} +}; + +static void nn_(Square_init)(lua_State *L) +{ + luaT_pushmetaclass(L, torch_(Tensor_id)); + luaT_registeratname(L, nn_(Square__), "nn"); + lua_pop(L,1); +} + +#endif diff --git a/generic/Tanh.c b/generic/Tanh.c new file mode 100644 index 0000000..5c24d15 --- /dev/null +++ b/generic/Tanh.c @@ -0,0 +1,45 @@ +#ifndef TH_GENERIC_FILE +#define TH_GENERIC_FILE "generic/Tanh.c" +#else + +static int nn_(Tanh_updateOutput)(lua_State *L) +{ + THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id)); + THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_(Tensor_id)); + + THTensor_(resizeAs)(output, input); + + TH_TENSOR_APPLY2(real, output, real, input, \ + *output_data = tanh(*input_data);) + + return 1; +} + +static int nn_(Tanh_updateGradInput)(lua_State *L) +{ + THTensor *gradOutput = luaT_checkudata(L, 3, torch_(Tensor_id)); + THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_(Tensor_id)); + THTensor *gradInput = luaT_getfieldcheckudata(L, 1, "gradInput", torch_(Tensor_id)); + + THTensor_(resizeAs)(gradInput, output); + TH_TENSOR_APPLY3(real, gradInput, real, gradOutput, real, output, \ + real z = *output_data; \ + *gradInput_data = *gradOutput_data * (1. 
- z*z);); + return 1; +} + +static const struct luaL_Reg nn_(Tanh__) [] = { + {"Tanh_updateOutput", nn_(Tanh_updateOutput)}, + {"Tanh_updateGradInput", nn_(Tanh_updateGradInput)}, + {NULL, NULL} +}; + +static void nn_(Tanh_init)(lua_State *L) +{ + luaT_pushmetaclass(L, torch_(Tensor_id)); + luaT_registeratname(L, nn_(Tanh__), "nn"); + lua_pop(L,1); + +} + +#endif diff --git a/generic/TemporalConvolution.c b/generic/TemporalConvolution.c new file mode 100644 index 0000000..fa14a22 --- /dev/null +++ b/generic/TemporalConvolution.c @@ -0,0 +1,194 @@ +#ifndef TH_GENERIC_FILE +#define TH_GENERIC_FILE "generic/TemporalConvolution.c" +#else + +static int nn_(TemporalConvolution_updateOutput)(lua_State *L) +{ + THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id)); + int kW = luaT_getfieldcheckint(L, 1, "kW"); + int dW = luaT_getfieldcheckint(L, 1, "dW"); + int inputFrameSize = luaT_getfieldcheckint(L, 1, "inputFrameSize"); + int outputFrameSize = luaT_getfieldcheckint(L, 1, "outputFrameSize"); + + THTensor *weight = luaT_getfieldcheckudata(L, 1, "weight", torch_(Tensor_id)); + THTensor *bias = luaT_getfieldcheckudata(L, 1, "bias", torch_(Tensor_id)); + THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_(Tensor_id)); + + THTensor *outputWindow, *inputWindow; + int nInputFrame, nOutputFrame; + long k; + + luaL_argcheck(L, input->nDimension == 2, 2, "2D tensor expected"); + luaL_argcheck(L, input->size[1] == inputFrameSize, 2, "invalid input frame size"); + luaL_argcheck(L, input->size[0] >= kW, 2, "input sequence smaller than kernel size"); + + input = THTensor_(newContiguous)(input); + outputWindow = THTensor_(new)(); + inputWindow = THTensor_(new)(); + + nInputFrame = input->size[0]; + nOutputFrame = (nInputFrame - kW) / dW + 1; + + THTensor_(resize2d)(output, + nOutputFrame, + outputFrameSize); + + /* bias first */ + for(k = 0; k < nOutputFrame; k++) + { + THTensor_(select)(outputWindow, output, 0, k); + THTensor_(copy)(outputWindow, bias); + } + + /* ouch */ + for(k = 0; nOutputFrame > 0; k++) + { + long outputFrameStride = (kW-1)/dW+1; + long inputFrameStride = outputFrameStride*dW; + long nFrame = (nInputFrame-k*dW-kW)/inputFrameStride + 1; + nOutputFrame -= nFrame; + + THTensor_(setStorage2d)(inputWindow, input->storage, + input->storageOffset+k*dW*input->size[1], + nFrame, inputFrameStride*input->size[1], + kW*input->size[1], 1); + + THTensor_(setStorage2d)(outputWindow, output->storage, + output->storageOffset + k*output->size[1], + nFrame, outputFrameStride*output->size[1], + output->size[1], 1); + + THTensor_(transpose)(weight, NULL, 0, 1); + THTensor_(addmm)(outputWindow, 1, outputWindow, 1, inputWindow, weight); + THTensor_(transpose)(weight, NULL, 0, 1); + } + + THTensor_(free)(outputWindow); + THTensor_(free)(inputWindow); + THTensor_(free)(input); + + return 1; +} + +static int nn_(TemporalConvolution_updateGradInput)(lua_State *L) +{ + THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id)); + THTensor *gradOutput = luaT_checkudata(L, 3, torch_(Tensor_id)); + int kW = luaT_getfieldcheckint(L, 1, "kW"); + int dW = luaT_getfieldcheckint(L, 1, "dW"); + long nInputFrame = input->size[0]; + long nOutputFrame = gradOutput->size[0]; + + THTensor *weight = luaT_getfieldcheckudata(L, 1, "weight", torch_(Tensor_id)); + THTensor *gradInput = luaT_getfieldcheckudata(L, 1, "gradInput", torch_(Tensor_id)); + + THTensor *gradOutputWindow; + THTensor *gradInputWindow; + long k; + + gradOutputWindow = THTensor_(new)(); + gradInputWindow = THTensor_(new)(); + + 
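+  /* the strided loop below ("ouch") mirrors updateOutput: output frames are grouped so that, within a group, their kW-frame input windows do not overlap, letting each group be propagated with a single addmm over 2D strided views */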
THTensor_(resizeAs)(gradInput, input); + THTensor_(zero)(gradInput); + + /* ouch */ + for(k = 0; nOutputFrame > 0; k++) + { + long outputFrameStride = (kW-1)/dW+1; + long inputFrameStride = outputFrameStride*dW; + long nFrame = (nInputFrame-k*dW-kW)/inputFrameStride + 1; + nOutputFrame -= nFrame; + + THTensor_(setStorage2d)(gradOutputWindow, gradOutput->storage, + gradOutput->storageOffset + k*gradOutput->size[1], + nFrame, outputFrameStride*gradOutput->size[1], + gradOutput->size[1], 1); + + THTensor_(setStorage2d)(gradInputWindow, gradInput->storage, + gradInput->storageOffset+k*dW*gradInput->size[1], + nFrame, inputFrameStride*gradInput->size[1], + kW*gradInput->size[1], 1); + + THTensor_(addmm)(gradInputWindow, 1, gradInputWindow, 1, gradOutputWindow, weight); + } + + THTensor_(free)(gradOutputWindow); + THTensor_(free)(gradInputWindow); + + return 1; +} + +static int nn_(TemporalConvolution_accGradParameters)(lua_State *L) +{ + THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id)); + THTensor *gradOutput = luaT_checkudata(L, 3, torch_(Tensor_id)); + real scale = luaL_optnumber(L, 4, 1); + int kW = luaT_getfieldcheckint(L, 1, "kW"); + int dW = luaT_getfieldcheckint(L, 1, "dW"); + long nInputFrame = input->size[0]; + long nOutputFrame = gradOutput->size[0]; + + THTensor *gradWeight = luaT_getfieldcheckudata(L, 1, "gradWeight", torch_(Tensor_id)); + THTensor *gradBias = luaT_getfieldcheckudata(L, 1, "gradBias", torch_(Tensor_id)); + + THTensor *gradOutputWindow; + THTensor *inputWindow; + long k; + + input = THTensor_(newContiguous)(input); + gradOutputWindow = THTensor_(new)(); + inputWindow = THTensor_(new)(); + + /* bias first */ + for(k = 0; k < nOutputFrame; k++) + { + THTensor_(select)(gradOutputWindow, gradOutput, 0, k); + THTensor_(cadd)(gradBias, gradBias, scale, gradOutputWindow); + } + + /* ouch */ + for(k = 0; nOutputFrame > 0; k++) + { + long outputFrameStride = (kW-1)/dW+1; + long inputFrameStride = outputFrameStride*dW; + long nFrame = (nInputFrame-k*dW-kW)/inputFrameStride + 1; + nOutputFrame -= nFrame; + + THTensor_(setStorage2d)(inputWindow, input->storage, + input->storageOffset+k*dW*input->size[1], + nFrame, inputFrameStride*input->size[1], + kW*input->size[1], 1); + + THTensor_(setStorage2d)(gradOutputWindow, gradOutput->storage, + gradOutput->storageOffset + k*gradOutput->size[1], + nFrame, outputFrameStride*gradOutput->size[1], + gradOutput->size[1], 1); + + THTensor_(transpose)(gradOutputWindow, NULL, 0, 1); + THTensor_(addmm)(gradWeight, 1, gradWeight, scale, gradOutputWindow, inputWindow); + THTensor_(transpose)(gradOutputWindow, NULL, 0, 1); + } + + THTensor_(free)(gradOutputWindow); + THTensor_(free)(inputWindow); + THTensor_(free)(input); + + return 0; +} + +static const struct luaL_Reg nn_(TemporalConvolution__) [] = { + {"TemporalConvolution_updateOutput", nn_(TemporalConvolution_updateOutput)}, + {"TemporalConvolution_updateGradInput", nn_(TemporalConvolution_updateGradInput)}, + {"TemporalConvolution_accGradParameters", nn_(TemporalConvolution_accGradParameters)}, + {NULL, NULL} +}; + +static void nn_(TemporalConvolution_init)(lua_State *L) +{ + luaT_pushmetaclass(L, torch_(Tensor_id)); + luaT_registeratname(L, nn_(TemporalConvolution__), "nn"); + lua_pop(L,1); +} + +#endif diff --git a/generic/TemporalSubSampling.c b/generic/TemporalSubSampling.c new file mode 100644 index 0000000..39e7f3b --- /dev/null +++ b/generic/TemporalSubSampling.c @@ -0,0 +1,139 @@ +#ifndef TH_GENERIC_FILE +#define TH_GENERIC_FILE "generic/TemporalSubSampling.c" +#else + 
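+/* TemporalSubSampling: each output frame k is the element-wise sum of kW consecutive input frames starting at frame k*dW, scaled by a per-feature weight and shifted by a per-feature bias (see the narrow/sum/cmul/cadd sequence in updateOutput below) */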
+static int nn_(TemporalSubSampling_updateOutput)(lua_State *L) +{ + THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id)); + int kW = luaT_getfieldcheckint(L, 1, "kW"); + int dW = luaT_getfieldcheckint(L, 1, "dW"); + int inputFrameSize = luaT_getfieldcheckint(L, 1, "inputFrameSize"); + + THTensor *weight = luaT_getfieldcheckudata(L, 1, "weight", torch_(Tensor_id)); + THTensor *bias = luaT_getfieldcheckudata(L, 1, "bias", torch_(Tensor_id)); + THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_(Tensor_id)); + + THTensor *outputFrame, *inputWindow; + int nInputFrame, nOutputFrame; + long k; + + luaL_argcheck(L, input->nDimension == 2, 2, "2D tensor expected"); + luaL_argcheck(L, input->size[1] == inputFrameSize, 2, "invalid input frame size"); + luaL_argcheck(L, input->size[0] >= kW, 2, "input sequence smaller than kernel size"); + + outputFrame = THTensor_(new)(); + inputWindow = THTensor_(new)(); + + nInputFrame = input->size[0]; + nOutputFrame = (nInputFrame - kW) / dW + 1; + + THTensor_(resize2d)(output, + nOutputFrame, + inputFrameSize); + + for(k = 0; k < nOutputFrame; k++) + { + THTensor_(narrow)(inputWindow, input, 0, k*dW, kW); + THTensor_(select)(outputFrame, output, 0, k); + THTensor_(sum)(outputFrame, inputWindow, 0); + THTensor_(cmul)(outputFrame, outputFrame, weight); + THTensor_(cadd)(outputFrame, outputFrame, 1, bias); + } + + THTensor_(free)(outputFrame); + THTensor_(free)(inputWindow); + + return 1; +} + +static int nn_(TemporalSubSampling_updateGradInput)(lua_State *L) +{ + THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id)); + THTensor *gradOutput = luaT_checkudata(L, 3, torch_(Tensor_id)); + int kW = luaT_getfieldcheckint(L, 1, "kW"); + int dW = luaT_getfieldcheckint(L, 1, "dW"); + + THTensor *weight = luaT_getfieldcheckudata(L, 1, "weight", torch_(Tensor_id)); + THTensor *gradInput = luaT_getfieldcheckudata(L, 1, "gradInput", torch_(Tensor_id)); + + THTensor *gradOutputFrame; + THTensor *gradInputWindow, *buffer, *kwunit; + long k; + + gradOutputFrame = THTensor_(new)(); + gradInputWindow = THTensor_(new)(); + buffer = THTensor_(new)(); + kwunit = THTensor_(newWithSize1d)(kW); + + THTensor_(fill)(kwunit, 1); + THTensor_(resizeAs)(gradInput, input); + THTensor_(zero)(gradInput); + + for(k = 0; k < gradOutput->size[0]; k++) + { + THTensor_(narrow)(gradInputWindow, gradInput, 0, k*dW, kW); + THTensor_(select)(gradOutputFrame, gradOutput, 0, k); + THTensor_(cmul)(buffer, weight, gradOutputFrame); + THTensor_(addr)(gradInputWindow, 1, gradInputWindow, 1, kwunit, buffer); + } + + THTensor_(free)(gradOutputFrame); + THTensor_(free)(gradInputWindow); + THTensor_(free)(buffer); + THTensor_(free)(kwunit); + + return 1; +} + +static int nn_(TemporalSubSampling_accGradParameters)(lua_State *L) +{ + THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id)); + THTensor *gradOutput = luaT_checkudata(L, 3, torch_(Tensor_id)); + real scale = luaL_optnumber(L, 4, 1); + + int kW = luaT_getfieldcheckint(L, 1, "kW"); + int dW = luaT_getfieldcheckint(L, 1, "dW"); + + THTensor *gradWeight = luaT_getfieldcheckudata(L, 1, "gradWeight", torch_(Tensor_id)); + THTensor *gradBias = luaT_getfieldcheckudata(L, 1, "gradBias", torch_(Tensor_id)); + + THTensor *gradOutputFrame; + THTensor *inputWindow, *buffer; + long k; + + + gradOutputFrame = THTensor_(new)(); + inputWindow = THTensor_(new)(); + buffer = THTensor_(new)(); + + for(k = 0; k < gradOutput->size[0]; k++) + { + THTensor_(narrow)(inputWindow, input, 0, k*dW, kW); + THTensor_(select)(gradOutputFrame, gradOutput, 0, 
k); + THTensor_(sum)(buffer, inputWindow, 0); + THTensor_(addcmul)(gradWeight, gradWeight, scale, buffer, gradOutputFrame); + THTensor_(cadd)(gradBias, gradBias, scale, gradOutputFrame); + } + + THTensor_(free)(gradOutputFrame); + THTensor_(free)(inputWindow); + THTensor_(free)(buffer); + + return 0; +} + +static const struct luaL_Reg nn_(TemporalSubSampling__) [] = { + {"TemporalSubSampling_updateOutput", nn_(TemporalSubSampling_updateOutput)}, + {"TemporalSubSampling_updateGradInput", nn_(TemporalSubSampling_updateGradInput)}, + {"TemporalSubSampling_accGradParameters", nn_(TemporalSubSampling_accGradParameters)}, + {NULL, NULL} +}; + +static void nn_(TemporalSubSampling_init)(lua_State *L) +{ + luaT_pushmetaclass(L, torch_(Tensor_id)); + luaT_registeratname(L, nn_(TemporalSubSampling__), "nn"); + lua_pop(L,1); +} + +#endif diff --git a/generic/Threshold.c b/generic/Threshold.c new file mode 100644 index 0000000..760e842 --- /dev/null +++ b/generic/Threshold.c @@ -0,0 +1,47 @@ +#ifndef TH_GENERIC_FILE +#define TH_GENERIC_FILE "generic/Threshold.c" +#else + +static int nn_(Threshold_updateOutput)(lua_State *L) +{ + THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id)); + real val = luaT_getfieldchecknumber(L, 1, "val"); + real threshold = luaT_getfieldchecknumber(L, 1, "threshold"); + THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_(Tensor_id)); + + THTensor_(resizeAs)(output, input); + TH_TENSOR_APPLY2(real, output, real, input, \ + *output_data = (*input_data > threshold) ? *input_data : val;); + + return 1; +} + +static int nn_(Threshold_updateGradInput)(lua_State *L) +{ + THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id)); + THTensor *gradOutput = luaT_checkudata(L, 3, torch_(Tensor_id)); + real threshold = luaT_getfieldchecknumber(L, 1, "threshold"); + THTensor *gradInput = luaT_getfieldcheckudata(L, 1, "gradInput", torch_(Tensor_id)); + + THTensor_(resizeAs)(gradInput, input); + TH_TENSOR_APPLY3(real, gradInput, real, gradOutput, real, input, \ + if ((*input_data) > threshold) *gradInput_data = 1; \ + else *gradInput_data = 0; \ + *gradInput_data = (*gradOutput_data) * (*gradInput_data);); + return 1; +} + +static const struct luaL_Reg nn_(Threshold__) [] = { + {"Threshold_updateOutput", nn_(Threshold_updateOutput)}, + {"Threshold_updateGradInput", nn_(Threshold_updateGradInput)}, + {NULL, NULL} +}; + +static void nn_(Threshold_init)(lua_State *L) +{ + luaT_pushmetaclass(L, torch_(Tensor_id)); + luaT_registeratname(L, nn_(Threshold__), "nn"); + lua_pop(L,1); +} + +#endif diff --git a/generic/VolumetricConvolution.c b/generic/VolumetricConvolution.c new file mode 100644 index 0000000..0ec2247 --- /dev/null +++ b/generic/VolumetricConvolution.c @@ -0,0 +1,118 @@ +#ifndef TH_GENERIC_FILE +#define TH_GENERIC_FILE "generic/VolumetricConvolution.c" +#else + +static int nn_(VolumetricConvolution_updateOutput)(lua_State *L) +{ + THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id)); + int dT = luaT_getfieldcheckint(L, 1, "dT"); + int dW = luaT_getfieldcheckint(L, 1, "dW"); + int dH = luaT_getfieldcheckint(L, 1, "dH"); + + THTensor *weight = luaT_getfieldcheckudata(L, 1, "weight", torch_(Tensor_id)); + THTensor *bias = luaT_getfieldcheckudata(L, 1, "bias", torch_(Tensor_id)); + THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_(Tensor_id)); + + luaL_argcheck(L, input->nDimension == 4, 2, "4D tensor expected"); + + long nOutputPlane = weight->size[0]; + long nInputPlane = weight->size[1]; + long kT = weight->size[2]; + long kH = 
weight->size[3]; + long kW = weight->size[4]; + long inputDepth = input->size[1]; + long inputHeight = input->size[2]; + long inputWidth = input->size[3]; + long outputDepth = (inputDepth - kT) / dT + 1; + long outputWidth = (inputWidth - kW) / dW + 1; + long outputHeight = (inputHeight - kH) / dH + 1; + + THTensor_(resize4d)(output, nOutputPlane, outputDepth, outputHeight, outputWidth); + + /* add bias */ + long i; + THTensor *outn = THTensor_(new)(); + for (i=0; i<bias->size[0]; i++) { + THTensor_(select)(outn,output,0,i); + THTensor_(fill)(outn, THTensor_(get1d)(bias, i)); + } + THTensor_(free)(outn); + + /* do convolutions */ + THTensor_(conv3Dmv)(output, 1.0, 1.0, input, weight, dT, dH, dW, "V", "X"); + + return 1; +} + + +static int nn_(VolumetricConvolution_updateGradInput)(lua_State *L) +{ + THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id)); + THTensor *gradOutput = luaT_checkudata(L, 3, torch_(Tensor_id)); + int dT = luaT_getfieldcheckint(L, 1, "dT"); + int dW = luaT_getfieldcheckint(L, 1, "dW"); + int dH = luaT_getfieldcheckint(L, 1, "dH"); + int nOutputPlane = luaT_getfieldcheckint(L, 1, "nOutputPlane"); + + THTensor *weight = luaT_getfieldcheckudata(L, 1, "weight", torch_(Tensor_id)); + THTensor *gradInput = luaT_getfieldcheckudata(L, 1, "gradInput", torch_(Tensor_id)); + + THArgCheck( nOutputPlane == gradOutput->size[0], 1, "Number of output features is not equal to nOutputPlane" ); + + /* gradient to input */ + THTensor *tweight = THTensor_(newTranspose)(weight,0,1); + THTensor_(conv3Dmv)(gradInput, 0.0, 1.0, gradOutput, tweight, dT, dH, dW, "F", "C"); + THTensor_(free)(tweight); + + return 1; +} + +static int nn_(VolumetricConvolution_accGradParameters)(lua_State *L) +{ + THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id)); + THTensor *gradOutput = luaT_checkudata(L, 3, torch_(Tensor_id)); + real scale = luaL_optnumber(L, 4, 1); + int dT = luaT_getfieldcheckint(L, 1, "dT"); + int dW = luaT_getfieldcheckint(L, 1, "dW"); + int dH = luaT_getfieldcheckint(L, 1, "dH"); + int nOutputPlane = luaT_getfieldcheckint(L, 1, "nOutputPlane"); + + THTensor *weight = luaT_getfieldcheckudata(L, 1, "weight", torch_(Tensor_id)); + THTensor *gradWeight = luaT_getfieldcheckudata(L, 1, "gradWeight", torch_(Tensor_id)); + THTensor *gradBias = luaT_getfieldcheckudata(L, 1, "gradBias", torch_(Tensor_id)); + + THArgCheck( nOutputPlane == gradOutput->size[0], 1, "Number of output features is not equal to nOutputPlane" ); + + long k; + + /* gradient to bias */ + real *gradBias_data = THTensor_(data)(gradBias); + THTensor* gradOutSlice = THTensor_(new)(); + for(k = 0; k < nOutputPlane; k++) + { + THTensor_(select)(gradOutSlice, gradOutput, 0, k); + gradBias_data[k] += scale*THTensor_(sumall)(gradOutSlice); + } + THTensor_(free)(gradOutSlice); + + /* gradient to kernels */ + THTensor_(conv3DRevger)(gradWeight, 1.0, scale, input, gradOutput, dT, dH, dW); + + return 0; +} + +static const struct luaL_Reg nn_(VolumetricConvolution__) [] = { + {"VolumetricConvolution_updateOutput", nn_(VolumetricConvolution_updateOutput)}, + {"VolumetricConvolution_updateGradInput", nn_(VolumetricConvolution_updateGradInput)}, + {"VolumetricConvolution_accGradParameters", nn_(VolumetricConvolution_accGradParameters)}, + {NULL, NULL} +}; + +static void nn_(VolumetricConvolution_init)(lua_State *L) +{ + luaT_pushmetaclass(L, torch_(Tensor_id)); + luaT_registeratname(L, nn_(VolumetricConvolution__), "nn"); + lua_pop(L,1); +} + +#endif @@ -0,0 +1,163 @@ +#include "TH.h" +#include "luaT.h" + +#define torch_(NAME) 
TH_CONCAT_3(torch_, Real, NAME) +#define torch_string_(NAME) TH_CONCAT_STRING_3(torch., Real, NAME) +#define nn_(NAME) TH_CONCAT_3(nn_, Real, NAME) + +static const void* torch_FloatTensor_id = NULL; +static const void* torch_DoubleTensor_id = NULL; + +#include "generic/Square.c" +#include "THGenerateFloatTypes.h" + +#include "generic/Sqrt.c" +#include "THGenerateFloatTypes.h" + +#include "generic/HardTanh.c" +#include "THGenerateFloatTypes.h" + +#include "generic/Exp.c" +#include "THGenerateFloatTypes.h" + +#include "generic/LogSigmoid.c" +#include "THGenerateFloatTypes.h" + +#include "generic/LogSoftMax.c" +#include "THGenerateFloatTypes.h" + +#include "generic/Sigmoid.c" +#include "THGenerateFloatTypes.h" + +#include "generic/SoftPlus.c" +#include "THGenerateFloatTypes.h" + +#include "generic/Tanh.c" +#include "THGenerateFloatTypes.h" + +#include "generic/Abs.c" +#include "THGenerateFloatTypes.h" + +#include "generic/HardShrink.c" +#include "THGenerateFloatTypes.h" + +#include "generic/SoftShrink.c" +#include "THGenerateFloatTypes.h" + +#include "generic/Threshold.c" +#include "THGenerateFloatTypes.h" + +#include "generic/SoftMax.c" +#include "THGenerateFloatTypes.h" + +#include "generic/Max.c" +#include "THGenerateFloatTypes.h" + +#include "generic/Min.c" +#include "THGenerateFloatTypes.h" + +#include "generic/MSECriterion.c" +#include "THGenerateFloatTypes.h" + +#include "generic/AbsCriterion.c" +#include "THGenerateFloatTypes.h" + +#include "generic/SparseLinear.c" +#include "THGenerateFloatTypes.h" + +#include "generic/TemporalConvolution.c" +#include "THGenerateFloatTypes.h" + +#include "generic/TemporalSubSampling.c" +#include "THGenerateFloatTypes.h" + +#include "generic/SpatialConvolution.c" +#include "THGenerateFloatTypes.h" + +#include "generic/SpatialConvolutionMap.c" +#include "THGenerateFloatTypes.h" + +#include "generic/SpatialSubSampling.c" +#include "THGenerateFloatTypes.h" + +#include "generic/SpatialMaxPooling.c" +#include "THGenerateFloatTypes.h" + +#include "generic/VolumetricConvolution.c" +#include "THGenerateFloatTypes.h" + +#include "generic/MultiMarginCriterion.c" +#include "THGenerateFloatTypes.h" + +#include "generic/MultiLabelMarginCriterion.c" +#include "THGenerateFloatTypes.h" + +DLL_EXPORT int luaopen_libnn(lua_State *L) +{ + torch_FloatTensor_id = luaT_checktypename2id(L, "torch.FloatTensor"); + torch_DoubleTensor_id = luaT_checktypename2id(L, "torch.DoubleTensor"); + + lua_newtable(L); + lua_pushvalue(L, -1); + lua_setfield(L, LUA_GLOBALSINDEX, "nn"); + + nn_FloatMin_init(L); + nn_FloatMax_init(L); + nn_FloatExp_init(L); + nn_FloatSqrt_init(L); + nn_FloatSquare_init(L); + nn_FloatHardTanh_init(L); + nn_FloatLogSoftMax_init(L); + nn_FloatMSECriterion_init(L); + nn_FloatAbsCriterion_init(L); + nn_FloatLogSigmoid_init(L); + nn_FloatSigmoid_init(L); + nn_FloatSoftMax_init(L); + nn_FloatSoftPlus_init(L); + nn_FloatTanh_init(L); + nn_FloatAbs_init(L); + nn_FloatHardShrink_init(L); + nn_FloatSoftShrink_init(L); + nn_FloatThreshold_init(L); + nn_FloatSparseLinear_init(L); + nn_FloatTemporalConvolution_init(L); + nn_FloatTemporalSubSampling_init(L); + nn_FloatSpatialConvolution_init(L); + nn_FloatSpatialConvolutionMap_init(L); + nn_FloatSpatialSubSampling_init(L); + nn_FloatSpatialMaxPooling_init(L); + nn_FloatVolumetricConvolution_init(L); + nn_FloatMultiMarginCriterion_init(L); + nn_FloatMultiLabelMarginCriterion_init(L); + + nn_DoubleMin_init(L); + nn_DoubleMax_init(L); + nn_DoubleExp_init(L); + nn_DoubleSqrt_init(L); + nn_DoubleSquare_init(L); + 
nn_DoubleHardTanh_init(L); + nn_DoubleLogSoftMax_init(L); + nn_DoubleMSECriterion_init(L); + nn_DoubleAbsCriterion_init(L); + nn_DoubleLogSigmoid_init(L); + nn_DoubleSigmoid_init(L); + nn_DoubleSoftMax_init(L); + nn_DoubleSoftPlus_init(L); + nn_DoubleTanh_init(L); + nn_DoubleAbs_init(L); + nn_DoubleHardShrink_init(L); + nn_DoubleSoftShrink_init(L); + nn_DoubleThreshold_init(L); + nn_DoubleSparseLinear_init(L); + nn_DoubleTemporalConvolution_init(L); + nn_DoubleTemporalSubSampling_init(L); + nn_DoubleSpatialConvolution_init(L); + nn_DoubleSpatialConvolutionMap_init(L); + nn_DoubleSpatialSubSampling_init(L); + nn_DoubleSpatialMaxPooling_init(L); + nn_DoubleVolumetricConvolution_init(L); + nn_DoubleMultiMarginCriterion_init(L); + nn_DoubleMultiLabelMarginCriterion_init(L); + + return 1; +} diff --git a/init.lua b/init.lua new file mode 100644 index 0000000..c6e7df0 --- /dev/null +++ b/init.lua @@ -0,0 +1,91 @@ +require('torch') +require('libnn') + +torch.include('nn', 'Module.lua') + +torch.include('nn', 'Concat.lua') +torch.include('nn', 'Parallel.lua') +torch.include('nn', 'Sequential.lua') + +torch.include('nn', 'Linear.lua') +torch.include('nn', 'SparseLinear.lua') +torch.include('nn', 'Reshape.lua') +torch.include('nn', 'Select.lua') +torch.include('nn', 'Narrow.lua') +torch.include('nn', 'Replicate.lua') + +torch.include('nn', 'Copy.lua') +torch.include('nn', 'Min.lua') +torch.include('nn', 'Max.lua') +torch.include('nn', 'Mean.lua') +torch.include('nn', 'Sum.lua') +torch.include('nn', 'CMul.lua') +torch.include('nn', 'Mul.lua') +torch.include('nn', 'Add.lua') + +torch.include('nn', 'CAddTable.lua') +torch.include('nn', 'CDivTable.lua') +torch.include('nn', 'CMulTable.lua') +torch.include('nn', 'CSubTable.lua') + +torch.include('nn', 'Euclidean.lua') +torch.include('nn', 'WeightedEuclidean.lua') +torch.include('nn', 'PairwiseDistance.lua') +torch.include('nn', 'CosineDistance.lua') +torch.include('nn', 'DotProduct.lua') + +torch.include('nn', 'Exp.lua') +torch.include('nn', 'HardTanh.lua') +torch.include('nn', 'LogSigmoid.lua') +torch.include('nn', 'LogSoftMax.lua') +torch.include('nn', 'Sigmoid.lua') +torch.include('nn', 'SoftMax.lua') +torch.include('nn', 'SoftMin.lua') +torch.include('nn', 'SoftPlus.lua') +torch.include('nn', 'SoftSign.lua') +torch.include('nn', 'Tanh.lua') +torch.include('nn', 'Abs.lua') +torch.include('nn', 'Power.lua') +torch.include('nn', 'Square.lua') +torch.include('nn', 'Sqrt.lua') +torch.include('nn', 'HardShrink.lua') +torch.include('nn', 'SoftShrink.lua') +torch.include('nn', 'Threshold.lua') + +torch.include('nn', 'LookupTable.lua') +torch.include('nn', 'SpatialConvolution.lua') +torch.include('nn', 'SpatialConvolutionMap.lua') +torch.include('nn', 'SpatialSubSampling.lua') +torch.include('nn', 'SpatialMaxPooling.lua') +torch.include('nn', 'SpatialLPPooling.lua') +torch.include('nn', 'TemporalConvolution.lua') +torch.include('nn', 'TemporalSubSampling.lua') +torch.include('nn', 'SpatialSubtractiveNormalization.lua') +torch.include('nn', 'SpatialZeroPadding.lua') + +torch.include('nn', 'VolumetricConvolution.lua') + +torch.include('nn', 'ParallelTable.lua') +torch.include('nn', 'ConcatTable.lua') +torch.include('nn', 'SplitTable.lua') +torch.include('nn', 'JoinTable.lua') +torch.include('nn', 'CriterionTable.lua') +torch.include('nn', 'Identity.lua') + +torch.include('nn', 'Criterion.lua') +torch.include('nn', 'MSECriterion.lua') +torch.include('nn', 'MarginCriterion.lua') +torch.include('nn', 'AbsCriterion.lua') +torch.include('nn', 
'ClassNLLCriterion.lua') +torch.include('nn', 'MultiCriterion.lua') +torch.include('nn', 'L1HingeEmbeddingCriterion.lua') +torch.include('nn', 'HingeEmbeddingCriterion.lua') +torch.include('nn', 'CosineEmbeddingCriterion.lua') +torch.include('nn', 'MarginRankingCriterion.lua') +torch.include('nn', 'MultiMarginCriterion.lua') +torch.include('nn', 'MultiLabelMarginCriterion.lua') + +torch.include('nn', 'StochasticGradient.lua') + +torch.include('nn', 'Jacobian.lua') +torch.include('nn', 'test.lua') diff --git a/test/test.lua b/test/test.lua new file mode 100644 index 0000000..c18d3a2 --- /dev/null +++ b/test/test.lua @@ -0,0 +1,1029 @@ +require 'torch' +require 'random' + +local mytester = torch.Tester() +local jac + +local precision = 1e-5 + +local nntest = {} +local nntestx = {} + +function nntest.Add() + local ini = math.random(10,20) + local inj = math.random(10,20) + local ink = math.random(10,20) + local input = torch.Tensor(ini,inj,ink):zero() + local module = nn.Add(ini*inj*ink) + + local err = jac.testJacobian(module,input) + mytester:assertlt(err,precision, 'error on state ') + local err = jac.testJacobianParameters(module, input, module.bias, module.gradBias) + mytester:assertlt(err,precision, 'error on bias ') + + local err = jac.testJacobianUpdateParameters(module, input, module.bias) + mytester:assertlt(err,precision, 'error on bias [direct update]') + + for t,err in pairs(jac.testAllUpdate(module, input, 'bias', 'gradBias')) do + mytester:assertlt(err, precision, string.format( + 'error on bias [%s]', t)) + end + + local ferr,berr = jac.testIO(module,input) + mytester:asserteq(ferr, 0, torch.typename(module) .. ' - i/o forward err ') + mytester:asserteq(berr, 0, torch.typename(module) .. ' - i/o backward err ') +end + +function nntest.CMul() + local ini = math.random(5,15) + local inj = math.random(5,15) + local ink = math.random(5,15) + local input = torch.Tensor(ini,inj,ink):zero() + local module = nn.CMul(ini*inj*ink) + + local err = jac.testJacobian(module,input) + mytester:assertlt(err,precision, 'error on state ') + + local err = jac.testJacobianParameters(module, input, module.weight, module.gradWeight) + mytester:assertlt(err,precision, 'error on weight ') + + local err = jac.testJacobianUpdateParameters(module, input, module.weight) + mytester:assertlt(err,precision, 'error on weight [direct update] ') + + for t,err in pairs(jac.testAllUpdate(module, input, 'weight', 'gradWeight')) do + mytester:assertlt(err, precision, string.format( + 'error on weight [%s]', t)) + end + + local ferr,berr = jac.testIO(module,input) + mytester:asserteq(ferr, 0, torch.typename(module) .. ' - i/o forward err ') + mytester:asserteq(berr, 0, torch.typename(module) .. ' - i/o backward err ') +end + +function nntest.Exp() + local ini = math.random(10,20) + local inj = math.random(10,20) + local ink = math.random(10,20) + local input = torch.Tensor(ini,inj,ink):zero() + local module = nn.Exp() + + local err = jac.testJacobian(module,input) + mytester:assertlt(err,precision, 'error on state ') + + local ferr,berr = jac.testIO(module,input) + mytester:asserteq(ferr, 0, torch.typename(module) .. ' - i/o forward err ') + mytester:asserteq(berr, 0, torch.typename(module) .. 
' - i/o backward err ') +end + +function nntest.HardTanh() + local ini = math.random(5,10) + local inj = math.random(5,10) + local ink = math.random(5,10) + local input = torch.Tensor(ink, inj, ini):zero() + + local module = nn.HardTanh() + + local err = jac.testJacobian(module, input) + mytester:assertlt(err, precision , 'error on state ') + + local ferr, berr = jac.testIO(module, input) + mytester:asserteq(ferr, 0, torch.typename(module) .. ' - i/o forward err ') + mytester:asserteq(berr, 0, torch.typename(module) .. ' - i/o backward err ') +end + +function nntest.Abs() + local ini = math.random(5,10) + local inj = math.random(5,10) + local ink = math.random(5,10) + local input = torch.Tensor(ink, inj, ini):zero() + + local module = nn.Abs() + + local err = jac.testJacobian(module, input) + mytester:assertlt(err, precision , 'error on state ') + + local ferr, berr = jac.testIO(module, input) + mytester:asserteq(ferr, 0, torch.typename(module) .. ' - i/o forward err ') + mytester:asserteq(berr, 0, torch.typename(module) .. ' - i/o backward err ') +end + +function nntest.Threshold() + local ini = math.random(5,10) + local inj = math.random(5,10) + local ink = math.random(5,10) + local input = torch.Tensor(ink, inj, ini):zero() + + local module = nn.Threshold(random.uniform(-2,2),random.uniform(-2,2)) + + local err = nn.Jacobian.testJacobian(module, input) + mytester:assertlt(err, precision, 'error on state ') + + local ferr, berr = nn.Jacobian.testIO(module, input) + mytester:asserteq(ferr, 0, torch.typename(module) .. ' - i/o forward err ') + mytester:asserteq(berr, 0, torch.typename(module) .. ' - i/o backward err ') +end + +function nntest.HardShrink() + local ini = math.random(5,10) + local inj = math.random(5,10) + local ink = math.random(5,10) + local input = torch.Tensor(ink, inj, ini):zero() + + local module = nn.HardShrink(math.random()/2) + + local err = nn.Jacobian.testJacobian(module, input) + mytester:assertlt(err, precision, 'error on state ') + + local ferr, berr = nn.Jacobian.testIO(module, input) + mytester:asserteq(ferr, 0, torch.typename(module) .. ' - i/o forward err ') + mytester:asserteq(berr, 0, torch.typename(module) .. ' - i/o backward err ') +end + +function nntest.SoftShrink() + local ini = math.random(5,10) + local inj = math.random(5,10) + local ink = math.random(5,10) + local input = torch.Tensor(ink, inj, ini):zero() + + local module = nn.SoftShrink(math.random()/2) + + local err = nn.Jacobian.testJacobian(module, input) + mytester:assertlt(err, precision, 'error on state ') + + local ferr, berr = nn.Jacobian.testIO(module, input) + mytester:asserteq(ferr, 0, torch.typename(module) .. ' - i/o forward err ') + mytester:asserteq(berr, 0, torch.typename(module) .. ' - i/o backward err ') +end + +function nntest.Power() + local in1 = torch.rand(10,20) + local module = nn.Power(2) + local out = module:forward(in1) + local err = out:dist(in1:cmul(in1)) + mytester:asserteq(err, 0, torch.typename(module) .. ' - forward err ') + + local ini = math.random(5,10) + local inj = math.random(5,10) + local ink = math.random(5,10) + local pw = random.uniform()*math.random(1,10) + local input = torch.Tensor(ink, inj, ini):zero() + + local module = nn.Power(pw) + + local err = nn.Jacobian.testJacobian(module, input, 0.1, 2) + mytester:assertlt(err, precision, 'error on state ') + + local ferr, berr = nn.Jacobian.testIO(module,input, 0.1, 2) + mytester:asserteq(ferr, 0, torch.typename(module) .. ' - i/o forward err ') + mytester:asserteq(berr, 0, torch.typename(module) .. 
' - i/o backward err ')
+end
+
+function nntest.Square()
+ local in1 = torch.rand(10,20)
+ local module = nn.Square()
+ local out = module:forward(in1)
+ local err = out:dist(in1:cmul(in1))
+ mytester:asserteq(err, 0, torch.typename(module) .. ' - forward err ')
+
+ local ini = math.random(5,10)
+ local inj = math.random(5,10)
+ local ink = math.random(5,10)
+ local input = torch.Tensor(ink, inj, ini):zero()
+
+ local module = nn.Square()
+
+ local err = nn.Jacobian.testJacobian(module, input)
+ mytester:assertlt(err, precision, 'error on state ')
+
+ local ferr, berr = nn.Jacobian.testIO(module, input)
+ mytester:asserteq(ferr, 0, torch.typename(module) .. ' - i/o forward err ')
+ mytester:asserteq(berr, 0, torch.typename(module) .. ' - i/o backward err ')
+end
+
+function nntest.Sqrt()
+ local in1 = torch.rand(10,20)
+ local module = nn.Sqrt()
+ local out = module:forward(in1)
+ local err = out:dist(in1:sqrt())
+ mytester:asserteq(err, 0, torch.typename(module) .. ' - forward err ')
+
+ local ini = math.random(5,10)
+ local inj = math.random(5,10)
+ local ink = math.random(5,10)
+ local input = torch.Tensor(ink, inj, ini):zero()
+
+ local module = nn.Sqrt()
+
+ local err = nn.Jacobian.testJacobian(module, input, 0.1, 2)
+ mytester:assertlt(err, precision, 'error on state ')
+
+ local ferr, berr = nn.Jacobian.testIO(module, input, 0, 2)
+ mytester:asserteq(ferr, 0, torch.typename(module) .. ' - i/o forward err ')
+ mytester:asserteq(berr, 0, torch.typename(module) .. ' - i/o backward err ')
+end
+
+function nntest.Linear()
+ local ini = math.random(50,70)
+ local inj = math.random(50,70)
+ local input = torch.Tensor(ini):zero()
+ local module = nn.Linear(ini,inj)
+
+ -- 1D
+ local err = jac.testJacobian(module,input)
+ mytester:assertlt(err,precision, 'error on state ')
+
+ local err = jac.testJacobianParameters(module, input, module.weight, module.gradWeight)
+ mytester:assertlt(err,precision, 'error on weight ')
+
+ local err = jac.testJacobianParameters(module, input, module.bias, module.gradBias)
+ mytester:assertlt(err,precision, 'error on bias ')
+
+ local err = jac.testJacobianUpdateParameters(module, input, module.weight)
+ mytester:assertlt(err,precision, 'error on weight [direct update] ')
+
+ local err = jac.testJacobianUpdateParameters(module, input, module.bias)
+ mytester:assertlt(err,precision, 'error on bias [direct update] ')
+
+ for t,err in pairs(jac.testAllUpdate(module, input, 'weight', 'gradWeight')) do
+ mytester:assertlt(err, precision, string.format(
+ 'error on weight [%s]', t))
+ end
+
+ for t,err in pairs(jac.testAllUpdate(module, input, 'bias', 'gradBias')) do
+ mytester:assertlt(err, precision, string.format(
+ 'error on bias [%s]', t))
+ end
+
+ -- 2D
+ local nframe = math.random(50,70)
+ local input = torch.Tensor(nframe, ini):zero()
+
+ local err = jac.testJacobian(module,input)
+ mytester:assertlt(err,precision, 'error on state ')
+
+ local err = jac.testJacobianParameters(module, input, module.weight, module.gradWeight)
+ mytester:assertlt(err,precision, 'error on weight ')
+
+ local err = jac.testJacobianParameters(module, input, module.bias, module.gradBias)
+ mytester:assertlt(err,precision, 'error on bias ')
+
+ local err = jac.testJacobianUpdateParameters(module, input, module.weight)
+ mytester:assertlt(err,precision, 'error on weight [direct update] ')
+
+ local err = jac.testJacobianUpdateParameters(module, input, module.bias)
+ mytester:assertlt(err,precision, 'error on bias [direct update] ')
+
+ for t,err in pairs(jac.testAllUpdate(module,
input, 'weight', 'gradWeight')) do + mytester:assertlt(err, precision, string.format( + 'error on weight [%s]', t)) + end + + for t,err in pairs(jac.testAllUpdate(module, input, 'bias', 'gradBias')) do + mytester:assertlt(err, precision, string.format( + 'error on bias [%s]', t)) + end + + -- IO + local ferr,berr = jac.testIO(module,input) + mytester:asserteq(ferr, 0, torch.typename(module) .. ' - i/o forward err ') + mytester:asserteq(berr, 0, torch.typename(module) .. ' - i/o backward err ') +end + +function nntest.Euclidean() + local ini = math.random(50,70) + local inj = math.random(50,70) + local input = torch.Tensor(ini):zero() + local module = nn.Euclidean(ini,inj) + + local err = jac.testJacobian(module,input) + mytester:assertlt(err,precision, 'error on state ') + + local err = jac.testJacobianParameters(module, input, module.weight, module.gradWeight) + mytester:assertlt(err,precision, 'error on weight ') + + local ferr,berr = jac.testIO(module,input) + mytester:asserteq(ferr, 0, torch.typename(module) .. ' - i/o forward err ') + mytester:asserteq(berr, 0, torch.typename(module) .. ' - i/o backward err ') +end + +function nntest.WeightedEuclidean() + local ini = math.random(10,20) + local inj = math.random(10,20) + local input = torch.Tensor(ini):zero() + local module = nn.WeightedEuclidean(ini,inj) + + local err = jac.testJacobian(module,input) + mytester:assertlt(err,precision, 'error on state ') + + local err = jac.testJacobianParameters(module, input, module.weight, module.gradWeight) + mytester:assertlt(err,precision, 'error on weight ') + + local err = jac.testJacobianParameters(module, input, module.bias, module.gradBias) + mytester:assertlt(err,precision, 'error on bias ') + + local ferr,berr = jac.testIO(module,input) + mytester:asserteq(ferr, 0, torch.typename(module) .. ' - i/o forward err ') + mytester:asserteq(berr, 0, torch.typename(module) .. ' - i/o backward err ') +end + +function nntest.LogSigmoid() + local ini = math.random(10,20) + local inj = math.random(10,20) + local ink = math.random(10,20) + local input = torch.Tensor(ini,inj,ink):zero() + local module = nn.LogSigmoid() + + local err = jac.testJacobian(module,input) + mytester:assertlt(err,precision, 'error on state ') + + local ferr,berr = jac.testIO(module,input) + mytester:asserteq(ferr, 0, torch.typename(module) .. ' - i/o forward err ') + mytester:asserteq(berr, 0, torch.typename(module) .. ' - i/o backward err ') +end + +function nntest.LogSoftmax() + local ini = math.random(10,20) + local inj = math.random(10,20) + local input = torch.Tensor(ini,inj):zero() + local module = nn.LogSoftMax() + + local err = jac.testJacobian(module,input) + mytester:assertlt(err,precision, 'error on state ') + + local ferr,berr = jac.testIO(module,input) + mytester:asserteq(ferr, 0, torch.typename(module) .. ' - i/o forward err ') + mytester:asserteq(berr, 0, torch.typename(module) .. ' - i/o backward err ') +end + +-- function nntest.TemporalLogSoftmax() +-- local ini = math.random(10,20) +-- local inj = math.random(10,20) +-- local input = torch.Tensor(ini,inj):zero() +-- local module = nn.TemporalLogSoftMax() + +-- local err = jac.testJacobian(module,input) +-- mytester:assertlt(err,precision, 'error on state ') + +-- local ferr,berr = jac.testIO(module,input) +-- mytester:asserteq(ferr, 0, torch.typename(module) .. ' - i/o forward err ') +-- mytester:asserteq(berr, 0, torch.typename(module) .. 
' - i/o backward err ') +-- end + +function nntest.Max() + local ini = math.random(10,20) + local inj = math.random(10,20) + local ink = math.random(10,20) + local input = torch.Tensor(ini,inj*ink):zero() + local module = nn.Max(1) + + local err = jac.testJacobian(module,input) + mytester:assertlt(err,precision, 'error on state ') + + local ferr,berr = jac.testIO(module,input) + mytester:asserteq(ferr, 0, torch.typename(module) .. ' - i/o forward err ') + mytester:asserteq(berr, 0, torch.typename(module) .. ' - i/o backward err ') +end + +function nntest.Min() + local ini = math.random(10,20) + local inj = math.random(10,20) + local ink = math.random(10,20) + local input = torch.Tensor(ini,inj*ink):zero() + local module = nn.Min(1) + + local err = jac.testJacobian(module,input) + mytester:assertlt(err,precision, 'error on state ') + + local ferr,berr = jac.testIO(module,input) + mytester:asserteq(ferr, 0, torch.typename(module) .. ' - i/o forward err ') + mytester:asserteq(berr, 0, torch.typename(module) .. ' - i/o backward err ') +end + +function nntest.Mean() + local ini = math.random(10,20) + local inj = math.random(10,20) + local ink = math.random(10,20) + local input = torch.Tensor(ini,inj,ink):zero() + local module = nn.Mean(random.random(1,3)) + + local err = jac.testJacobian(module,input) + mytester:assertlt(err,precision, 'error on state ') + + local ferr,berr = jac.testIO(module,input) + mytester:asserteq(ferr, 0, torch.typename(module) .. ' - i/o forward err ') + mytester:asserteq(berr, 0, torch.typename(module) .. ' - i/o backward err ') +end + +function nntest.Mul() + local ini = math.random(10,20) + local inj = math.random(10,20) + local ink = math.random(10,20) + local input = torch.Tensor(ini,inj,ink):zero() + local module = nn.Mul(ini*inj*ink) + + local err = jac.testJacobian(module,input) + mytester:assertlt(err,precision, 'error on state ') + local err = jac.testJacobianParameters(module, input, module.weight, module.gradWeight) + mytester:assertlt(err,precision, 'error on weight ') + local err = jac.testJacobianUpdateParameters(module, input, module.weight) + mytester:assertlt(err,precision, 'error on weight [direct update] ') + + for t,err in pairs(jac.testAllUpdate(module, input, 'weight', 'gradWeight')) do + mytester:assertlt(err, precision, string.format( + 'error on weight [%s]', t)) + end + + local ferr,berr = jac.testIO(module,input) + mytester:asserteq(ferr, 0, torch.typename(module) .. ' - i/o forward err ') + mytester:asserteq(berr, 0, torch.typename(module) .. ' - i/o backward err ') +end + +function nntest.Sigmoid() + local ini = math.random(10,20) + local inj = math.random(10,20) + local ink = math.random(10,20) + local input = torch.Tensor(ini,inj,ink):zero() + local module = nn.Sigmoid() + + local err = jac.testJacobian(module,input) + mytester:assertlt(err,precision, 'error on state ') + + local ferr,berr = jac.testIO(module,input) + mytester:asserteq(ferr, 0, torch.typename(module) .. ' - i/o forward err ') + mytester:asserteq(berr, 0, torch.typename(module) .. ' - i/o backward err ') +end + +function nntest.Softmax() + local ini = math.random(10,20) + local ink = math.random(10,20) + local input = torch.Tensor(ink, ini):zero() + local module = nn.SoftMax() + + local err = jac.testJacobian(module,input) + mytester:assertlt(err,precision, 'error on state ') + + local ferr,berr = jac.testIO(module,input) + mytester:asserteq(ferr, 0, torch.typename(module) .. ' - i/o forward err ') + mytester:asserteq(berr, 0, torch.typename(module) .. 
' - i/o backward err ') +end + +function nntest.Softmin() + local ini = math.random(10,20) + local ink = math.random(10,20) + local input = torch.Tensor(ink, ini):zero() + local module = nn.SoftMin() + + local err = jac.testJacobian(module,input) + mytester:assertlt(err,precision, 'error on state ') + + local ferr,berr = jac.testIO(module,input) + mytester:asserteq(ferr, 0, torch.typename(module) .. ' - i/o forward err ') + mytester:asserteq(berr, 0, torch.typename(module) .. ' - i/o backward err ') +end + +function nntest.Softsign() + local ini = math.random(10,20) + local ink = math.random(10,20) + local input = torch.Tensor(ink, ini):zero() + local module = nn.SoftSign() + + local err = jac.testJacobian(module,input) + mytester:assertlt(err,precision, 'error on state ') + + local ferr,berr = jac.testIO(module,input) + mytester:asserteq(ferr, 0, torch.typename(module) .. ' - i/o forward err ') + mytester:asserteq(berr, 0, torch.typename(module) .. ' - i/o backward err ') +end + +function nntest.SoftPlus() + local ini = math.random(10,20) + local inj = math.random(10,20) + local ink = math.random(10,20) + local input = torch.Tensor(ini,inj,ink):zero() + local module = nn.SoftPlus() + + local err = jac.testJacobian(module,input) + mytester:assertlt(err,precision, 'error on state ') + + local ferr,berr = jac.testIO(module,input) + mytester:asserteq(ferr, 0, torch.typename(module) .. ' - i/o forward err ') + mytester:asserteq(berr, 0, torch.typename(module) .. ' - i/o backward err ') +end + +function nntest.SpatialSubtractiveNormalization_2dkernel() + local inputSize = math.random(11,20) + local kersize = 9 + local nbfeatures = math.random(5,10) + local kernel = torch.Tensor(kersize,kersize):fill(1) + local module = nn.SpatialSubtractiveNormalization(nbfeatures,kernel) + local input = torch.rand(nbfeatures,inputSize,inputSize) + + local err = jac.testJacobian(module,input) + mytester:assertlt(err,precision, 'error on state ') + + local ferr,berr = jac.testIO(module,input) + mytester:asserteq(ferr, 0, torch.typename(module) .. ' - i/o forward err ') + mytester:asserteq(berr, 0, torch.typename(module) .. ' - i/o backward err ') +end + +function nntest.SpatialSubtractiveNormalization_1dkernel() + local inputSize = math.random(11,20) + local kersize = 9 + local nbfeatures = math.random(5,10) + local kernel = torch.Tensor(kersize):fill(1) + local module = nn.SpatialSubtractiveNormalization(nbfeatures,kernel) + local input = torch.rand(nbfeatures,inputSize,inputSize) + + local err = jac.testJacobian(module,input) + mytester:assertlt(err,precision, 'error on state ') + + local ferr,berr = jac.testIO(module,input) + mytester:asserteq(ferr, 0, torch.typename(module) .. ' - i/o forward err ') + mytester:asserteq(berr, 0, torch.typename(module) .. 
' - i/o backward err ') +end + +function nntest.SpatialConvolution() + local from = math.random(1,10) + local to = math.random(1,10) + local ki = math.random(1,10) + local kj = math.random(1,10) + local si = math.random(1,4) + local sj = math.random(1,4) + local outi = math.random(10,20) + local outj = math.random(10,20) + local ini = (outi-1)*si+ki + local inj = (outj-1)*sj+kj + local module = nn.SpatialConvolution(from, to, ki, kj, si, sj) + local input = torch.Tensor(from, inj, ini):zero() + + -- stochastic + + local err = jac.testJacobian(module, input) + mytester:assertlt(err, precision, 'error on state ') + + local err = jac.testJacobianParameters(module, input, module.weight, module.gradWeight) + mytester:assertlt(err , precision, 'error on weight ') + + local err = jac.testJacobianParameters(module, input, module.bias, module.gradBias) + mytester:assertlt(err , precision, 'error on bias ') + + local err = jac.testJacobianUpdateParameters(module, input, module.weight) + mytester:assertlt(err , precision, 'error on weight [direct update] ') + + local err = jac.testJacobianUpdateParameters(module, input, module.bias) + mytester:assertlt(err , precision, 'error on bias [direct update] ') + + for t,err in pairs(jac.testAllUpdate(module, input, 'weight', 'gradWeight')) do + mytester:assertlt(err, precision, string.format( + 'error on weight [%s]', t)) + end + + for t,err in pairs(jac.testAllUpdate(module, input, 'bias', 'gradBias')) do + mytester:assertlt(err, precision, string.format( + 'error on bias [%s]', t)) + end + + -- batch + + --verbose = true + local batch = math.random(2,5) + outi = math.random(4,8) + outj = math.random(4,8) + ini = (outi-1)*si+ki + inj = (outj-1)*sj+kj + module = nn.SpatialConvolution(from, to, ki, kj, si, sj) + input = torch.Tensor(batch,from,inj,ini):zero() + +-- print(from, to, ki, kj, si, sj, batch, ini, inj) +-- print(module.weight:size()) +-- print(module.gradWeight:size()) + + local err = jac.testJacobian(module, input) + mytester:assertlt(err, precision, 'batch error on state ') + + local err = jac.testJacobianParameters(module, input, module.weight, module.gradWeight) + mytester:assertlt(err , precision, 'batch error on weight ') + + local err = jac.testJacobianParameters(module, input, module.bias, module.gradBias) + mytester:assertlt(err , precision, 'batch error on bias ') + + local err = jac.testJacobianUpdateParameters(module, input, module.weight) + mytester:assertlt(err , precision, 'batch error on weight [direct update] ') + + local err = jac.testJacobianUpdateParameters(module, input, module.bias) + mytester:assertlt(err , precision, 'batch error on bias [direct update] ') + + for t,err in pairs(jac.testAllUpdate(module, input, 'weight', 'gradWeight')) do + mytester:assertlt(err, precision, string.format( + 'error on weight [%s]', t)) + end + + for t,err in pairs(jac.testAllUpdate(module, input, 'bias', 'gradBias')) do + mytester:assertlt(err, precision, string.format( + 'batch error on bias [%s]', t)) + end + + local ferr, berr = jac.testIO(module, input) + mytester:asserteq(0, ferr, torch.typename(module) .. ' - i/o forward err ') + mytester:asserteq(0, berr, torch.typename(module) .. 
' - i/o backward err ') +end + +function nntest.SpatialConvolutionMap() + local from = math.random(1,10) + local fanin = math.random(1, from) + local to = math.random(1,10) + local ki = math.random(1,10) + local kj = math.random(1,10) + local si = math.random(1,4) + local sj = math.random(1,4) + local outi = math.random(10,20) + local outj = math.random(10,20) + local ini = (outi-1)*si+ki + local inj = (outj-1)*sj+kj + + local module = nn.SpatialConvolutionMap(nn.tables.random(from, to, fanin), ki, kj, si, sj) + local input = torch.Tensor(from, inj, ini):zero() + + local err = jac.testJacobian(module, input) + mytester:assertlt(err, precision, 'error on state ') + + local err = jac.testJacobianParameters(module, input, module.weight, module.gradWeight) + mytester:assertlt(err , precision, 'error on weight ') + + local err = jac.testJacobianParameters(module, input, module.bias, module.gradBias) + mytester:assertlt(err , precision, 'error on bias ') + + for t,err in pairs(jac.testAllUpdate(module, input, 'weight', 'gradWeight')) do + mytester:assertlt(err, precision, string.format( + 'error on weight [%s]', t)) + end + + for t,err in pairs(jac.testAllUpdate(module, input, 'bias', 'gradBias')) do + mytester:assertlt(err, precision, string.format( + 'error on bias [%s]', t)) + end + + local ferr, berr = jac.testIO(module, input) + mytester:asserteq(0, ferr, torch.typename(module) .. ' - i/o forward err ') + mytester:asserteq(0, berr, torch.typename(module) .. ' - i/o backward err ') +end + +function batchcompare(smod, sin, plist) + local bs = torch.LongStorage(sin:size():size()+1) + bs[1] = 1 + for i=1,sin:size():size() do bs[i+1] = sin:size()[i] end + local bin = torch.Tensor(bs):copy(sin) + local bmod = smod:clone() + + local sout = smod:forward(sin):clone() + local bout = bmod:forward(bin):clone() + + local sgout = torch.randn(sout:size()) + local bgout = torch.Tensor(bout:size()) + bgout:copy(sgout) + + local sgin = smod:backward(sin, sgout) + local bgin = bmod:backward(bin, bgout) + + smod:accGradParameters(sin, sgout, 1) + bmod:accGradParameters(bin, bgout, 1) + + mytester:assertTensorEq(sout,bout:select(1,1), 1e-8, 'batchcompare error on output') + mytester:assertTensorEq(sgin,bgin:select(1,1), 1e-8, 'batchcompare error on gradInput') + + for i,v in pairs(plist) do + mytester:assertTensorEq(smod[v],bmod[v], 1e-8, 'batchcompare error on ' .. 
v) + end +end + +function nntest.SpatialConvolutionBatchCompare() + local from = math.random(1,10) + local to = math.random(1,10) + local ki = math.random(1,10) + local kj = math.random(1,10) + local si = math.random(1,4) + local sj = math.random(1,4) + local outi = math.random(10,20) + local outj = math.random(10,20) + local ini = (outi-1)*si+ki + local inj = (outj-1)*sj+kj + + local module = nn.SpatialConvolution(from, to, ki, kj, si, sj) + local input = torch.randn(from,inj,ini) + + batchcompare(module,input, {'weight','bias','gradWeight','gradBias'}) +end + +function nntest.SpatialSubSamplingBatchCompare() + local from = math.random(1,10) + local ki = math.random(1,10) + local kj = math.random(1,10) + local si = math.random(1,4) + local sj = math.random(1,4) + local outi = math.random(10,20) + local outj = math.random(10,20) + local ini = (outi-1)*si+ki + local inj = (outj-1)*sj+kj + local module = nn.SpatialSubSampling(from, ki, kj, si, sj) + local input = torch.randn(from,inj,ini)--torch.Tensor(from, inj, ini):zero() + + batchcompare(module,input, {'weight','bias','gradWeight','gradBias'}) +end + +function nntest.SpatialSubSampling() + local from = math.random(1,10) + local ki = math.random(1,10) + local kj = math.random(1,10) + local si = math.random(1,4) + local sj = math.random(1,4) + local outi = math.random(10,20) + local outj = math.random(10,20) + local ini = (outi-1)*si+ki + local inj = (outj-1)*sj+kj + local module = nn.SpatialSubSampling(from, ki, kj, si, sj) + local input = torch.Tensor(from, inj, ini):zero() + + local err = jac.testJacobian(module, input) + mytester:assertlt(err, precision, 'error on state ') + + local err = jac.testJacobianParameters(module, input, module.weight, module.gradWeight) + mytester:assertlt(err , precision, 'error on weight ') + + local err = jac.testJacobianParameters(module, input, module.bias, module.gradBias) + mytester:assertlt(err , precision, 'error on bias ') + + local err = jac.testJacobianUpdateParameters(module, input, module.weight) + mytester:assertlt(err , precision, 'error on weight [direct update] ') + + local err = jac.testJacobianUpdateParameters(module, input, module.bias) + mytester:assertlt(err , precision, 'error on bias [direct update] ') + + for t,err in pairs(jac.testAllUpdate(module, input, 'weight', 'gradWeight')) do + mytester:assertlt(err, precision, string.format( + 'error on weight [%s]', t)) + end + + for t,err in pairs(jac.testAllUpdate(module, input, 'bias', 'gradBias')) do + mytester:assertlt(err, precision, string.format( + 'error on bias [%s]', t)) + end + + --verbose = true + local batch = math.random(2,5) + outi = math.random(4,8) + outj = math.random(4,8) + ini = (outi-1)*si+ki + inj = (outj-1)*sj+kj + module = nn.SpatialSubSampling(from, ki, kj, si, sj) + input = torch.Tensor(batch,from,inj,ini):zero() + +-- print(from, to, ki, kj, si, sj, batch, ini, inj) +-- print(module.weight:size()) +-- print(module.gradWeight:size()) + + local err = jac.testJacobian(module, input) + mytester:assertlt(err, precision, 'batch error on state ') + + local err = jac.testJacobianParameters(module, input, module.weight, module.gradWeight) + mytester:assertlt(err , precision, 'batch error on weight ') + + local err = jac.testJacobianParameters(module, input, module.bias, module.gradBias) + mytester:assertlt(err , precision, 'batch error on bias ') + + local err = jac.testJacobianUpdateParameters(module, input, module.weight) + mytester:assertlt(err , precision, 'batch error on weight [direct update] ') + + local err = 
jac.testJacobianUpdateParameters(module, input, module.bias) + mytester:assertlt(err , precision, 'batch error on bias [direct update] ') + + for t,err in pairs(jac.testAllUpdate(module, input, 'weight', 'gradWeight')) do + mytester:assertlt(err, precision, string.format( + 'batch error on weight [%s]', t)) + end + + for t,err in pairs(jac.testAllUpdate(module, input, 'bias', 'gradBias')) do + mytester:assertlt(err, precision, string.format( + 'batch error on bias [%s]', t)) + end + + local ferr, berr = jac.testIO(module, input) + mytester:asserteq(0, ferr, torch.typename(module) .. ' - i/o forward err ') + mytester:asserteq(0, berr, torch.typename(module) .. ' - i/o backward err ') +end + +function nntest.SpatialMaxPooling() + local fanin = math.random(1,4) + local osizex = math.random(1,20) + local osizey = math.random(1,20) + local mx = math.random(2,4) + local my = math.random(2,4) + local sizex = osizex*mx + local sizey = osizey*my + local module = nn.SpatialMaxPooling(mx,my,mx,my) + local input = torch.rand(fanin,sizey,sizex) + + local err = jac.testJacobian(module, input) + mytester:assertlt(err, precision, 'error on state ') + + local ferr, berr = jac.testIO(module, input) + mytester:asserteq(ferr, 0, torch.typename(module) .. ' - i/o forward err ') + mytester:asserteq(berr, 0, torch.typename(module) .. ' - i/o backward err ') +end + +function nntest.SpatialLPPooling() + local fanin = math.random(1,4) + local osizex = math.random(1,4) + local osizey = math.random(1,4) + local p = math.random(1,4) + local mx = math.random(2,8) + local my = math.random(2,8) + local dx = math.random(2,mx) + local dy = math.random(2,my) + local sizex = osizex*mx + local sizey = osizey*my + local module = nn.SpatialLPPooling(fanin,p,mx,my,dx,dy) + local input = torch.rand(fanin,sizey,sizex) + + local err = jac.testJacobian(module, input) + mytester:assertlt(err, precision, 'error on state ') + + local ferr, berr = jac.testIO(module, input) + mytester:asserteq(ferr, 0, torch.typename(module) .. ' - i/o forward err ') + mytester:asserteq(berr, 0, torch.typename(module) .. ' - i/o backward err ') +end + +function nntest.Sum() + local ini = math.random(10,20) + local inj = math.random(10,20) + local ink = math.random(10,20) + local input = torch.Tensor(ini,inj,ink):zero() + local module = nn.Sum(random.random(1,3)) + + local err = jac.testJacobian(module,input) + mytester:assertlt(err,precision, 'error on state ') + + local ferr,berr = jac.testIO(module,input) + mytester:asserteq(ferr, 0, torch.typename(module) .. ' - i/o forward err ') + mytester:asserteq(berr, 0, torch.typename(module) .. ' - i/o backward err ') +end + +function nntest.Tanh() + local ini = math.random(5,10) + local inj = math.random(5,10) + local ink = math.random(5,10) + local input = torch.Tensor(ink, inj, ini):zero() + + local module = nn.Tanh() + + local err = jac.testJacobian(module, input) + mytester:assertlt(err, precision , 'error on state ') + + local ferr, berr = jac.testIO(module, input) + mytester:asserteq(ferr, 0, torch.typename(module) .. ' - i/o forward err ') + mytester:asserteq(berr, 0, torch.typename(module) .. 
' - i/o backward err ') +end + +function nntest.TemporalConvolution() + local from = math.random(1,10) + local to = math.random(1,10) + local ki = math.random(1,10) + local si = math.random(1,4) + local outi = math.random(10,20) + local ini = (outi-1)*si+ki + local module = nn.TemporalConvolution(from, to, ki,si) + local input = torch.Tensor(ini, from):zero() + + local err = jac.testJacobian(module, input) + mytester:assertlt(err, precision, 'error on state ') + + local err = jac.testJacobianParameters(module, input, module.weight, module.gradWeight) + mytester:assertlt(err , precision, 'error on weight ') + + local err = jac.testJacobianParameters(module, input, module.bias, module.gradBias) + mytester:assertlt(err , precision, 'error on bias ') + + local err = jac.testJacobianUpdateParameters(module, input, module.weight) + mytester:assertlt(err , precision, 'error on weight [direct update]') + + local err = jac.testJacobianUpdateParameters(module, input, module.bias) + mytester:assertlt(err , precision, 'error on bias [direct update]') + + for t,err in pairs(jac.testAllUpdate(module, input, 'weight', 'gradWeight')) do + mytester:assertlt(err, precision, string.format( + 'error on weight [%s]', t)) + end + + for t,err in pairs(jac.testAllUpdate(module, input, 'bias', 'gradBias')) do + mytester:assertlt(err, precision, string.format( + 'error on bias [%s]', t)) + end + + local ferr, berr = jac.testIO(module, input) + mytester:asserteq(0, ferr, torch.typename(module) .. ' - i/o forward err ') + mytester:asserteq(0, berr, torch.typename(module) .. ' - i/o backward err ') +end + +function nntest.TemporalSubSampling() + local from = math.random(1,10) + local ki = math.random(1,10) + local si = math.random(1,4) + local outi = math.random(10,20) + local ini = (outi-1)*si+ki + local module = nn.TemporalSubSampling(from, ki, si) + local input = torch.Tensor(ini, from):zero() + + local err = jac.testJacobian(module, input) + mytester:assertlt(err, precision, 'error on state ') + + local err = jac.testJacobianParameters(module, input, module.weight, module.gradWeight) + mytester:assertlt(err , precision, 'error on weight ') + + local err = jac.testJacobianParameters(module, input, module.bias, module.gradBias) + mytester:assertlt(err , precision, 'error on bias ') + + local err = jac.testJacobianUpdateParameters(module, input, module.weight) + mytester:assertlt(err , precision, 'error on weight [direct update] ') + + local err = jac.testJacobianUpdateParameters(module, input, module.bias) + mytester:assertlt(err , precision, 'error on bias [direct update] ') + + for t,err in pairs(jac.testAllUpdate(module, input, 'weight', 'gradWeight')) do + mytester:assertlt(err, precision, string.format( + 'error on weight [%s]', t)) + end + + for t,err in pairs(jac.testAllUpdate(module, input, 'bias', 'gradBias')) do + mytester:assertlt(err, precision, string.format( + 'error on bias [%s]', t)) + end + + local ferr, berr = jac.testIO(module, input) + mytester:asserteq(0, ferr, torch.typename(module) .. ' - i/o forward err ') + mytester:asserteq(0, berr, torch.typename(module) .. 
' - i/o backward err ')
+end
+
+function nntest.VolumetricConvolution()
+ local from = math.random(2,5)
+ local to = math.random(2,5)
+ local kt = math.random(3,7)
+ local ki = math.random(3,7)
+ local kj = math.random(3,7)
+ local st = math.random(2,4)
+ local si = math.random(2,4)
+ local sj = math.random(2,4)
+ local outt = math.random(3,7)
+ local outi = math.random(3,7)
+ local outj = math.random(3,7)
+ local int = (outt-1)*st+kt
+ local ini = (outi-1)*si+ki
+ local inj = (outj-1)*sj+kj
+ local module = nn.VolumetricConvolution(from, to, kt, ki, kj, st, si, sj)
+ local input = torch.Tensor(from, int, inj, ini):zero()
+
+ local err = jac.testJacobian(module, input)
+ mytester:assertlt(err, precision, 'error on state ')
+
+ local err = jac.testJacobianParameters(module, input, module.weight, module.gradWeight)
+ mytester:assertlt(err , precision, 'error on weight ')
+
+ local err = jac.testJacobianParameters(module, input, module.bias, module.gradBias)
+ mytester:assertlt(err , precision, 'error on bias ')
+
+ local err = jac.testJacobianUpdateParameters(module, input, module.weight)
+ mytester:assertlt(err , precision, 'error on weight [direct update] ')
+
+ local err = jac.testJacobianUpdateParameters(module, input, module.bias)
+ mytester:assertlt(err , precision, 'error on bias [direct update] ')
+
+ for t,err in pairs(jac.testAllUpdate(module, input, 'weight', 'gradWeight')) do
+ mytester:assertlt(err, precision, string.format(
+ 'error on weight [%s]', t))
+ end
+
+ for t,err in pairs(jac.testAllUpdate(module, input, 'bias', 'gradBias')) do
+ mytester:assertlt(err, precision, string.format(
+ 'error on bias [%s]', t))
+ end
+
+ local ferr, berr = jac.testIO(module, input)
+ mytester:asserteq(0, ferr, torch.typename(module) .. ' - i/o forward err ')
+ mytester:asserteq(0, berr, torch.typename(module) .. ' - i/o backward err ')
+end
+
+
+mytester:add(nntest)
+--mytester:add(test_SpatialConvolution)
+--mytester:add(test_AbsCriterion)
+
+if not nn then
+ require 'nn'
+ jac = nn.Jacobian
+ mytester:run()
+else
+ jac = nn.Jacobian
+ function nn.test()
+ -- randomize stuff
+ math.randomseed(os.time())
+ mytester:run()
+ end
+end
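
A minimal usage sketch, assuming the package builds and installs under the name 'nn' (as set up in CMakeLists.txt) so that init.lua pulls in Jacobian.lua and test.lua via torch.include:

-- from an interactive torch session
require 'nn'   -- loading the package also loads test.lua, which defines nn.test()
nn.test()      -- seeds math.random with os.time() and runs every nntest.* entry through torch.Tester

Each nntest.* function above follows the same pattern: build a module with random sizes, compare the analytic Jacobian against finite differences (threshold 1e-5), and round-trip the module through serialization with jac.testIO.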