From 4df3893abd1b9f840f1d9a8c1859799ccbf941de Mon Sep 17 00:00:00 2001
From: Ronan Collobert
Date: Wed, 25 Jan 2012 14:55:20 +0100
Subject: initial revamp of torch7 tree

---
 Abs.lua | 15 +
 AbsCriterion.lua | 14 +
 Add.lua | 54 +
 CAddTable.lua | 24 +
 CDivTable.lua | 21 +
 CMakeLists.txt | 9 +
 CMul.lua | 36 +
 CMulTable.lua | 26 +
 CSubTable.lua | 21 +
 ClassNLLCriterion.lua | 44 +
 Concat.lua | 119 ++
 ConcatTable.lua | 72 +
 Copy.lua | 33 +
 CosineDistance.lua | 40 +
 CosineEmbeddingCriterion.lua | 54 +
 Criterion.lua | 51 +
 CriterionTable.lua | 16 +
 DotProduct.lua | 29 +
 Euclidean.lua | 64 +
 Exp.lua | 9 +
 HardShrink.lua | 16 +
 HardTanh.lua | 9 +
 HingeEmbeddingCriterion.lua | 26 +
 Identity.lua | 12 +
 Jacobian.lua | 239 +++
 JoinTable.lua | 50 +
 L1HingeEmbeddingCriterion.lua | 41 +
 Linear.lua | 82 +
 LogSigmoid.lua | 14 +
 LogSoftMax.lua | 9 +
 LookupTable.lua | 76 +
 MSECriterion.lua | 14 +
 MarginCriterion.lua | 23 +
 MarginRankingCriterion.lua | 25 +
 Max.lua | 16 +
 Mean.lua | 26 +
 Min.lua | 16 +
 Module.lua | 211 +++
 Mul.lua | 42 +
 MultiCriterion.lua | 32 +
 MultiLabelMarginCriterion.lua | 14 +
 MultiMarginCriterion.lua | 14 +
 Narrow.lua | 24 +
 PairwiseDistance.lua | 33 +
 Parallel.lua | 137 ++
 ParallelTable.lua | 71 +
 Power.lua | 21 +
 Replicate.lua | 29 +
 Reshape.lua | 38 +
 Select.lua | 20 +
 Sequential.lua | 129 ++
 Sigmoid.lua | 9 +
 SoftMax.lua | 9 +
 SoftMin.lua | 15 +
 SoftPlus.lua | 9 +
 SoftShrink.lua | 16 +
 SoftSign.lua | 15 +
 SparseLinear.lua | 42 +
 SpatialConvolution.lua | 50 +
 SpatialConvolutionMap.lua | 119 ++
 SpatialLPPooling.lua | 32 +
 SpatialMaxPooling.lua | 34 +
 SpatialSubSampling.lua | 49 +
 SpatialSubtractiveNormalization.lua | 104 ++
 SpatialZeroPadding.lua | 53 +
 SplitTable.lua | 30 +
 Sqrt.lua | 13 +
 Square.lua | 13 +
 StochasticGradient.lua | 57 +
 Sum.lua | 27 +
 Tanh.lua | 9 +
 TemporalConvolution.lua | 51 +
 TemporalSubSampling.lua | 48 +
 Threshold.lua | 20 +
 VolumetricConvolution.lua | 51 +
 WeightedEuclidean.lua | 85 +
 dok/abs.png | Bin 0 -> 5918 bytes
 dok/exp.png | Bin 0 -> 6104 bytes
 dok/hshrink.png | Bin 0 -> 5576 bytes
 dok/htanh.png | Bin 0 -> 5948 bytes
 dok/index.dok | 3053 +++++++++++++++++++++++++++++++++++
 dok/lena.jpg | Bin 0 -> 39706 bytes
 dok/lenap.jpg | Bin 0 -> 34838 bytes
 dok/logsigmoid.png | Bin 0 -> 9116 bytes
 dok/logsoftmax.png | Bin 0 -> 8712 bytes
 dok/power.png | Bin 0 -> 6515 bytes
 dok/sigmmoid.png | Bin 0 -> 6533 bytes
 dok/sigmoid.png | Bin 0 -> 6533 bytes
 dok/softmax.png | Bin 0 -> 6252 bytes
 dok/softmin.png | Bin 0 -> 6446 bytes
 dok/softplus.png | Bin 0 -> 9375 bytes
 dok/softsign.png | Bin 0 -> 6877 bytes
 dok/sqrt.png | Bin 0 -> 6008 bytes
 dok/square.png | Bin 0 -> 6984 bytes
 dok/sshrink.png | Bin 0 -> 5576 bytes
 dok/tanh.png | Bin 0 -> 7323 bytes
 generic/Abs.c | 43 +
 generic/AbsCriterion.c | 54 +
 generic/Exp.c | 43 +
 generic/HardShrink.c | 50 +
 generic/HardTanh.c | 50 +
 generic/LogSigmoid.c | 49 +
 generic/LogSoftMax.c | 111 ++
 generic/MSECriterion.c | 54 +
 generic/Max.c | 100 ++
 generic/Min.c | 100 ++
 generic/MultiLabelMarginCriterion.c | 185 +++
 generic/MultiMarginCriterion.c | 162 ++
 generic/Sigmoid.c | 44 +
 generic/SoftMax.c | 114 ++
 generic/SoftPlus.c | 44 +
 generic/SoftShrink.c | 50 +
 generic/SparseLinear.c | 130 ++
 generic/SpatialConvolution.c | 201 +++
 generic/SpatialConvolutionMap.c | 229 +++
 generic/SpatialMaxPooling.c | 163 ++
 generic/SpatialSubSampling.c | 278 ++++
 generic/Sqrt.c | 46 +
 generic/Square.c | 45 +
 generic/Tanh.c | 45 +
 generic/TemporalConvolution.c | 194 +++
 generic/TemporalSubSampling.c | 139 ++
 generic/Threshold.c | 47 +
 generic/VolumetricConvolution.c | 118 ++
 init.c | 163 ++
 init.lua | 91 ++
 test/test.lua | 1029 ++++++++++++
 127 files changed, 10444 insertions(+)
 create mode 100644 Abs.lua
 create mode 100644 AbsCriterion.lua
 create mode 100644 Add.lua
 create mode 100644 CAddTable.lua
 create mode 100644 CDivTable.lua
 create mode 100644 CMakeLists.txt
 create mode 100644 CMul.lua
 create mode 100644 CMulTable.lua
 create mode 100644 CSubTable.lua
 create mode 100644 ClassNLLCriterion.lua
 create mode 100644 Concat.lua
 create mode 100644 ConcatTable.lua
 create mode 100644 Copy.lua
 create mode 100644 CosineDistance.lua
 create mode 100644 CosineEmbeddingCriterion.lua
 create mode 100644 Criterion.lua
 create mode 100644 CriterionTable.lua
 create mode 100644 DotProduct.lua
 create mode 100644 Euclidean.lua
 create mode 100644 Exp.lua
 create mode 100644 HardShrink.lua
 create mode 100644 HardTanh.lua
 create mode 100644 HingeEmbeddingCriterion.lua
 create mode 100644 Identity.lua
 create mode 100644 Jacobian.lua
 create mode 100644 JoinTable.lua
 create mode 100644 L1HingeEmbeddingCriterion.lua
 create mode 100644 Linear.lua
 create mode 100644 LogSigmoid.lua
 create mode 100644 LogSoftMax.lua
 create mode 100644 LookupTable.lua
 create mode 100644 MSECriterion.lua
 create mode 100644 MarginCriterion.lua
 create mode 100644 MarginRankingCriterion.lua
 create mode 100644 Max.lua
 create mode 100644 Mean.lua
 create mode 100644 Min.lua
 create mode 100644 Module.lua
 create mode 100644 Mul.lua
 create mode 100644 MultiCriterion.lua
 create mode 100644 MultiLabelMarginCriterion.lua
 create mode 100644 MultiMarginCriterion.lua
 create mode 100644 Narrow.lua
 create mode 100644 PairwiseDistance.lua
 create mode 100644 Parallel.lua
 create mode 100644 ParallelTable.lua
 create mode 100644 Power.lua
 create mode 100644 Replicate.lua
 create mode 100644 Reshape.lua
 create mode 100644 Select.lua
 create mode 100644 Sequential.lua
 create mode 100644 Sigmoid.lua
 create mode 100644 SoftMax.lua
 create mode 100644 SoftMin.lua
 create mode 100644 SoftPlus.lua
 create mode 100644 SoftShrink.lua
 create mode 100644 SoftSign.lua
 create mode 100644 SparseLinear.lua
 create mode 100644 SpatialConvolution.lua
 create mode 100644 SpatialConvolutionMap.lua
 create mode 100644 SpatialLPPooling.lua
 create mode 100644 SpatialMaxPooling.lua
 create mode 100644 SpatialSubSampling.lua
 create mode 100644 SpatialSubtractiveNormalization.lua
 create mode 100644 SpatialZeroPadding.lua
 create mode 100644 SplitTable.lua
 create mode 100644 Sqrt.lua
 create mode 100644 Square.lua
 create mode 100644 StochasticGradient.lua
 create mode 100644 Sum.lua
 create mode 100644 Tanh.lua
 create mode 100644 TemporalConvolution.lua
 create mode 100644 TemporalSubSampling.lua
 create mode 100644 Threshold.lua
 create mode 100644 VolumetricConvolution.lua
 create mode 100644 WeightedEuclidean.lua
 create mode 100644 dok/abs.png
 create mode 100644 dok/exp.png
 create mode 100644 dok/hshrink.png
 create mode 100644 dok/htanh.png
 create mode 100644 dok/index.dok
 create mode 100644 dok/lena.jpg
 create mode 100644 dok/lenap.jpg
 create mode 100644 dok/logsigmoid.png
 create mode 100644 dok/logsoftmax.png
 create mode 100644 dok/power.png
 create mode 100644 dok/sigmmoid.png
 create mode 100644 dok/sigmoid.png
 create mode 100644 dok/softmax.png
 create mode 100644 dok/softmin.png
 create mode 100644 dok/softplus.png
 create mode 100644 dok/softsign.png
 create mode 100644 dok/sqrt.png
 create mode 100644 dok/square.png
 create mode 100644 dok/sshrink.png
 create mode 100644 dok/tanh.png
 create mode 100644 generic/Abs.c
 create mode
100644 generic/AbsCriterion.c create mode 100644 generic/Exp.c create mode 100644 generic/HardShrink.c create mode 100644 generic/HardTanh.c create mode 100644 generic/LogSigmoid.c create mode 100644 generic/LogSoftMax.c create mode 100644 generic/MSECriterion.c create mode 100644 generic/Max.c create mode 100644 generic/Min.c create mode 100644 generic/MultiLabelMarginCriterion.c create mode 100644 generic/MultiMarginCriterion.c create mode 100644 generic/Sigmoid.c create mode 100644 generic/SoftMax.c create mode 100644 generic/SoftPlus.c create mode 100644 generic/SoftShrink.c create mode 100644 generic/SparseLinear.c create mode 100644 generic/SpatialConvolution.c create mode 100644 generic/SpatialConvolutionMap.c create mode 100644 generic/SpatialMaxPooling.c create mode 100644 generic/SpatialSubSampling.c create mode 100644 generic/Sqrt.c create mode 100644 generic/Square.c create mode 100644 generic/Tanh.c create mode 100644 generic/TemporalConvolution.c create mode 100644 generic/TemporalSubSampling.c create mode 100644 generic/Threshold.c create mode 100644 generic/VolumetricConvolution.c create mode 100644 init.c create mode 100644 init.lua create mode 100644 test/test.lua diff --git a/Abs.lua b/Abs.lua new file mode 100644 index 0000000..3ccc6af --- /dev/null +++ b/Abs.lua @@ -0,0 +1,15 @@ +local Abs, parent = torch.class('nn.Abs', 'nn.Module') + +function Abs:__init() + parent.__init(self) +end + +function Abs:updateOutput(input) + input.nn.Abs_updateOutput(self, input) + return self.output +end + +function Abs:updateGradInput(input, gradOutput) + input.nn.Abs_updateGradInput(self, input, gradOutput) + return self.gradInput +end diff --git a/AbsCriterion.lua b/AbsCriterion.lua new file mode 100644 index 0000000..be7f6cb --- /dev/null +++ b/AbsCriterion.lua @@ -0,0 +1,14 @@ +local AbsCriterion, parent = torch.class('nn.AbsCriterion', 'nn.Criterion') + +function AbsCriterion:__init() + parent.__init(self) + self.sizeAverage = true +end + +function AbsCriterion:updateOutput(input, target) + return input.nn.AbsCriterion_updateOutput(self, input, target) +end + +function AbsCriterion:updateGradInput(input, target) + return input.nn.AbsCriterion_updateGradInput(self, input, target) +end diff --git a/Add.lua b/Add.lua new file mode 100644 index 0000000..40da79b --- /dev/null +++ b/Add.lua @@ -0,0 +1,54 @@ +local Add, parent = torch.class('nn.Add', 'nn.Module') + +function Add:__init(inputSize,scalar) + parent.__init(self) + + local size = inputSize + if scalar then size=1 end + self.bias = torch.Tensor(size) + self.gradBias = torch.Tensor(size) + + -- state + self.gradInput:resize(inputSize) + self.output:resize(inputSize) + + self:reset() +end + +function Add:reset(stdv) + if stdv then + stdv = stdv * math.sqrt(3) + else + stdv = 1./math.sqrt(self.bias:size(1)) + end + + for i=1,self.bias:size(1) do + self.bias[i] = torch.uniform(-stdv, stdv) + end +end + +function Add:updateOutput(input) + self.output:copy(input); + if self.gradBias:size(1)==1 then + self.output:add(self.bias[1]); + else + self.output:add(self.bias); + end + return self.output +end + +function Add:updateGradInput(input, gradOutput) + if self.gradInput then + self.gradInput:copy(gradOutput) + return self.gradInput + end +end + +function Add:accGradParameters(input, gradOutput, scale) + scale = scale or 1 + if self.gradBias:size(1) == 1 then + self.gradBias[1] = self.gradBias[1] + scale*gradOutput:sumall(); + else + self.gradBias:add(scale, gradOutput) + end +end diff --git a/CAddTable.lua b/CAddTable.lua new file mode 
100644 index 0000000..afe3568 --- /dev/null +++ b/CAddTable.lua @@ -0,0 +1,24 @@ + +local CAddTable, parent = torch.class('nn.CAddTable', 'nn.Module') + +function CAddTable:__init() + parent.__init(self) + self.gradInput = {} +end + +function CAddTable:updateOutput(input) + self.output:resizeAs(input[1]):copy(input[1]) + for i=2,#input do + self.output:add(input[i]) + end + return self.output +end + +function CAddTable:updateGradInput(input, gradOutput) + for i=1,#input do + self.gradInput[i] = self.gradInput[i] or torch.Tensor() + self.gradInput[i]:resizeAs(input[i]) + self.gradInput[i]:copy(gradOutput) + end + return self.gradInput +end diff --git a/CDivTable.lua b/CDivTable.lua new file mode 100644 index 0000000..f91d024 --- /dev/null +++ b/CDivTable.lua @@ -0,0 +1,21 @@ + +local CDivTable, parent = torch.class('nn.CDivTable', 'nn.Module') + +function CDivTable:__init() + parent.__init(self) + self.gradInput = {} +end + +function CDivTable:updateOutput(input) + self.output:resizeAs(input[1]):copy(input[1]) + self.output:cdiv(input[2]) + return self.output +end + +function CDivTable:updateGradInput(input, gradOutput) + self.gradInput[1] = self.gradInput[1] or torch.Tensor() + self.gradInput[2] = self.gradInput[2] or torch.Tensor() + self.gradInput[1]:resizeAs(input[1]):copy(gradOutput):cdiv(input[2]) + self.gradInput[2]:resizeAs(input[2]):zero():addcdiv(-1,self.gradInput[1],input[2]):cmul(input[1]) + return self.gradInput +end diff --git a/CMakeLists.txt b/CMakeLists.txt new file mode 100644 index 0000000..75239ad --- /dev/null +++ b/CMakeLists.txt @@ -0,0 +1,9 @@ +SET(src init.c) + +FILE(GLOB luasrc *.lua) +SET(luasrc ${luasrc} test/test.lua) + +ADD_TORCH_PACKAGE(nn "${src}" "${luasrc}" "Machine Learning") +ADD_TORCH_DOK(dok nn "Machine Learning" "Neural Networks" 3.1) + +TARGET_LINK_LIBRARIES(nn luaT TH) diff --git a/CMul.lua b/CMul.lua new file mode 100644 index 0000000..9b59944 --- /dev/null +++ b/CMul.lua @@ -0,0 +1,36 @@ +local CMul, parent = torch.class('nn.CMul', 'nn.Module') + +function CMul:__init(inputSize) + parent.__init(self) + + self.weight = torch.Tensor(inputSize) + self.gradWeight = torch.Tensor(inputSize) + + -- state + self.gradInput:resize(inputSize) + self.output:resize(inputSize) + + self:reset() +end + +function CMul:reset() + self.weight:fill(1) +end + +function CMul:updateOutput(input) + self.output:copy(input); + self.output:cmul(self.weight); + return self.output +end + +function CMul:updateGradInput(input, gradOutput) + if self.gradInput then + self.gradInput:zero() + self.gradInput:addcmul(1, self.weight, gradOutput) + return self.gradInput + end +end + +function CMul:accGradParameters(input, gradOutput, scale) + self.gradWeight:addcmul(scale or 1, input, gradOutput) +end diff --git a/CMulTable.lua b/CMulTable.lua new file mode 100644 index 0000000..4c058b6 --- /dev/null +++ b/CMulTable.lua @@ -0,0 +1,26 @@ + +local CMulTable, parent = torch.class('nn.CMulTable', 'nn.Module') + +function CMulTable:__init() + parent.__init(self) + self.gradInput = {} +end + +function CMulTable:updateOutput(input) + self.output:resizeAs(input[1]):copy(input[1]) + for i=2,#input do + self.output:cmul(input[i]) + end + return self.output +end + +function CMulTable:updateGradInput(input, gradOutput) + local tout = torch.Tensor():resizeAs(self.output) + for i=1,#input do + self.gradInput[i] = self.gradInput[i] or torch.Tensor() + self.gradInput[i]:resizeAs(input[i]):copy(gradOutput) + tout:copy(self.output):cdiv(input[i]) + self.gradInput[i]:cmul(tout) + end + return 
self.gradInput +end diff --git a/CSubTable.lua b/CSubTable.lua new file mode 100644 index 0000000..ffc495b --- /dev/null +++ b/CSubTable.lua @@ -0,0 +1,21 @@ + +local CSubTable, parent = torch.class('nn.CSubTable', 'nn.Module') + +function CSubTable:__init() + parent.__init(self) + self.gradInput = {} +end + +function CSubTable:updateOutput(input) + self.output:resizeAs(input[1]):copy(input[1]) + self.output:add(-1,input[2]) + return self.output +end + +function CSubTable:updateGradInput(input, gradOutput) + self.gradInput[1] = self.gradInput[1] or torch.Tensor() + self.gradInput[2] = self.gradInput[2] or torch.Tensor() + self.gradInput[1]:resizeAs(input[1]):copy(gradOutput) + self.gradInput[2]:resizeAs(input[1]):copy(gradOutput):mul(-1) + return self.gradInput +end diff --git a/ClassNLLCriterion.lua b/ClassNLLCriterion.lua new file mode 100644 index 0000000..7ac48f4 --- /dev/null +++ b/ClassNLLCriterion.lua @@ -0,0 +1,44 @@ +local ClassNLLCriterion, parent = torch.class('nn.ClassNLLCriterion', 'nn.Criterion') + +function ClassNLLCriterion:__init() + parent.__init(self) + self.sizeAverage = true +end + +function ClassNLLCriterion:updateOutput(input, target) + if input:dim() == 1 then + self.output = -input[target] + elseif input:dim() == 2 then + local output = 0 + for i=1,target:size(1) do + output = output - input[i][target[i]] + end + if self.sizeAverage then + output = output / target:size(1) + end + self.output = output + else + error('matrix or vector expected') + end + return self.output +end + +function ClassNLLCriterion:updateGradInput(input, target) + self.gradInput:resizeAs(input) + self.gradInput:zero() + + if input:dim() == 1 then + self.gradInput[target] = -1 + else + local z = -1 + if self.sizeAverage then + z = z / target:size(1) + end + local gradInput = self.gradInput + for i=1,target:size(1) do + gradInput[i][target[i]] = z + end + end + + return self.gradInput +end diff --git a/Concat.lua b/Concat.lua new file mode 100644 index 0000000..616c394 --- /dev/null +++ b/Concat.lua @@ -0,0 +1,119 @@ +local Concat, parent = torch.class('nn.Concat', 'nn.Module') + +function Concat:__init(dimension) + parent.__init(self) + self.modules = {} + self.size = torch.LongStorage() + self.dimension = dimension +end + +function Concat:add(module) + table.insert(self.modules, module) + return self +end + +function Concat:get(index) + return self.modules[index] +end + +function Concat:updateOutput(input) + for i=1,#self.modules do + local currentOutput = self.modules[i]:updateOutput(input) + + if i == 1 then + self.size:resize(currentOutput:dim()):copy(currentOutput:size()) + else + self.size[self.dimension] = self.size[self.dimension] + currentOutput:size(self.dimension) + end + end + self.output:resize(self.size) + + local offset = 1 + for _,module in ipairs(self.modules) do + local currentOutput = module:updateOutput(input) + self.output:narrow(self.dimension, offset, currentOutput:size(self.dimension)):copy(currentOutput) + offset = offset + currentOutput:size(self.dimension) + end + return self.output +end + +function Concat:updateGradInput(input, gradOutput) + self.gradInput:resizeAs(input) + + local offset = 1 + for i,module in ipairs(self.modules) do + local currentOutput = module.output + local currentGradInput = module:updateGradInput(input, gradOutput:narrow(self.dimension, offset, currentOutput:size(self.dimension))) + + if i==1 then + self.gradInput:copy(currentGradInput) + else + self.gradInput:add(currentGradInput) + end + offset = offset + currentOutput:size(self.dimension) 
+ end + return self.gradInput +end + +function Concat:accGradParameters(input, gradOutput, scale) + scale = scale or 1 + local offset = 1 + for i,module in ipairs(self.modules) do + local currentOutput = module.output + local currentGradInput = module:accGradParameters(input, + gradOutput:narrow(self.dimension, offset, currentOutput:size(self.dimension)), + scale) + offset = offset + currentOutput:size(self.dimension) + end +end + +function Concat:accUpdateGradParameters(input, gradOutput, lr) + local offset = 1 + for i,module in ipairs(self.modules) do + local currentOutput = module.output + local currentGradInput = module:accUpdateGradParameters(input, + gradOutput:narrow(self.dimension, offset, currentOutput:size(self.dimension)), + lr) + offset = offset + currentOutput:size(self.dimension) + end +end + +function Concat:zeroGradParameters() + for _,module in ipairs(self.modules) do + module:zeroGradParameters() + end +end + +function Concat:updateParameters(learningRate) + for _,module in ipairs(self.modules) do + module:updateParameters(learningRate) + end +end + +function Concat:share(mlp,...) + for i=1,#self.modules do + self.modules[i]:share(mlp.modules[i],...); + end +end + +function Concat:parameters() + local function tinsert(to, from) + if type(from) == 'table' then + for i=1,#from do + tinsert(to,from[i]) + end + else + table.insert(to,from) + end + end + local w = {} + local gw = {} + for i=1,#self.modules do + local mw,mgw = self.modules[i]:parameters() + if mw then + tinsert(w,mw) + tinsert(gw,mgw) + end + end + return w,gw +end diff --git a/ConcatTable.lua b/ConcatTable.lua new file mode 100644 index 0000000..730d95e --- /dev/null +++ b/ConcatTable.lua @@ -0,0 +1,72 @@ +local ConcatTable, parent = torch.class('nn.ConcatTable', 'nn.Module') + +function ConcatTable:__init() + parent.__init(self) + self.modules = {} + self.output = {} +end + +function ConcatTable:add(module) + table.insert(self.modules, module) + return self +end + +function ConcatTable:get(index) + return self.modules[index] +end + +function ConcatTable:size() + return #self.modules +end + +function ConcatTable:updateOutput(input) + for i=1,#self.modules do + self.output[i] = self.modules[i]:updateOutput(input) + end + return self.output +end + +function ConcatTable:updateGradInput(input, gradOutput) + for i,module in ipairs(self.modules) do + local currentGradInput = module:updateGradInput(input, gradOutput[i]) + if i == 1 then + self.gradInput:resizeAs(currentGradInput):copy(currentGradInput) + else + self.gradInput:add(currentGradInput) + end + end + return self.gradInput +end + +function ConcatTable:accGradParameters(input, gradOutput, scale) + scale = scale or 1 + for i,module in ipairs(self.modules) do + module:accGradParameters(input, gradOutput[i], scale) + end +end + +function ConcatTable:accUpdateGradParameters(input, gradOutput, lr) + for i,module in ipairs(self.modules) do + module:accUpdateGradParameters(input, gradOutput[i], lr) + end +end + +function ConcatTable:zeroGradParameters() + for _,module in ipairs(self.modules) do + module:zeroGradParameters() + end +end + +function ConcatTable:updateParameters(learningRate) + for _,module in ipairs(self.modules) do + module:updateParameters(learningRate) + end +end + +function ConcatTable:share(mlp,...) 
+ for i=1,#self.modules do + self.modules[i]:share(mlp.modules[i],...); + end +end + + diff --git a/Copy.lua b/Copy.lua new file mode 100644 index 0000000..7b6eeb3 --- /dev/null +++ b/Copy.lua @@ -0,0 +1,33 @@ +local Copy, parent = torch.class('nn.Copy', 'nn.Module') + +function Copy:__init(intype, outtype) + intype = intype or torch.getmetatable(torch.Tensor.__typename) + outtype = outtype or torch.getmetatable(torch.Tensor.__typename) + + parent.__init(self) + self.gradInput = torch.getmetatable(intype).new() + self.output = torch.getmetatable(outtype).new() + + if intype == outtype then + + self.updateOutput = function(self, input) + self.output = input + return input + end + + self.updateGradInput = function(self, input, gradOutput) + self.gradInput = gradOutput + return gradOutput + end + end +end + +function Copy:updateOutput(input) + self.output:resize(input:size()):copy(input) + return self.output +end + +function Copy:updateGradInput(input, gradOutput) + self.gradInput:resize(gradOutput:size()):copy(gradOutput) + return self.gradInput +end diff --git a/CosineDistance.lua b/CosineDistance.lua new file mode 100644 index 0000000..061ff92 --- /dev/null +++ b/CosineDistance.lua @@ -0,0 +1,40 @@ +local CosineDistance, parent = torch.class('nn.CosineDistance', 'nn.Module') + +function CosineDistance:__init() + parent.__init(self) + self.gradInput = {torch.Tensor(), torch.Tensor()} + self.output=torch.Tensor(1) +end + +function CosineDistance:updateOutput(input) + local input1, input2 = input[1], input[2] + self.w1 = input1:dot(input2) + self.w22 = input1:dot(input1) + self.w2 = math.sqrt(self.w22) + self.w32 = input2:dot(input2) + self.w3 = math.sqrt(self.w32) + self.output[1] = self.w1/self.w2/self.w3 + return self.output +end + +function CosineDistance:updateGradInput(input, gradOutput) + local v1 = input[1] + local v2 = input[2] + local gw1 = input[1].new() + local gw2 = input[2].new() + gw1:resizeAs(v1) + gw2:resizeAs(v1) + + gw1:zero() + gw1:add(1/(self.w2*self.w3), v2) + gw1:add(-self.w1/(self.w22*self.w2*self.w3), v1) + + gw2:zero() + gw2:add(1/(self.w2*self.w3), v1) + gw2:add(-self.w1/(self.w32*self.w2*self.w3), v2) + + gw1:mul(gradOutput[1]) + gw2:mul(gradOutput[1]) + self.gradInput = {gw1, gw2} + return self.gradInput +end diff --git a/CosineEmbeddingCriterion.lua b/CosineEmbeddingCriterion.lua new file mode 100644 index 0000000..a9ee2e0 --- /dev/null +++ b/CosineEmbeddingCriterion.lua @@ -0,0 +1,54 @@ +local CosineEmbeddingCriterion, parent = torch.class('nn.CosineEmbeddingCriterion', 'nn.Module') + +function CosineEmbeddingCriterion:__init(margin) + parent.__init(self) + margin=margin or 0 + self.margin = margin + self.gradInput = {torch.Tensor(), torch.Tensor()} +end + +function CosineEmbeddingCriterion:updateOutput(input,y) + local input1, input2 = input[1], input[2] + self.w1 = input1:dot(input2) + self.w22 = input1:dot(input1) + self.w2 = math.sqrt(self.w22) + self.w32 = input2:dot(input2) + self.w3 = math.sqrt(self.w32) + self.output = self.w1/self.w2/self.w3 + if y==-1 then + self.output = math.max(0, self.output - self.margin); + else + self.output = 1 - self.output + end + return self.output +end + +local function mathsign(t) + if t>0 then return 1; end + if t<0 then return -1; end + return 2*torch.random(2)-3; +end + +function CosineEmbeddingCriterion:updateGradInput(input, y) + local v1 = input[1] + local v2 = input[2] + local gw1 = input[1].new() + local gw2 = input[2].new() + gw1:resizeAs(v1) + gw2:resizeAs(v1) + + gw1:zero() + gw1:add(1/(self.w2*self.w3), v2) + 
gw1:add(-self.w1/(self.w22*self.w2*self.w3), v1) + + gw2:zero() + gw2:add(1/(self.w2*self.w3), v1) + gw2:add(-self.w1/(self.w32*self.w2*self.w3), v2) + + if y == 1 then + gw1 = -gw1 + gw2 = -gw2 + end + self.gradInput = {gw1, gw2} + return self.gradInput +end diff --git a/Criterion.lua b/Criterion.lua new file mode 100644 index 0000000..6513414 --- /dev/null +++ b/Criterion.lua @@ -0,0 +1,51 @@ +local Criterion = torch.class('nn.Criterion') + +function Criterion:__init() + self.gradInput = torch.Tensor() + self.output = 0 +end + +function Criterion:updateOutput(input, target) +end + +function Criterion:forward(input, target) + return self:updateOutput(input, target) +end + +function Criterion:backward(input, target) + return self:updateGradInput(input, target) +end + +function Criterion:updateGradInput(input, target) +end + +function Criterion:clone() + local f = torch.MemoryFile("rw"):binary() + f:writeObject(self) + f:seek(1) + local clone = f:readObject() + f:close() + return clone +end + +function Criterion:type(type) + -- find all tensors and convert them + for key,param in pairs(self) do + if torch.typename(param) and torch.typename(param):find('torch%..+Tensor') then + self[key] = param:type(type) + end + end + return self +end + +function Criterion:float() + return self:type('torch.FloatTensor') +end + +function Criterion:double() + return self:type('torch.DoubleTensor') +end + +function Criterion:cuda() + return self:type('torch.CudaTensor') +end diff --git a/CriterionTable.lua b/CriterionTable.lua new file mode 100644 index 0000000..e5538f7 --- /dev/null +++ b/CriterionTable.lua @@ -0,0 +1,16 @@ +local CriterionTable, parent = torch.class('nn.CriterionTable', 'nn.Module') + +function CriterionTable:__init(criterion) + self.criterion = criterion + self.gradInput = {criterion.gradInput} +end + +function CriterionTable:updateOutput(input) + self.output = self.criterion:updateOutput(unpack(input)) + return self.output +end + +function CriterionTable:updateGradInput(input, gradOutput) + self.criterion:updateGradInput(unpack(input)) + return self.gradInput +end diff --git a/DotProduct.lua b/DotProduct.lua new file mode 100644 index 0000000..d16d295 --- /dev/null +++ b/DotProduct.lua @@ -0,0 +1,29 @@ +local DotProduct, parent = torch.class('nn.DotProduct', 'nn.Module') + +function DotProduct:__init() + parent.__init(self) + self.gradInput = {torch.Tensor(), torch.Tensor()} + self.output=torch.Tensor(1) +end + +function DotProduct:updateOutput(input,y) + self.output[1] = input[1]:dot(input[2]) + return self.output +end + +function DotProduct:updateGradInput(input, gradOutput) + local v1 = input[1] + local v2 = input[2] + local gw1=self.gradInput[1]; + local gw2=self.gradInput[2]; + gw1:resizeAs(v1) + gw2:resizeAs(v1) + + gw1:copy( v2) + gw1:mul(gradOutput[1]) + + gw2:copy( v1) + gw2:mul(gradOutput[1]) + + return self.gradInput +end diff --git a/Euclidean.lua b/Euclidean.lua new file mode 100644 index 0000000..808b7ab --- /dev/null +++ b/Euclidean.lua @@ -0,0 +1,64 @@ +local Euclidean, parent = torch.class('nn.Euclidean', 'nn.Module') + +function Euclidean:__init(inputSize,outputSize) + parent.__init(self) + + self.weight = torch.Tensor(inputSize,outputSize) + self.gradWeight = torch.Tensor(inputSize,outputSize) + + -- state + self.gradInput:resize(inputSize) + self.output:resize(outputSize) + self.temp = torch.Tensor(inputSize) + + self:reset() +end + +function Euclidean:reset(stdv) + if stdv then + stdv = stdv * math.sqrt(3) + else + stdv = 1./math.sqrt(self.weight:size(1)) + end + + 
for i=1,self.weight:size(2) do + self.weight:select(2, i):apply(function() + return torch.uniform(-stdv, stdv) + end) + end +end + +function Euclidean:updateOutput(input) + self.output:zero() + for o = 1,self.weight:size(2) do + self.output[o] = input:dist(self.weight:select(2,o)) + end + return self.output +end + +function Euclidean:updateGradInput(input, gradOutput) + self:updateOutput(input) + if self.gradInput then + self.gradInput:zero() + for o = 1,self.weight:size(2) do + if self.output[o] ~= 0 then + self.temp:copy(input):add(-1,self.weight:select(2,o)) + self.temp:mul(gradOutput[o]/self.output[o]) + self.gradInput:add(self.temp) + end + end + return self.gradInput + end +end + +function Euclidean:accGradParameters(input, gradOutput, scale) + self:updateOutput(input) + scale = scale or 1 + for o = 1,self.weight:size(2) do + if self.output[o] ~= 0 then + self.temp:copy(self.weight:select(2,o)):add(-1,input) + self.temp:mul(gradOutput[o]/self.output[o]) + self.gradWeight:select(2,o):add(self.temp) + end + end +end diff --git a/Exp.lua b/Exp.lua new file mode 100644 index 0000000..c4df86c --- /dev/null +++ b/Exp.lua @@ -0,0 +1,9 @@ +local Exp = torch.class('nn.Exp', 'nn.Module') + +function Exp:updateOutput(input) + return input.nn.Exp_updateOutput(self, input) +end + +function Exp:updateGradInput(input, gradOutput) + return input.nn.Exp_updateGradInput(self, input, gradOutput) +end diff --git a/HardShrink.lua b/HardShrink.lua new file mode 100644 index 0000000..7dfeaca --- /dev/null +++ b/HardShrink.lua @@ -0,0 +1,16 @@ +local HardShrink, parent = torch.class('nn.HardShrink', 'nn.Module') + +function HardShrink:__init(lam) + parent.__init(self) + self.lambda = lam or 0.5 +end + +function HardShrink:updateOutput(input) + input.nn.HardShrink_updateOutput(self, input) + return self.output +end + +function HardShrink:updateGradInput(input, gradOutput) + input.nn.HardShrink_updateGradInput(self, input, gradOutput) + return self.gradInput +end diff --git a/HardTanh.lua b/HardTanh.lua new file mode 100644 index 0000000..3391479 --- /dev/null +++ b/HardTanh.lua @@ -0,0 +1,9 @@ +local HardTanh = torch.class('nn.HardTanh', 'nn.Module') + +function HardTanh:updateOutput(input) + return input.nn.HardTanh_updateOutput(self, input) +end + +function HardTanh:updateGradInput(input, gradOutput) + return input.nn.HardTanh_updateGradInput(self, input, gradOutput) +end diff --git a/HingeEmbeddingCriterion.lua b/HingeEmbeddingCriterion.lua new file mode 100644 index 0000000..e88ef82 --- /dev/null +++ b/HingeEmbeddingCriterion.lua @@ -0,0 +1,26 @@ +local HingeEmbeddingCriterion, parent = + torch.class('nn.HingeEmbeddingCriterion', 'nn.Module') + +function HingeEmbeddingCriterion:__init(margin) + parent.__init(self) + margin=margin or 1 + self.margin = margin + self.gradInput = torch.Tensor(1) +end + +function HingeEmbeddingCriterion:updateOutput(input,y) + self.output=input[1] + if y==-1 then + self.output = math.max(0,self.margin - self.output); + end + return self.output +end + +function HingeEmbeddingCriterion:updateGradInput(input, y) + self.gradInput[1]=y + local dist = input[1] + if y == -1 and dist > self.margin then + self.gradInput[1]=0; + end + return self.gradInput +end diff --git a/Identity.lua b/Identity.lua new file mode 100644 index 0000000..79b5c08 --- /dev/null +++ b/Identity.lua @@ -0,0 +1,12 @@ +local Identity, parent = torch.class('nn.Identity', 'nn.Module') + +function Identity:updateOutput(input) + self.output = input + return self.output +end + + +function 
Identity:updateGradInput(input, gradOutput) + self.gradInput = gradOutput + return self.gradInput +end diff --git a/Jacobian.lua b/Jacobian.lua new file mode 100644 index 0000000..04330ac --- /dev/null +++ b/Jacobian.lua @@ -0,0 +1,239 @@ +nn.Jacobian = {} + +function nn.Jacobian.backward (module, input, param, dparam) + local doparam = 0 + if param then + doparam = 1 + end + param = param or input + -- output deriv + module:forward(input) + local dout = module.output.new():resizeAs(module.output) + -- 1D view + local sdout = module.output.new(dout:storage(),1,dout:nElement()) + -- jacobian matrix to calculate + local jacobian = torch.Tensor(param:nElement(),dout:nElement()):zero() + + for i=1,sdout:nElement() do + dout:zero() + sdout[i] = 1 + module:zeroGradParameters() + local din = module:updateGradInput(input, dout) + module:accGradParameters(input, dout) + if doparam == 1 then + jacobian:select(2,i):copy(dparam) + else + jacobian:select(2,i):copy(din) + end + end + return jacobian +end + +function nn.Jacobian.backwardUpdate (module, input, param) + + -- output deriv + module:forward(input) + local dout = module.output.new():resizeAs(module.output) + -- 1D view + local sdout = module.output.new(dout:storage(),1,dout:nElement()) + -- jacobian matrix to calculate + local jacobian = torch.Tensor(param:nElement(),dout:nElement()):zero() + + -- original param + local origparam = param:clone() + + for i=1,sdout:nElement() do + param:copy(origparam) + dout:zero() + sdout[i] = 1 + local din = module:updateGradInput(input, dout) + module:accUpdateGradParameters(input, dout, 1) + jacobian:select(2,i):copy(param) + end + + param:copy(origparam) + + return jacobian +end + +function nn.Jacobian.forward(module, input, param) + param = param or input + -- perturbation amount + local small = 1e-6 + -- 1D view of input + local tst = param:storage() + local sin = param.new(tst,1,tst:size()) + -- jacobian matrix to calculate + local jacobian = torch.Tensor():resize(param:nElement(),module:forward(input):nElement()) + + local outa = torch.Tensor(jacobian:size(2)) + local outb = torch.Tensor(jacobian:size(2)) + + for i=1,sin:nElement() do + sin[i] = sin[i] - small + outa:copy(module:forward(input)) + sin[i] = sin[i] + 2*small + outb:copy(module:forward(input)) + sin[i] = sin[i] - small + + outb:add(-1,outa):div(2*small) + jacobian:select(1,i):copy(outb) + end + + return jacobian +end + +function nn.Jacobian.forwardUpdate(module, input, param) + -- perturbation amount + local small = 1e-6 + -- 1D view of input + local tst = param:storage() + local sin = param.new(tst,1,tst:size()) + -- jacobian matrix to calculate + local jacobian = torch.Tensor():resize(param:nElement(),module:forward(input):nElement()) + + local outa = torch.Tensor(jacobian:size(2)) + local outb = torch.Tensor(jacobian:size(2)) + + for i=1,sin:nElement() do + sin[i] = sin[i] - small + outa:copy(module:forward(input)) + sin[i] = sin[i] + 2*small + outb:copy(module:forward(input)) + sin[i] = sin[i] - small + + outb:add(-1,outa):div(2*small) + jacobian:select(1,i):copy(outb) + jacobian:select(1,i):mul(-1) + jacobian:select(1,i):add(sin[i]) + end + return jacobian +end + +function nn.Jacobian.testJacobian (module, input, minval, maxval) + minval = minval or -2 + maxval = maxval or 2 + local inrange = maxval - minval + input:copy(torch.rand(input:nElement()):mul(inrange):add(minval)) + local jac_fprop = nn.Jacobian.forward(module,input) + local jac_bprop = nn.Jacobian.backward(module,input) + local error = jac_fprop-jac_bprop + return 
error:abs():maxall() +end + +function nn.Jacobian.testJacobianParameters (module, input, param, dparam, minval, maxval) + minval = minval or -2 + maxval = maxval or 2 + local inrange = maxval - minval + input:copy(torch.rand(input:nElement()):mul(inrange):add(minval)) + param:copy(torch.rand(param:nElement()):mul(inrange):add(minval)) + local jac_bprop = nn.Jacobian.backward(module, input, param, dparam) + local jac_fprop = nn.Jacobian.forward(module, input, param) + local error = jac_fprop - jac_bprop + return error:abs():maxall() +end + +function nn.Jacobian.testJacobianUpdateParameters (module, input, param, minval, maxval) + minval = minval or -2 + maxval = maxval or 2 + local inrange = maxval - minval + input:copy(torch.rand(input:nElement()):mul(inrange):add(minval)) + param:copy(torch.rand(param:nElement()):mul(inrange):add(minval)) + local params_bprop = nn.Jacobian.backwardUpdate(module, input, param) + local params_fprop = nn.Jacobian.forwardUpdate(module, input, param) + + local error = params_fprop - params_bprop + return error:abs():maxall() +end + +function nn.Jacobian.testIO(module,input, minval, maxval) + minval = minval or -2 + maxval = maxval or 2 + local inrange = maxval - minval + + -- run module + module:forward(input) + local go = module.output:clone():copy(torch.rand(module.output:nElement()):mul(inrange):add(minval)) + module:updateGradInput(input,go) + module:accGradParameters(input,go) + + local fo = module.output:clone() + local bo = module.gradInput:clone() + + -- write module + local f = torch.DiskFile('tmp.bin','w'):binary() + f:writeObject(module) + f:close() + -- read module + local m = torch.DiskFile('tmp.bin'):binary():readObject() + m:forward(input) + m:updateGradInput(input,go) + m:accGradParameters(input,go) + -- cleanup + os.remove('tmp.bin') + + local fo2 = m.output:clone() + local bo2 = m.gradInput:clone() + + local errf = fo - fo2 + local errb = bo - bo2 + return errf:abs():maxall(), errb:abs():maxall() +end + +function nn.Jacobian.testAllUpdate(module, input, weight, gradWeight) + local gradOutput + local lr = torch.uniform(0.1, 1) + local errors = {} + + -- accGradParameters + local maccgp = module:clone() + local weightc = maccgp[weight]:clone() + maccgp:forward(input) + gradOutput = torch.rand(maccgp.output:size()) + maccgp:zeroGradParameters() + maccgp:updateGradInput(input, gradOutput) + maccgp:accGradParameters(input, gradOutput) + maccgp:updateParameters(lr) + errors["accGradParameters"] = (weightc-maccgp[gradWeight]*lr-maccgp[weight]):norm() + + -- accUpdateGradParameters + local maccugp = module:clone() + maccugp:forward(input) + maccugp:updateGradInput(input, gradOutput) + maccugp:accUpdateGradParameters(input, gradOutput, lr) + errors["accUpdateGradParameters"] = (maccugp[weight]-maccgp[weight]):norm() + + -- shared, accGradParameters + local macsh1 = module:clone() + local macsh2 = module:clone() + macsh2:share(macsh1, weight) + macsh1:forward(input) + macsh2:forward(input) + macsh1:zeroGradParameters() + macsh2:zeroGradParameters() + macsh1:updateGradInput(input, gradOutput) + macsh2:updateGradInput(input, gradOutput) + macsh1:accGradParameters(input, gradOutput) + macsh2:accGradParameters(input, gradOutput) + macsh1:updateParameters(lr) + macsh2:updateParameters(lr) + local err = (weightc-maccgp[gradWeight]*(lr*2)-macsh1[weight]):norm() + err = err + (weightc-maccgp[gradWeight]*(lr*2)-macsh2[weight]):norm() + errors["accGradParameters [shared]"] = err + + -- shared, accUpdateGradParameters + local macshu1 = module:clone() + local 
macshu2 = module:clone() + macshu2:share(macshu1, weight) + macshu1:forward(input) + macshu2:forward(input) + macshu1:updateGradInput(input, gradOutput) + macshu2:updateGradInput(input, gradOutput) + macshu1:accUpdateGradParameters(input, gradOutput, lr) + macshu2:accUpdateGradParameters(input, gradOutput, lr) + local err = (weightc-maccgp[gradWeight]*(lr*2)-macshu1[weight]):norm() + err = err + (weightc-maccgp[gradWeight]*(lr*2)-macshu2[weight]):norm() + errors["accUpdateGradParameters [shared]"] = err + + return errors +end diff --git a/JoinTable.lua b/JoinTable.lua new file mode 100644 index 0000000..dc20246 --- /dev/null +++ b/JoinTable.lua @@ -0,0 +1,50 @@ +local JoinTable, parent = torch.class('nn.JoinTable', 'nn.Module') + +function JoinTable:__init(dimension) + parent.__init(self) + self.size = torch.LongStorage() + self.dimension = dimension + self.gradInput = {} +end + +function JoinTable:updateOutput(input) + for i=1,#input do + local currentOutput = input[i] + if i == 1 then + self.size:resize(currentOutput:dim()):copy(currentOutput:size()) + else + self.size[self.dimension] = self.size[self.dimension] + + currentOutput:size(self.dimension) + end + end + self.output:resize(self.size) + + local offset = 1 + for i=1,#input do + local currentOutput = input[i] + self.output:narrow(self.dimension, offset, + currentOutput:size(self.dimension)):copy(currentOutput) + offset = offset + currentOutput:size(self.dimension) + end + return self.output + +end + +function JoinTable:updateGradInput(input, gradOutput) + for i=1,#input do + if self.gradInput[i] == nil then + self.gradInput[i] = input[i].new() + end + self.gradInput[i]:resizeAs(input[i]) + end + + local offset = 1 + for i=1,#input do + local currentOutput = input[i] + local currentGradInput = gradOutput:narrow(self.dimension, offset, + currentOutput:size(self.dimension)) + self.gradInput[i]:copy(currentGradInput) + offset = offset + currentOutput:size(self.dimension) + end + return self.gradInput +end diff --git a/L1HingeEmbeddingCriterion.lua b/L1HingeEmbeddingCriterion.lua new file mode 100644 index 0000000..5aa1ae7 --- /dev/null +++ b/L1HingeEmbeddingCriterion.lua @@ -0,0 +1,41 @@ +local L1HingeEmbeddingCriterion, parent = torch.class('nn.L1HingeEmbeddingCriterion', 'nn.Module') + +function L1HingeEmbeddingCriterion:__init(margin) + parent.__init(self) + margin=margin or 1 + self.margin = margin + self.gradInput = {torch.Tensor(), torch.Tensor()} +end + +function L1HingeEmbeddingCriterion:updateOutput(input,y) + self.output=input[1]:dist(input[2],1); + if y==-1 then + self.output = math.max(0,self.margin - self.output); + end + return self.output +end + + +local function mathsign(t) + if t>0 then return 1; end + if t<0 then return -1; end + return 2*torch.random(2)-3; +end + +function L1HingeEmbeddingCriterion:updateGradInput(input, y) + self.gradInput[1]:resizeAs(input[1]) + self.gradInput[2]:resizeAs(input[2]) + self.gradInput[1]:copy(input[1]) + self.gradInput[1]:add(-1, input[2]) + local dist = self.gradInput[1]:norm(1); + self.gradInput[1]:apply(mathsign) -- L1 gradient + if y == -1 then -- just to avoid a mul by 1 + if dist > self.margin then + self.gradInput[1]:zero() + else + self.gradInput[1]:mul(-1) + end + end + self.gradInput[2]:zero():add(-1, self.gradInput[1]) + return self.gradInput +end diff --git a/Linear.lua b/Linear.lua new file mode 100644 index 0000000..953af78 --- /dev/null +++ b/Linear.lua @@ -0,0 +1,82 @@ +local Linear, parent = torch.class('nn.Linear', 'nn.Module') + +function Linear:__init(inputSize, 
outputSize) + parent.__init(self) + + self.weight = torch.Tensor(outputSize, inputSize) + self.bias = torch.Tensor(outputSize) + self.gradWeight = torch.Tensor(outputSize, inputSize) + self.gradBias = torch.Tensor(outputSize) + + self:reset() +end + +function Linear:reset(stdv) + if stdv then + stdv = stdv * math.sqrt(3) + else + stdv = 1./math.sqrt(self.weight:size(2)) + end + + -- we do this so the initialization is exactly + -- the same than in previous torch versions + for i=1,self.weight:size(1) do + self.weight:select(1, i):apply(function() + return torch.uniform(-stdv, stdv) + end) + self.bias[i] = torch.uniform(-stdv, stdv) + end +end + +function Linear:updateOutput(input) + if input:dim() == 1 then + self.output:resize(self.bias:size(1)) + self.output:copy(self.bias) + self.output:addmv(1, self.weight, input) + elseif input:dim() == 2 then + local nframe = input:size(1) + local nunit = self.bias:size(1) + + self.output:resize(nframe, nunit) + self.output:zero():addr(1, input.new(nframe):fill(1), self.bias) + self.output:addmm(1, input, self.weight:t()) + else + error('input must be vector or matrix') + end + + return self.output +end + +function Linear:updateGradInput(input, gradOutput) + if self.gradInput then + + if input:dim() == 1 then + self.gradInput:resizeAs(input) + self.gradInput:addmv(0, 1, self.weight:t(), gradOutput) + elseif input:dim() == 2 then + self.gradInput:resizeAs(input) + self.gradInput:addmm(0, 1, gradOutput, self.weight) + end + + return self.gradInput + end +end + +function Linear:accGradParameters(input, gradOutput, scale) + scale = scale or 1 + + if input:dim() == 1 then + self.gradWeight:addr(scale, gradOutput, input) + self.gradBias:add(scale, gradOutput) + elseif input:dim() == 2 then + local nframe = input:size(1) + local nunit = self.bias:size(1) + + self.gradWeight:addmm(scale, gradOutput:t(), input) + self.gradBias:addmv(scale, gradOutput:t(), input.new(nframe):fill(1)) + end + +end + +-- we do not need to accumulate parameters when sharing +Linear.sharedAccUpdateGradParameters = Linear.accUpdateGradParameters diff --git a/LogSigmoid.lua b/LogSigmoid.lua new file mode 100644 index 0000000..7485ae6 --- /dev/null +++ b/LogSigmoid.lua @@ -0,0 +1,14 @@ +local LogSigmoid, parent = torch.class('nn.LogSigmoid', 'nn.Module') + +function LogSigmoid:__init() + parent.__init(self) + self.buffer = torch.Tensor() +end + +function LogSigmoid:updateOutput(input) + return input.nn.LogSigmoid_updateOutput(self, input) +end + +function LogSigmoid:updateGradInput(input, gradOutput) + return input.nn.LogSigmoid_updateGradInput(self, input, gradOutput) +end diff --git a/LogSoftMax.lua b/LogSoftMax.lua new file mode 100644 index 0000000..8d2947e --- /dev/null +++ b/LogSoftMax.lua @@ -0,0 +1,9 @@ +local LogSoftMax = torch.class('nn.LogSoftMax', 'nn.Module') + +function LogSoftMax:updateOutput(input) + return input.nn.LogSoftMax_updateOutput(self, input) +end + +function LogSoftMax:updateGradInput(input, gradOutput) + return input.nn.LogSoftMax_updateGradInput(self, input, gradOutput) +end diff --git a/LookupTable.lua b/LookupTable.lua new file mode 100644 index 0000000..115f19c --- /dev/null +++ b/LookupTable.lua @@ -0,0 +1,76 @@ +local LookupTable, parent = torch.class('nn.LookupTable', 'nn.Module') + +LookupTable.__version = 2 + +function LookupTable:__init(nIndex, ...) + parent.__init(self) + + if select('#', ...) == 1 and type(select(1, ...)) ~= "number" then + local size = select(1, ...) 
+ self.size = torch.LongStorage(#size + 1) + for i=1,#size do + self.size[i+1] = size[i] + end + else + self.size = torch.LongStorage(select('#', ...)+1) + for i=1,select('#',...) do + self.size[i+1] = select(i, ...) + end + end + + self.size[1] = nIndex + self.weight = torch.Tensor(self.size) + self.gradWeight = torch.Tensor(self.size):zero() + self.inputs = {} + + self:reset() +end + +function LookupTable:reset(stdv) + stdv = stdv or 1 + self.weight:apply(function() + return torch.normal(0, stdv) + end) +end + +function LookupTable:updateOutput(input) + local nIndex = input:size(1) + self.size[1] = nIndex + self.output:resize(self.size) + + for i=1,nIndex do + self.output:select(1, i):copy(self.weight:select(1, input[i])) + end + + return self.output +end + +function LookupTable:zeroGradParameters() + for k,_ in pairs(self.inputs) do + self.gradWeight:select(1, k):zero() + end + self.inputs = {} +end + +function LookupTable:accGradParameters(input, gradOutput, scale) + for i=1,input:size(1) do + local k = input[i] + self.inputs[k] = true + self.gradWeight:select(1, k):add(scale, gradOutput:select(1, i)) + end +end + +function LookupTable:accUpdateGradParameters(input, gradOutput, lr) + for i=1,input:size(1) do + self.weight:select(1, input[i]):add(-lr, gradOutput:select(1, i)) + end +end + +function LookupTable:updateParameters(learningRate) + for k,_ in pairs(self.inputs) do + self.weight:select(1, k):add(-learningRate, self.gradWeight:select(1, k)) + end +end + +-- we do not need to accumulate parameters when sharing +LookupTable.sharedAccUpdateGradParameters = LookupTable.accUpdateGradParameters diff --git a/MSECriterion.lua b/MSECriterion.lua new file mode 100644 index 0000000..655c74f --- /dev/null +++ b/MSECriterion.lua @@ -0,0 +1,14 @@ +local MSECriterion, parent = torch.class('nn.MSECriterion', 'nn.Criterion') + +function MSECriterion:__init() + parent.__init(self) + self.sizeAverage = true +end + +function MSECriterion:updateOutput(input, target) + return input.nn.MSECriterion_updateOutput(self, input, target) +end + +function MSECriterion:updateGradInput(input, target) + return input.nn.MSECriterion_updateGradInput(self, input, target) +end diff --git a/MarginCriterion.lua b/MarginCriterion.lua new file mode 100644 index 0000000..deb903e --- /dev/null +++ b/MarginCriterion.lua @@ -0,0 +1,23 @@ +local MarginCriterion, parent = + torch.class('nn.MarginCriterion', 'nn.Module') + +function MarginCriterion:__init(margin) + parent.__init(self) + margin=margin or 1 + self.margin = margin + self.gradInput = torch.Tensor(1) +end + +function MarginCriterion:updateOutput(input,y) + self.output=math.max(0, self.margin- y* input[1]) + return self.output +end + +function MarginCriterion:updateGradInput(input, y) + if (y*input[1]) 0 then + clone:share(self,...) 
+ end + return clone +end + +function Module:type(type) + -- find all tensors and convert them + for key,param in pairs(self) do + if torch.typename(param) and torch.typename(param):find('torch%..+Tensor') then + self[key] = param:type(type) + end + end + -- find submodules in classic containers 'modules' + if self.modules then + for _,module in ipairs(self.modules) do + module:type(type) + end + end + return self +end + +function Module:float() + return self:type('torch.FloatTensor') +end + +function Module:double() + return self:type('torch.DoubleTensor') +end + +function Module:cuda() + return self:type('torch.CudaTensor') +end + +function Module:getParameters() + -- get parameters + local parameters,gradParameters = self:parameters() + + -- this function flattens arbitrary lists of parameters, + -- even complex shared ones + local function flatten(parameters) + -- already flat ? + local flat = true + for k = 2,#parameters do + if parameters[k]:storage() ~= parameters[k-1]:storage() then + flat = false + break + end + end + if flat then + local nParameters = 0 + for k,param in ipairs(parameters) do + nParameters = nParameters + param:nElement() + end + local flatParameters = parameters[1].new(parameters[1]:storage()) + if nParameters ~= flatParameters:nElement() then + error('flattenParameters(): weird parameters') + end + return flatParameters + end + -- compute offsets of each parameter + local offsets = {} + local sizes = {} + local strides = {} + local elements = {} + local storageOffsets = {} + local params = {} + local nParameters = 0 + for k,param in ipairs(parameters) do + table.insert(offsets, nParameters+1) + table.insert(sizes, param:size()) + table.insert(strides, param:stride()) + table.insert(elements, param:nElement()) + table.insert(storageOffsets, param:storageOffset()) + local isView = false + for i = 1,k-1 do + if param:storage() == parameters[i]:storage() then + offsets[k] = offsets[i] + if storageOffsets[k] ~= storageOffsets[i] or elements[k] ~= elements[i] then + error('flattenParameters(): cannot flatten shared weights with different structures') + end + isView = true + break + end + end + if not isView then + nParameters = nParameters + param:nElement() + end + end + -- create flat vector + local flatParameters = parameters[1].new(nParameters) + local storage = flatParameters:storage() + -- reallocate all parameters in flat vector + for i = 1,#parameters do + local data = parameters[i]:clone() + parameters[i]:set(storage, offsets[i], elements[i]):resize(sizes[i],strides[i]):copy(data) + data = nil + collectgarbage() + end + -- cleanup + collectgarbage() + -- return flat param + return flatParameters + end + + -- flatten parameters and gradients + local flatParameters = flatten(parameters) + local flatGradParameters = flatten(gradParameters) + + -- return new flat vector that contains all discrete parameters + return flatParameters, flatGradParameters +end diff --git a/Mul.lua b/Mul.lua new file mode 100644 index 0000000..7841470 --- /dev/null +++ b/Mul.lua @@ -0,0 +1,42 @@ +local Mul, parent = torch.class('nn.Mul', 'nn.Module') + +function Mul:__init(inputSize) + parent.__init(self) + + self.weight = torch.Tensor(1) + self.gradWeight = torch.Tensor(1) + + -- state + self.gradInput:resize(inputSize) + self.output:resize(inputSize) + + self:reset() +end + + +function Mul:reset(stdv) + if stdv then + stdv = stdv * math.sqrt(3) + else + stdv = 1./math.sqrt(self.weight:size(1)) + end + + self.weight[1] = torch.uniform(-stdv, stdv); +end + +function 
Mul:updateOutput(input) + self.output:copy(input); + self.output:mul(self.weight[1]); + return self.output +end + +function Mul:updateGradInput(input, gradOutput) + self.gradInput:zero() + self.gradInput:add(self.weight[1], gradOutput) + return self.gradInput +end + +function Mul:accGradParameters(input, gradOutput, scale) + scale = scale or 1 + self.gradWeight[1] = self.gradWeight[1] + scale*input:dot(gradOutput); +end diff --git a/MultiCriterion.lua b/MultiCriterion.lua new file mode 100644 index 0000000..e83b97e --- /dev/null +++ b/MultiCriterion.lua @@ -0,0 +1,32 @@ +local MultiCriterion, parent = torch.class('nn.MultiCriterion', 'nn.Criterion') + +function MultiCriterion:__init() + parent.__init(self) + self.criterions = {} + self.weights = torch.DoubleStorage() +end + +function MultiCriterion:add(criterion, weight) + weight = weight or 1 + table.insert(self.criterions, criterion) + self.weights:resize(#self.criterions, true) + self.weights[#self.criterions] = weight + return self +end + +function MultiCriterion:updateOutput(input, target) + self.output = 0 + for i=1,#self.criterions do + self.output = self.output + self.weights[i]*self.criterions[i]:updateOutput(input, target) + end + return self.output +end + +function MultiCriterion:updateGradInput(input, target) + self.gradInput:resizeAs(input) + self.gradInput:zero() + for i=1,#self.criterions do + self.gradInput:add(self.weights[i], self.criterions[i]:updateGradInput(input, target)) + end + return self.gradInput +end diff --git a/MultiLabelMarginCriterion.lua b/MultiLabelMarginCriterion.lua new file mode 100644 index 0000000..c435888 --- /dev/null +++ b/MultiLabelMarginCriterion.lua @@ -0,0 +1,14 @@ +local MultiLabelMarginCriterion, parent = torch.class('nn.MultiLabelMarginCriterion', 'nn.Criterion') + +function MultiLabelMarginCriterion:__init() + parent.__init(self) + self.sizeAverage = true +end + +function MultiLabelMarginCriterion:updateOutput(input, target) + return input.nn.MultiLabelMarginCriterion_updateOutput(self, input, target) +end + +function MultiLabelMarginCriterion:updateGradInput(input, target) + return input.nn.MultiLabelMarginCriterion_updateGradInput(self, input, target) +end diff --git a/MultiMarginCriterion.lua b/MultiMarginCriterion.lua new file mode 100644 index 0000000..e8de9d9 --- /dev/null +++ b/MultiMarginCriterion.lua @@ -0,0 +1,14 @@ +local MultiMarginCriterion, parent = torch.class('nn.MultiMarginCriterion', 'nn.Criterion') + +function MultiMarginCriterion:__init() + parent.__init(self) + self.sizeAverage = true +end + +function MultiMarginCriterion:updateOutput(input, target) + return input.nn.MultiMarginCriterion_updateOutput(self, input, target) +end + +function MultiMarginCriterion:updateGradInput(input, target) + return input.nn.MultiMarginCriterion_updateGradInput(self, input, target) +end diff --git a/Narrow.lua b/Narrow.lua new file mode 100644 index 0000000..4445983 --- /dev/null +++ b/Narrow.lua @@ -0,0 +1,24 @@ +local Narrow, parent = torch.class('nn.Narrow', 'nn.Module') + +function Narrow:__init(dimension,offset,length) + parent.__init(self) + self.dimension=dimension + self.index=offset + self.length=length or 1 + if not dimension or not offset then + error('nn.Narrow(dimension, offset, length)') + end +end + +function Narrow:updateOutput(input) + local output=input:narrow(self.dimension,self.index,self.length); + self.output:resizeAs(output) + return self.output:copy(output) +end + +function Narrow:updateGradInput(input, gradOutput) + self.gradInput:resizeAs(input) + 
self.gradInput:zero(); + self.gradInput:narrow(self.dimension,self.index,self.length):copy(gradOutput) + return self.gradInput +end diff --git a/PairwiseDistance.lua b/PairwiseDistance.lua new file mode 100644 index 0000000..638c58f --- /dev/null +++ b/PairwiseDistance.lua @@ -0,0 +1,33 @@ +local PairwiseDistance, parent = torch.class('nn.PairwiseDistance', 'nn.Module') + +function PairwiseDistance:__init(p) + parent.__init(self) + + -- state + self.gradInput = {torch.Tensor(), torch.Tensor()} + self.output = torch.Tensor(1) + self.norm=p +end + +function PairwiseDistance:updateOutput(input) + self.output[1]=input[1]:dist(input[2],self.norm); + return self.output +end + +local function mathsign(x) + if x==0 then return 2*torch.random(2)-3; end + if x>0 then return 1; else return -1; end +end + +function PairwiseDistance:updateGradInput(input, gradOutput) + self.gradInput[1]:resizeAs(input[1]) + self.gradInput[2]:resizeAs(input[2]) + self.gradInput[1]:copy(input[1]) + self.gradInput[1]:add(-1, input[2]) + if self.norm==1 then + self.gradInput[1]:apply(mathsign) + end + self.gradInput[1]:mul(gradOutput[1]); + self.gradInput[2]:zero():add(-1, self.gradInput[1]) + return self.gradInput +end diff --git a/Parallel.lua b/Parallel.lua new file mode 100644 index 0000000..04a8bdb --- /dev/null +++ b/Parallel.lua @@ -0,0 +1,137 @@ +local Parallel, parent = torch.class('nn.Parallel', 'nn.Module') + +function Parallel:__init(inputDimension,outputDimension) + parent.__init(self) + self.modules = {} + self.size = torch.LongStorage() + self.inputDimension = inputDimension + self.outputDimension = outputDimension +end + +function Parallel:add(module) + table.insert(self.modules, module) + return self +end + +function Parallel:get(index) + return self.modules[index] +end + +function Parallel:updateOutput(input) + + local modules=input:size(self.inputDimension) + + for i=1,modules do + local currentOutput = + self.modules[i]:updateOutput(input:select(self.inputDimension,i)) + + if i == 1 then + self.size:resize(currentOutput:dim()):copy(currentOutput:size()) + else + self.size[self.outputDimension] = self.size[self.outputDimension] + + currentOutput:size(self.outputDimension) + end + end + self.output:resize(self.size) + + local offset = 1 + for i=1,modules do + local currentOutput = self.modules[i]:updateOutput(input:select(self.inputDimension,i)) + + self.output:narrow(self.outputDimension, offset, + currentOutput:size(self.outputDimension)):copy(currentOutput) + offset = offset + currentOutput:size(self.outputDimension) + end + return self.output +end + +function Parallel:updateGradInput(input, gradOutput) + local nModule=input:size(self.inputDimension) + self.gradInput:resizeAs(input) + + local offset = 1 + for i=1,nModule do + local module=self.modules[i]; + local currentOutput = module.output + local currentGradInput = + module:updateGradInput(input:select(self.inputDimension,i), + gradOutput:narrow(self.outputDimension, + offset, currentOutput:size(self.outputDimension))) + + self.gradInput:select(self.inputDimension,i):copy(currentGradInput) + offset = offset + currentOutput:size(self.outputDimension) + end + return self.gradInput +end + +function Parallel:accGradParameters(input, gradOutput, scale) + local nModule=input:size(self.inputDimension) + + local offset = 1 + for i=1,nModule do + local module = self.modules[i]; + local currentOutput = module.output + local currentGradInput = + module:accGradParameters(input:select(self.inputDimension,i), + gradOutput:narrow(self.outputDimension, + offset, 
currentOutput:size(self.outputDimension)), scale) + + offset = offset + currentOutput:size(self.outputDimension) + end +end + +function Parallel:accUpdateGradParameters(input, gradOutput, lr) + local nModule=input:size(self.inputDimension) + + local offset = 1 + for i=1,nModule do + local module = self.modules[i]; + local currentOutput = module.output + local currentGradInput = + module:accUpdateGradParameters(input:select(self.inputDimension,i), + gradOutput:narrow(self.outputDimension, + offset, currentOutput:size(self.outputDimension)), lr) + + offset = offset + currentOutput:size(self.outputDimension) + end +end + +function Parallel:zeroGradParameters() + for _,module in ipairs(self.modules) do + module:zeroGradParameters() + end +end + +function Parallel:updateParameters(learningRate) + for _,module in ipairs(self.modules) do + module:updateParameters(learningRate) + end +end + +function Parallel:share(mlp,...) + for i=1,#self.modules do + self.modules[i]:share(mlp.modules[i],...); + end +end + +function Parallel:parameters() + local function tinsert(to, from) + if type(from) == 'table' then + for i=1,#from do + tinsert(to,from[i]) + end + else + table.insert(to,from) + end + end + local w = {} + local gw = {} + for i=1,#self.modules do + local mw,mgw = self.modules[i]:parameters() + if mw then + tinsert(w,mw) + tinsert(gw,mgw) + end + end + return w,gw +end diff --git a/ParallelTable.lua b/ParallelTable.lua new file mode 100644 index 0000000..a97904f --- /dev/null +++ b/ParallelTable.lua @@ -0,0 +1,71 @@ +local ParallelTable, parent = torch.class('nn.ParallelTable', 'nn.Module') + +function ParallelTable:__init() + parent.__init(self) + self.modules = {} + self.output = {} + self.gradInput = {} +end + +function ParallelTable:add(module) + table.insert(self.modules, module) + return self +end + +function ParallelTable:get(index) + return self.modules[index] +end + +function ParallelTable:size() + return #self.modules +end + +function ParallelTable:updateOutput(input) + for i=1,#self.modules do + self.output[i] = self.modules[i]:updateOutput(input[i]) + end + return self.output +end + + +function ParallelTable:updateGradInput(input, gradOutput) + for i,module in ipairs(self.modules) do + self.gradInput[i]= module:updateGradInput(input[i], gradOutput[i]) + end + return self.gradInput +end + +function ParallelTable:accGradParameters(input, gradOutput, scale) + scale = scale or 1 + for i,module in ipairs(self.modules) do + module:accGradParameters(input[i], gradOutput[i], scale) + end +end + +function ParallelTable:accUpdateGradParameters(input, gradOutput, lr) + lr = lr or 1 + for i,module in ipairs(self.modules) do + module:accUpdateGradParameters(input[i], gradOutput[i], lr) + end +end + +function ParallelTable:zeroGradParameters() + for _,module in ipairs(self.modules) do + module:zeroGradParameters() + end +end + +function ParallelTable:updateParameters(learningRate) + for _,module in ipairs(self.modules) do + module:updateParameters(learningRate) + end +end + +function ParallelTable:share(mlp,...) 
+ for i=1,#self.modules do + self.modules[i]:share(mlp.modules[i],...); + end +end + + + diff --git a/Power.lua b/Power.lua new file mode 100644 index 0000000..8052b3f --- /dev/null +++ b/Power.lua @@ -0,0 +1,21 @@ +local Power, parent = torch.class('nn.Power','nn.Module') + +function Power:__init(p) + parent.__init(self) + self.pow = p + if not p then + error('nn.Power(power)') + end +end + +function Power:updateOutput(input) + self.output:resizeAs(input):copy(input) + self.output:pow(self.pow) + return self.output +end + +function Power:updateGradInput(input, gradOutput) + self.gradInput:resizeAs(input):copy(gradOutput) + self.gradInput:cmul(self.output):cdiv(input):mul(self.pow) + return self.gradInput +end diff --git a/Replicate.lua b/Replicate.lua new file mode 100644 index 0000000..c30a86a --- /dev/null +++ b/Replicate.lua @@ -0,0 +1,29 @@ +local Replicate, parent = torch.class('nn.Replicate','nn.Module') + +function Replicate:__init(nf) + parent.__init(self) + self.nfeatures = nf +end + +function Replicate:updateOutput(input) + local sz = torch.LongStorage(input:dim()+1) + sz[1] = self.nfeatures + for i = 1,input:dim() do + sz[i+1] = input:size(i) + end + local st = torch.LongStorage(input:dim()+1) + st[1] = 0 + for i = 1,input:dim() do + st[i+1] = input:stride(i) + end + self.output = input.new(input:storage(),input:storageOffset(),sz,st) + return self.output +end + +function Replicate:updateGradInput(input, gradOutput) + self.gradInput:resizeAs(input):zero() + for k = 1,gradOutput:size(1) do + self.gradInput:add(gradOutput[k]) + end + return self.gradInput +end diff --git a/Reshape.lua b/Reshape.lua new file mode 100644 index 0000000..0be793f --- /dev/null +++ b/Reshape.lua @@ -0,0 +1,38 @@ +local Reshape, parent = torch.class('nn.Reshape', 'nn.Module') + +function Reshape:__init(...) + parent.__init(self) + self.size = torch.LongStorage() + self.batchsize = torch.LongStorage() + local n = select('#', ...) + if n == 1 and torch.typename(select(1, ...)) == 'torch.LongStorage' then + self.size:resize(#select(1, ...)):copy(select(1, ...)) + else + self.size:resize(n) + self.batchsize:resize(n+1) + self.nelement = 1 + for i=1,n do + self.size[i] = select(i, ...) + self.batchsize[i+1] = select(i, ...) 
+ self.nelement = self.nelement * self.size[i] + end + end +end + +function Reshape:updateOutput(input) + input = input:contiguous() + local nelement = input:nElement() + if nelement == self.nelement then + self.output:set(input):resize(self.size) + else + self.batchsize[1] = input:size(1) + self.output:set(input):resize(self.batchsize) + end + return self.output +end + +function Reshape:updateGradInput(input, gradOutput) + gradOutput = gradOutput:contiguous() + self.gradInput:set(gradOutput):resizeAs(input) + return self.gradInput +end diff --git a/Select.lua b/Select.lua new file mode 100644 index 0000000..acf8e06 --- /dev/null +++ b/Select.lua @@ -0,0 +1,20 @@ +local Select, parent = torch.class('nn.Select', 'nn.Module') + +function Select:__init(dimension,index) + parent.__init(self) + self.dimension = dimension + self.index = index +end + +function Select:updateOutput(input) + local output = input:select(self.dimension,self.index); + self.output:resizeAs(output) + return self.output:copy(output) +end + +function Select:updateGradInput(input, gradOutput) + self.gradInput:resizeAs(input) + self.gradInput:zero() + self.gradInput:select(self.dimension,self.index):copy(gradOutput) + return self.gradInput +end diff --git a/Sequential.lua b/Sequential.lua new file mode 100644 index 0000000..3e23350 --- /dev/null +++ b/Sequential.lua @@ -0,0 +1,129 @@ +local Sequential, parent = torch.class('nn.Sequential', 'nn.Module') + +function Sequential:__init() + self.modules = {} +end + +function Sequential:add(module) + if #self.modules == 0 then + self.gradInput = module.gradInput + end + table.insert(self.modules, module) + self.output = module.output + return self +end + +function Sequential:size() + return #self.modules +end + +function Sequential:get(index) + return self.modules[index] +end + +function Sequential:updateOutput(input) + local currentOutput = input + for i=1,#self.modules do + currentOutput = self.modules[i]:updateOutput(currentOutput) + end + self.output = currentOutput + return currentOutput +end + +function Sequential:updateGradInput(input, gradOutput) + local currentGradOutput = gradOutput + local currentModule = self.modules[#self.modules] + for i=#self.modules-1,1,-1 do + local previousModule = self.modules[i] + currentGradOutput = currentModule:updateGradInput(previousModule.output, currentGradOutput) + currentModule = previousModule + end + currentGradOutput = currentModule:updateGradInput(input, currentGradOutput) + self.gradInput = currentGradOutput + return currentGradOutput +end + +function Sequential:accGradParameters(input, gradOutput, scale) + scale = scale or 1 + + local currentGradOutput = gradOutput + local currentModule = self.modules[#self.modules] + for i=#self.modules-1,1,-1 do + local previousModule = self.modules[i] + currentModule:accGradParameters(previousModule.output, currentGradOutput, scale) + currentGradOutput = currentModule.gradInput + currentModule = previousModule + end + + currentModule:accGradParameters(input, currentGradOutput, scale) +end + +function Sequential:accUpdateGradParameters(input, gradOutput, lr) + local currentGradOutput = gradOutput + local currentModule = self.modules[#self.modules] + for i=#self.modules-1,1,-1 do + local previousModule = self.modules[i] + currentModule:accUpdateGradParameters(previousModule.output, currentGradOutput, lr) + currentGradOutput = currentModule.gradInput + currentModule = previousModule + end + + currentModule:accUpdateGradParameters(input, currentGradOutput, lr) +end + +function 
Sequential:zeroGradParameters() + for i=1,#self.modules do + self.modules[i]:zeroGradParameters() + end +end + +function Sequential:updateParameters(learningRate) + for i=1,#self.modules do + self.modules[i]:updateParameters(learningRate) + end +end + +function Sequential:share(mlp,...) + for i=1,#self.modules do + self.modules[i]:share(mlp.modules[i],...); + end +end + +function Sequential:parameters() + local function tinsert(to, from) + if type(from) == 'table' then + for i=1,#from do + tinsert(to,from[i]) + end + else + table.insert(to,from) + end + end + local w = {} + local gw = {} + for i=1,#self.modules do + local mw,mgw = self.modules[i]:parameters() + if mw then + tinsert(w,mw) + tinsert(gw,mgw) + end + end + return w,gw +end + +function Sequential:__tostring__() + local tab = ' ' + local line = '\n' + local next = ' -> ' + local str = 'nn.Sequential' + str = str .. ' {' .. line .. tab .. '[input' + for i=1,#self.modules do + str = str .. next .. '(' .. i .. ')' + end + str = str .. next .. 'output]' + for i=1,#self.modules do + str = str .. line .. tab .. '(' .. i .. '): ' .. tostring(self.modules[i]):gsub(line, line .. tab) + end + str = str .. line .. '}' + return str +end diff --git a/Sigmoid.lua b/Sigmoid.lua new file mode 100644 index 0000000..efde004 --- /dev/null +++ b/Sigmoid.lua @@ -0,0 +1,9 @@ +local Sigmoid = torch.class('nn.Sigmoid', 'nn.Module') + +function Sigmoid:updateOutput(input) + return input.nn.Sigmoid_updateOutput(self, input) +end + +function Sigmoid:updateGradInput(input, gradOutput) + return input.nn.Sigmoid_updateGradInput(self, input, gradOutput) +end diff --git a/SoftMax.lua b/SoftMax.lua new file mode 100644 index 0000000..609b353 --- /dev/null +++ b/SoftMax.lua @@ -0,0 +1,9 @@ +local SoftMax, parent = torch.class('nn.SoftMax', 'nn.Module') + +function SoftMax:updateOutput(input) + return input.nn.SoftMax_updateOutput(self, input) +end + +function SoftMax:updateGradInput(input, gradOutput) + return input.nn.SoftMax_updateGradInput(self, input, gradOutput) +end diff --git a/SoftMin.lua b/SoftMin.lua new file mode 100644 index 0000000..90c6c60 --- /dev/null +++ b/SoftMin.lua @@ -0,0 +1,15 @@ +local SoftMin, parent = torch.class('nn.SoftMin', 'nn.Module') + +function SoftMin:updateOutput(input) + self.mininput = self.mininput or input.new() + self.mininput:resizeAs(input):copy(input):mul(-1) + return input.nn.SoftMax_updateOutput(self, self.mininput) +end + +function SoftMin:updateGradInput(input, gradOutput) + self.mininput = self.mininput or input.new() + self.mininput:resizeAs(input):copy(input):mul(-1) + self.gradInput = input.nn.SoftMax_updateGradInput(self, self.mininput, gradOutput) + self.gradInput:mul(-1) + return self.gradInput +end diff --git a/SoftPlus.lua b/SoftPlus.lua new file mode 100644 index 0000000..18d586a --- /dev/null +++ b/SoftPlus.lua @@ -0,0 +1,9 @@ +local SoftPlus = torch.class('nn.SoftPlus', 'nn.Module') + +function SoftPlus:updateOutput(input) + return input.nn.SoftPlus_updateOutput(self, input) +end + +function SoftPlus:updateGradInput(input, gradOutput) + return input.nn.SoftPlus_updateGradInput(self, input, gradOutput) +end diff --git a/SoftShrink.lua b/SoftShrink.lua new file mode 100644 index 0000000..379dc61 --- /dev/null +++ b/SoftShrink.lua @@ -0,0 +1,16 @@ +local SoftShrink, parent = torch.class('nn.SoftShrink', 'nn.Module') + +function SoftShrink:__init(lam) + parent.__init(self) + self.lambda = lam or 0.5 +end + +function SoftShrink:updateOutput(input) + input.nn.SoftShrink_updateOutput(self, input) + return 
self.output +end + +function SoftShrink:updateGradInput(input, gradOutput) + input.nn.SoftShrink_updateGradInput(self, input, gradOutput) + return self.gradInput +end diff --git a/SoftSign.lua b/SoftSign.lua new file mode 100644 index 0000000..480894c --- /dev/null +++ b/SoftSign.lua @@ -0,0 +1,15 @@ +local SoftSign = torch.class('nn.SoftSign', 'nn.Module') + +function SoftSign:updateOutput(input) + self.temp = self.temp or input.new() + self.temp:resizeAs(input):copy(input):abs():add(1) + self.output:resizeAs(input):copy(input):cdiv(self.temp) + return self.output +end + +function SoftSign:updateGradInput(input, gradOutput) + self.tempgrad = self.tempgrad or input.new() + self.tempgrad:resizeAs(self.output):copy(input):abs():add(1):cmul(self.tempgrad) + self.gradInput:resizeAs(input):copy(gradOutput):cdiv(self.tempgrad) + return self.gradInput +end diff --git a/SparseLinear.lua b/SparseLinear.lua new file mode 100644 index 0000000..ec8845e --- /dev/null +++ b/SparseLinear.lua @@ -0,0 +1,42 @@ +local SparseLinear, parent = torch.class('nn.SparseLinear', 'nn.Module') + +function SparseLinear:__init(inputSize, outputSize) + parent.__init(self) + + self.weightDecay = 0 + self.weight = torch.Tensor(outputSize, inputSize) + self.bias = torch.Tensor(outputSize) + self.gradWeight = torch.Tensor(outputSize, inputSize) + self.gradBias = torch.Tensor(outputSize) + self.lastInput = torch.Tensor() + -- state + self.gradInput:resize(inputSize) + self.output:resize(outputSize) + + self:reset() +end + +function SparseLinear:reset(stdv) + if stdv then + stdv = stdv * math.sqrt(3) + else + stdv = 1./math.sqrt(self.weight:size(1)) + end + + -- we do this so the initialization is exactly + -- the same than in previous torch versions + for i=1,self.weight:size(1) do + self.weight:select(1, i):apply(function() + return torch.uniform(-stdv, stdv) + end) + self.bias[i] = torch.uniform(-stdv, stdv) * 0.000001 + end +end + +function SparseLinear:updateOutput(input) + return input.nn.SparseLinear_updateOutput(self, input) +end + +function SparseLinear:accGradParameters(input, gradOutput, scale) + return input.nn.SparseLinear_accGradParameters(self, input, gradOutput, scale) +end diff --git a/SpatialConvolution.lua b/SpatialConvolution.lua new file mode 100644 index 0000000..38d2737 --- /dev/null +++ b/SpatialConvolution.lua @@ -0,0 +1,50 @@ +local SpatialConvolution, parent = torch.class('nn.SpatialConvolution', 'nn.Module') + +function SpatialConvolution:__init(nInputPlane, nOutputPlane, kW, kH, dW, dH) + parent.__init(self) + + dW = dW or 1 + dH = dH or 1 + + self.nInputPlane = nInputPlane + self.nOutputPlane = nOutputPlane + self.kW = kW + self.kH = kH + self.dW = dW + self.dH = dH + + self.weight = torch.Tensor(nOutputPlane, nInputPlane, kH, kW) + self.bias = torch.Tensor(nOutputPlane) + self.gradWeight = torch.Tensor(nOutputPlane, nInputPlane, kH, kW) + self.gradBias = torch.Tensor(nOutputPlane) + + self:reset() +end + +function SpatialConvolution:reset(stdv) + if stdv then + stdv = stdv * math.sqrt(3) + else + stdv = 1/math.sqrt(self.kW*self.kH*self.nInputPlane) + end + self.weight:apply(function() + return torch.uniform(-stdv, stdv) + end) + self.bias:apply(function() + return torch.uniform(-stdv, stdv) + end) +end + +function SpatialConvolution:updateOutput(input) + return input.nn.SpatialConvolution_updateOutput(self, input) +end + +function SpatialConvolution:updateGradInput(input, gradOutput) + if self.gradInput then + return input.nn.SpatialConvolution_updateGradInput(self, input, gradOutput) + end +end 
+ +function SpatialConvolution:accGradParameters(input, gradOutput, scale) + return input.nn.SpatialConvolution_accGradParameters(self, input, gradOutput, scale) +end diff --git a/SpatialConvolutionMap.lua b/SpatialConvolutionMap.lua new file mode 100644 index 0000000..0dbff2f --- /dev/null +++ b/SpatialConvolutionMap.lua @@ -0,0 +1,119 @@ +local SpatialConvolutionMap, parent = torch.class('nn.SpatialConvolutionMap', 'nn.Module') + +nn.tables = nn.tables or {} + +function nn.tables.full(nin, nout) + local ft = torch.Tensor(nin*nout,2) + local p = 1 + for j=1,nout do + for i=1,nin do + ft[p][1] = i + ft[p][2] = j + p = p + 1 + end + end + return ft +end + +function nn.tables.oneToOne(nfeat) + local ft = torch.Tensor(nfeat,2) + for i=1,nfeat do + ft[i][1] = i + ft[i][2] = i + end + return ft +end + +function nn.tables.random(nin, nout, nto) + local nker = nto * nout + local tbl = torch.Tensor(nker, 2) + local fi = torch.randperm(nin) + local frcntr = 1 + local tocntr = 1 + local nfi = math.floor(nin/nto) -- number of distinct nto chunks + local rfi = math.mod(nin,nto) -- number of remaining from maps + local totbl = tbl:select(2,2) + local frtbl = tbl:select(2,1) + local fitbl = fi:narrow(1, 1, (nfi * nto)) -- part of fi that covers distinct chunks + local ufrtbl= frtbl:unfold(1, nto, nto) + local utotbl= totbl:unfold(1, nto, nto) + local ufitbl= fitbl:unfold(1, nto, nto) + + -- start filling frtbl + for i=1,nout do -- fro each unit in target map + ufrtbl:select(1,i):copy(ufitbl:select(1,frcntr)) + frcntr = frcntr + 1 + if frcntr-1 == nfi then -- reset fi + fi:copy(torch.randperm(nin)) + frcntr = 1 + end + end + for tocntr=1,utotbl:size(1) do + utotbl:select(1,tocntr):fill(tocntr) + end + return tbl +end + +function SpatialConvolutionMap:__init(conMatrix, kW, kH, dW, dH) + parent.__init(self) + + dW = dW or 1 + dH = dH or 1 + + self.kW = kW + self.kH = kH + self.dW = dW + self.dH = dH + self.connTable = conMatrix + self.nInputPlane = self.connTable:select(2,1):maxall() + self.nOutputPlane = self.connTable:select(2,2):maxall() + + self.weight = torch.Tensor(self.connTable:size(1), kH, kW) + self.bias = torch.Tensor(self.nOutputPlane) + self.gradWeight = torch.Tensor(self.connTable:size(1), kH, kW) + self.gradBias = torch.Tensor(self.nOutputPlane) + + self:reset() +end + +function SpatialConvolutionMap:reset(stdv) + if stdv then + stdv = stdv * math.sqrt(3) + self.weight:apply(function() + return torch.uniform(-stdv, stdv) + end) + self.bias:apply(function() + return torch.uniform(-stdv, stdv) + end) + else + local ninp = torch.Tensor(self.nOutputPlane):zero() + for i=1,self.connTable:size(1) do ninp[self.connTable[i][2]] = ninp[self.connTable[i][2]]+1 end + for k=1,self.connTable:size(1) do + stdv = 1/math.sqrt(self.kW*self.kH*ninp[self.connTable[k][2]]) + self.weight:select(1,k):apply(function() return torch.uniform(-stdv,stdv) end) + end + for k=1,self.bias:size(1) do + stdv = 1/math.sqrt(self.kW*self.kH*ninp[k]) + self.bias[k] = torch.uniform(-stdv,stdv) + end + end +end + +function SpatialConvolutionMap:updateOutput(input) + input.nn.SpatialConvolutionMap_updateOutput(self, input) + return self.output +end + +function SpatialConvolutionMap:updateGradInput(input, gradOutput) + input.nn.SpatialConvolutionMap_updateGradInput(self, input, gradOutput) + return self.gradInput +end + +function SpatialConvolutionMap:accGradParameters(input, gradOutput, scale) + return input.nn.SpatialConvolutionMap_accGradParameters(self, input, gradOutput, scale) +end + +function 
SpatialConvolutionMap:decayParameters(decay) + self.weight:add(-decay, self.weight) + self.bias:add(-decay, self.bias) +end diff --git a/SpatialLPPooling.lua b/SpatialLPPooling.lua new file mode 100644 index 0000000..9b9c87d --- /dev/null +++ b/SpatialLPPooling.lua @@ -0,0 +1,32 @@ +local SpatialLPPooling, parent = torch.class('nn.SpatialLPPooling', 'nn.Sequential') + +function SpatialLPPooling:__init(nInputPlane, pnorm, kW, kH, dW, dH) + parent.__init(self) + + dW = dW or kW + dH = dH or kH + + self.kW = kW + self.kH = kH + self.dW = dW + self.dH = dH + + self.nInputPlane = nInputPlane + self.learnKernel = learnKernel + + if pnorm == 2 then + self:add(nn.Square()) + else + self:add(nn.Power(pnorm)) + end + self:add(nn.SpatialConvolutionMap(nn.tables.oneToOne(nInputPlane), kW, kH, dW, dH)) + if pnorm == 2 then + self:add(nn.Sqrt()) + else + self:add(nn.Power(1/pnorm)) + end + + self:get(2).bias:zero() + self:get(2).weight:fill(1/(kW*kH)) + self:get(2).accGradParameters = nil +end diff --git a/SpatialMaxPooling.lua b/SpatialMaxPooling.lua new file mode 100644 index 0000000..21197ac --- /dev/null +++ b/SpatialMaxPooling.lua @@ -0,0 +1,34 @@ +local SpatialMaxPooling, parent = torch.class('nn.SpatialMaxPooling', 'nn.Module') + +function SpatialMaxPooling:__init(kW, kH, dW, dH) + parent.__init(self) + + dW = dW or kW + dH = dH or kH + + self.kW = kW + self.kH = kH + self.dW = dW + self.dH = dH + + self.indices = torch.Tensor() +end + +function SpatialMaxPooling:updateOutput(input) + input.nn.SpatialMaxPooling_updateOutput(self, input) + return self.output +end + +function SpatialMaxPooling:updateGradInput(input, gradOutput) + input.nn.SpatialMaxPooling_updateGradInput(self, input, gradOutput) + return self.gradInput +end + +function SpatialMaxPooling:empty() + self.gradInput:resize() + self.gradInput:storage():resize(0) + self.output:resize() + self.output:storage():resize(0) + self.indices:resize() + self.indices:storage():resize(0) +end diff --git a/SpatialSubSampling.lua b/SpatialSubSampling.lua new file mode 100644 index 0000000..48b32b9 --- /dev/null +++ b/SpatialSubSampling.lua @@ -0,0 +1,49 @@ +local SpatialSubSampling, parent = torch.class('nn.SpatialSubSampling', 'nn.Module') + +function SpatialSubSampling:__init(nInputPlane, kW, kH, dW, dH) + parent.__init(self) + + dW = dW or 1 + dH = dH or 1 + + self.nInputPlane = nInputPlane + self.kW = kW + self.kH = kH + self.dW = dW + self.dH = dH + + self.weight = torch.Tensor(nInputPlane) + self.bias = torch.Tensor(nInputPlane) + self.gradWeight = torch.Tensor(nInputPlane) + self.gradBias = torch.Tensor(nInputPlane) + + self:reset() +end + +function SpatialSubSampling:reset(stdv) + if stdv then + stdv = stdv * math.sqrt(3) + else + stdv = 1/math.sqrt(self.kW*self.kH) + end + self.weight:apply(function() + return torch.uniform(-stdv, stdv) + end) + self.bias:apply(function() + return torch.uniform(-stdv, stdv) + end) +end + +function SpatialSubSampling:updateOutput(input) + return input.nn.SpatialSubSampling_updateOutput(self, input) +end + +function SpatialSubSampling:updateGradInput(input, gradOutput) + if self.gradInput then + return input.nn.SpatialSubSampling_updateGradInput(self, input, gradOutput) + end +end + +function SpatialSubSampling:accGradParameters(input, gradOutput, scale) + return input.nn.SpatialSubSampling_accGradParameters(self, input, gradOutput, scale) +end diff --git a/SpatialSubtractiveNormalization.lua b/SpatialSubtractiveNormalization.lua new file mode 100644 index 0000000..4df0fc1 --- /dev/null +++ 
b/SpatialSubtractiveNormalization.lua @@ -0,0 +1,104 @@ +local SpatialSubtractiveNormalization, parent = torch.class('nn.SpatialSubtractiveNormalization','nn.Module') + +function SpatialSubtractiveNormalization:__init(nInputPlane, kernel) + parent.__init(self) + + -- get args + self.nInputPlane = nInputPlane or 1 + self.kernel = kernel or torch.Tensor(9,9):fill(1) + local kdim = self.kernel:nDimension() + + -- check args + if kdim ~= 2 and kdim ~= 1 then + error(' averaging kernel must be 2D or 1D') + end + if (self.kernel:size(1) % 2) == 0 or (kdim == 2 and (self.kernel:size(2) % 2) == 0) then + error(' averaging kernel must have ODD dimensions') + end + + -- normalize kernel + self.kernel:div(self.kernel:sumall() * self.nInputPlane) + + -- padding values + local padH = math.floor(self.kernel:size(1)/2) + local padW = padH + if kdim == 2 then + padW = math.floor(self.kernel:size(2)/2) + end + + -- create convolutional mean extractor + self.meanestimator = nn.Sequential() + self.meanestimator:add(nn.SpatialZeroPadding(padW, padW, padH, padH)) + if kdim == 2 then + self.meanestimator:add(nn.SpatialConvolutionMap(nn.tables.oneToOne(self.nInputPlane), + self.kernel:size(2), self.kernel:size(1))) + else + self.meanestimator:add(nn.SpatialConvolutionMap(nn.tables.oneToOne(self.nInputPlane), + self.kernel:size(1), 1)) + self.meanestimator:add(nn.SpatialConvolutionMap(nn.tables.oneToOne(self.nInputPlane), + 1, self.kernel:size(1))) + end + self.meanestimator:add(nn.Sum(1)) + self.meanestimator:add(nn.Replicate(self.nInputPlane)) + + -- set kernel and bias + if kdim == 2 then + for i = 1,self.nInputPlane do + self.meanestimator.modules[2].weight[i] = self.kernel + end + self.meanestimator.modules[2].bias:zero() + else + for i = 1,self.nInputPlane do + self.meanestimator.modules[2].weight[i]:copy(self.kernel) + self.meanestimator.modules[3].weight[i]:copy(self.kernel) + end + self.meanestimator.modules[2].bias:zero() + self.meanestimator.modules[3].bias:zero() + end + + -- other operation + self.subtractor = nn.CSubTable() + self.divider = nn.CDivTable() + + -- coefficient array, to adjust side effects + self.coef = torch.Tensor(1,1,1) +end + +function SpatialSubtractiveNormalization:updateOutput(input) + -- compute side coefficients + if (input:size(3) ~= self.coef:size(2)) or (input:size(2) ~= self.coef:size(1)) then + local ones = input.new():resizeAs(input):fill(1) + self.coef = self.meanestimator:updateOutput(ones) + self.coef = self.coef:clone() + end + + -- compute mean + self.localsums = self.meanestimator:updateOutput(input) + self.adjustedsums = self.divider:updateOutput{self.localsums, self.coef} + self.output = self.subtractor:updateOutput{input, self.adjustedsums} + + -- done + return self.output +end + +function SpatialSubtractiveNormalization:updateGradInput(input, gradOutput) + -- resize grad + self.gradInput:resizeAs(input):zero() + + -- backprop through all modules + local gradsub = self.subtractor:updateGradInput({input, self.adjustedsums}, gradOutput) + local graddiv = self.divider:updateGradInput({self.localsums, self.coef}, gradsub[2]) + self.gradInput:add(self.meanestimator:updateGradInput(input, graddiv[1])) + self.gradInput:add(gradsub[1]) + + -- done + return self.gradInput +end + +function SpatialSubtractiveNormalization:type(type) + parent.type(self,type) + self.meanestimator:type(type) + self.divider:type(type) + self.subtractor:type(type) + return self +end diff --git a/SpatialZeroPadding.lua b/SpatialZeroPadding.lua new file mode 100644 index 0000000..af03e71 --- 
/dev/null +++ b/SpatialZeroPadding.lua @@ -0,0 +1,53 @@ +local SpatialZeroPadding, parent = torch.class('nn.SpatialZeroPadding', 'nn.Module') + +function SpatialZeroPadding:__init(pad_l, pad_r, pad_t, pad_b) + parent.__init(self) + self.pad_l = pad_l + self.pad_r = pad_r or self.pad_l + self.pad_t = pad_t or self.pad_l + self.pad_b = pad_b or self.pad_l +end + +function SpatialZeroPadding:updateOutput(input) + if input:dim() ~= 3 then error('input must be 3-dimensional') end + local h = input:size(2) + self.pad_t + self.pad_b + local w = input:size(3) + self.pad_l + self.pad_r + if w < 1 or h < 1 then error('input is too small') end + self.output:resize(input:size(1), h, w) + self.output:zero() + -- crop input if necessary + local c_input = input + if self.pad_t < 0 then c_input = c_input:narrow(2, 1 - self.pad_t, c_input:size(2) + self.pad_t) end + if self.pad_b < 0 then c_input = c_input:narrow(2, 1, c_input:size(2) + self.pad_b) end + if self.pad_l < 0 then c_input = c_input:narrow(3, 1 - self.pad_l, c_input:size(3) + self.pad_l) end + if self.pad_r < 0 then c_input = c_input:narrow(3, 1, c_input:size(3) + self.pad_r) end + -- crop outout if necessary + local c_output = self.output + if self.pad_t > 0 then c_output = c_output:narrow(2, 1 + self.pad_t, c_output:size(2) - self.pad_t) end + if self.pad_b > 0 then c_output = c_output:narrow(2, 1, c_output:size(2) - self.pad_b) end + if self.pad_l > 0 then c_output = c_output:narrow(3, 1 + self.pad_l, c_output:size(3) - self.pad_l) end + if self.pad_r > 0 then c_output = c_output:narrow(3, 1, c_output:size(3) - self.pad_r) end + -- copy input to output + c_output:copy(c_input) + return self.output +end + +function SpatialZeroPadding:updateGradInput(input, gradOutput) + if input:dim() ~= 3 then error('input must be 3-dimensional') end + self.gradInput:resizeAs(input):zero() + -- crop gradInput if necessary + local cg_input = self.gradInput + if self.pad_t < 0 then cg_input = cg_input:narrow(2, 1 - self.pad_t, cg_input:size(2) + self.pad_t) end + if self.pad_b < 0 then cg_input = cg_input:narrow(2, 1, cg_input:size(2) + self.pad_b) end + if self.pad_l < 0 then cg_input = cg_input:narrow(3, 1 - self.pad_l, cg_input:size(3) + self.pad_l) end + if self.pad_r < 0 then cg_input = cg_input:narrow(3, 1, cg_input:size(3) + self.pad_r) end + -- crop gradOutout if necessary + local cg_output = gradOutput + if self.pad_t > 0 then cg_output = cg_output:narrow(2, 1 + self.pad_t, cg_output:size(2) - self.pad_t) end + if self.pad_b > 0 then cg_output = cg_output:narrow(2, 1, cg_output:size(2) - self.pad_b) end + if self.pad_l > 0 then cg_output = cg_output:narrow(3, 1 + self.pad_l, cg_output:size(3) - self.pad_l) end + if self.pad_r > 0 then cg_output = cg_output:narrow(3, 1, cg_output:size(3) - self.pad_r) end + -- copy gradOuput to gradInput + cg_input:copy(cg_output) + return self.gradInput +end diff --git a/SplitTable.lua b/SplitTable.lua new file mode 100644 index 0000000..d2c690e --- /dev/null +++ b/SplitTable.lua @@ -0,0 +1,30 @@ +local SplitTable, parent = torch.class('nn.SplitTable', 'nn.Module') + +function SplitTable:__init(dimension) + parent.__init(self) + self.modules = {} + self.dimension = dimension +end + +function SplitTable:updateOutput(input) + local currentOutput= {}; + local slices = input:size(self.dimension) + for i=1,slices do + currentOutput[#currentOutput+1] = input:select(self.dimension,i) + end + self.output = currentOutput + return self.output +end + + +function SplitTable:updateGradInput(input, gradOutput) + local slices = 
input:size(self.dimension) + self.gradInput:resizeAs(input) + + local offset = 1 + for i=1,slices do + local currentGradInput = gradOutput[i]; + self.gradInput:select(self.dimension,i):copy(currentGradInput) + end + return self.gradInput +end diff --git a/Sqrt.lua b/Sqrt.lua new file mode 100644 index 0000000..664d434 --- /dev/null +++ b/Sqrt.lua @@ -0,0 +1,13 @@ +local Sqrt, parent = torch.class('nn.Sqrt','nn.Module') + +function Sqrt:__init(args) + parent.__init(self) +end + +function Sqrt:updateOutput(input) + return input.nn.Sqrt_updateOutput(self,input) +end + +function Sqrt:updateGradInput(input, gradOutput) + return input.nn.Sqrt_updateGradInput(self,input,gradOutput) +end diff --git a/Square.lua b/Square.lua new file mode 100644 index 0000000..c1b80dc --- /dev/null +++ b/Square.lua @@ -0,0 +1,13 @@ +local Square, parent = torch.class('nn.Square','nn.Module') + +function Square:__init(args) + parent.__init(self) +end + +function Square:updateOutput(input) + return input.nn.Square_updateOutput(self, input) +end + +function Square:updateGradInput(input, gradOutput) + return input.nn.Square_updateGradInput(self, input, gradOutput) +end diff --git a/StochasticGradient.lua b/StochasticGradient.lua new file mode 100644 index 0000000..2d5e810 --- /dev/null +++ b/StochasticGradient.lua @@ -0,0 +1,57 @@ +local StochasticGradient = torch.class('nn.StochasticGradient') + +function StochasticGradient:__init(module, criterion) + self.learningRate = 0.01 + self.learningRateDecay = 0 + self.maxIteration = 25 + self.shuffleIndices = true + self.module = module + self.criterion = criterion +end + +function StochasticGradient:train(dataset) + local iteration = 1 + local currentLearningRate = self.learningRate + local module = self.module + local criterion = self.criterion + + local shuffledIndices = torch.randperm(dataset:size(), 'torch.LongTensor') + if not self.shuffleIndices then + for t = 1,dataset:size() do + shuffledIndices[t] = t + end + end + + print("# StochasticGradient: training") + + while true do + local currentError = 0 + for t = 1,dataset:size() do + local example = dataset[shuffledIndices[t]] + local input = example[1] + local target = example[2] + + currentError = currentError + criterion:forward(module:forward(input), target) + + module:updateGradInput(input, criterion:updateGradInput(module.output, target)) + module:accUpdateGradParameters(input, criterion.gradInput, currentLearningRate) + + if self.hookExample then + self.hookExample(self, example) + end + end + + if self.hookIteration then + self.hookIteration(self, iteration) + end + + currentError = currentError / dataset:size() + print("# current error = " .. 
currentError) + iteration = iteration + 1 + currentLearningRate = self.learningRate/(1+iteration*self.learningRateDecay) + if self.maxIteration > 0 and iteration > self.maxIteration then + print("# StochasticGradient: you have reached the maximum number of iterations") + break + end + end +end diff --git a/Sum.lua b/Sum.lua new file mode 100644 index 0000000..b068e25 --- /dev/null +++ b/Sum.lua @@ -0,0 +1,27 @@ +local Sum, parent = torch.class('nn.Sum', 'nn.Module') + +function Sum:__init(dimension) + parent.__init(self) + dimension = dimension or 1 + self.dimension = dimension +end + +function Sum:updateOutput(input) + input.torch.sum(self.output, input, self.dimension) + self.output = self.output:select(self.dimension, 1) + return self.output +end + +function Sum:updateGradInput(input, gradOutput) + local size = gradOutput:size():totable() + local stride = gradOutput:stride():totable() + table.insert(size, self.dimension, input:size(self.dimension)) + table.insert(stride, self.dimension, 0) + + self.gradInput:set(gradOutput:storage(), + 1, + torch.LongStorage(size), + torch.LongStorage(stride)) + + return self.gradInput +end diff --git a/Tanh.lua b/Tanh.lua new file mode 100644 index 0000000..b6cf1bf --- /dev/null +++ b/Tanh.lua @@ -0,0 +1,9 @@ +local Tanh = torch.class('nn.Tanh', 'nn.Module') + +function Tanh:updateOutput(input) + return input.nn.Tanh_updateOutput(self, input) +end + +function Tanh:updateGradInput(input, gradOutput) + return input.nn.Tanh_updateGradInput(self, input, gradOutput) +end diff --git a/TemporalConvolution.lua b/TemporalConvolution.lua new file mode 100644 index 0000000..a3aaa7f --- /dev/null +++ b/TemporalConvolution.lua @@ -0,0 +1,51 @@ +local TemporalConvolution, parent = torch.class('nn.TemporalConvolution', 'nn.Module') + +function TemporalConvolution:__init(inputFrameSize, outputFrameSize, kW, dW) + parent.__init(self) + + dW = dW or 1 + + self.inputFrameSize = inputFrameSize + self.outputFrameSize = outputFrameSize + self.kW = kW + self.dW = dW + + self.weight = torch.Tensor(outputFrameSize, inputFrameSize*kW) + self.bias = torch.Tensor(outputFrameSize) + self.gradWeight = torch.Tensor(outputFrameSize, inputFrameSize*kW) + self.gradBias = torch.Tensor(outputFrameSize) + + self:reset() +end + +function TemporalConvolution:reset(stdv) + if stdv then + stdv = stdv * math.sqrt(3) + else + stdv = 1/math.sqrt(self.kW*self.inputFrameSize) + end + self.weight:apply(function() + return torch.uniform(-stdv, stdv) + end) + self.bias:apply(function() + return torch.uniform(-stdv, stdv) + end) +end + +function TemporalConvolution:updateOutput(input) + return input.nn.TemporalConvolution_updateOutput(self, input) +end + +function TemporalConvolution:updateGradInput(input, gradOutput) + if self.gradInput then + return input.nn.TemporalConvolution_updateGradInput(self, input, gradOutput) + end +end + +function TemporalConvolution:accGradParameters(input, gradOutput, scale) + scale = scale or 1 + input.nn.TemporalConvolution_accGradParameters(self, input, gradOutput, scale) +end + +-- we do not need to accumulate parameters when sharing +TemporalConvolution.sharedAccUpdateGradParameters = TemporalConvolution.accUpdateGradParameters diff --git a/TemporalSubSampling.lua b/TemporalSubSampling.lua new file mode 100644 index 0000000..3d06f6e --- /dev/null +++ b/TemporalSubSampling.lua @@ -0,0 +1,48 @@ +local TemporalSubSampling, parent = torch.class('nn.TemporalSubSampling', 'nn.Module') + +function TemporalSubSampling:__init(inputFrameSize, kW, dW) + parent.__init(self) + + 
dW = dW or 1 + + self.inputFrameSize = inputFrameSize + self.kW = kW + self.dW = dW + + self.weight = torch.Tensor(inputFrameSize) + self.bias = torch.Tensor(inputFrameSize) + self.gradWeight = torch.Tensor(inputFrameSize) + self.gradBias = torch.Tensor(inputFrameSize) + + self:reset() +end + +function TemporalSubSampling:reset(stdv) + if stdv then + stdv = stdv * math.sqrt(3) + else + stdv = 1/math.sqrt(self.kW) + end + + self.weight:apply(function() + return torch.uniform(-stdv, stdv) + end) + + self.bias:apply(function() + return torch.uniform(-stdv, stdv) + end) +end + +function TemporalSubSampling:updateOutput(input) + return input.nn.TemporalSubSampling_updateOutput(self, input) +end + +function TemporalSubSampling:updateGradInput(input, gradOutput) + if self.gradInput then + return input.nn.TemporalSubSampling_updateGradInput(self, input, gradOutput) + end +end + +function TemporalSubSampling:accGradParameters(input, gradOutput, scale) + return input.nn.TemporalSubSampling_accGradParameters(self, input, gradOutput, scale) +end diff --git a/Threshold.lua b/Threshold.lua new file mode 100644 index 0000000..6083957 --- /dev/null +++ b/Threshold.lua @@ -0,0 +1,20 @@ +local Threshold, parent = torch.class('nn.Threshold','nn.Module') + +function Threshold:__init(th,v) + parent.__init(self) + self.threshold = th or 1e-6 + self.val = v or 0 + if (th and type(th) ~= 'number') or (v and type(v) ~= 'number') then + error('nn.Threshold(threshold, value)') + end +end + +function Threshold:updateOutput(input) + input.nn.Threshold_updateOutput(self, input) + return self.output +end + +function Threshold:updateGradInput(input, gradOutput) + input.nn.Threshold_updateGradInput(self, input, gradOutput) + return self.gradInput +end diff --git a/VolumetricConvolution.lua b/VolumetricConvolution.lua new file mode 100644 index 0000000..4262199 --- /dev/null +++ b/VolumetricConvolution.lua @@ -0,0 +1,51 @@ +local VolumetricConvolution, parent = torch.class('nn.VolumetricConvolution', 'nn.Module') + +function VolumetricConvolution:__init(nInputPlane, nOutputPlane, kT, kW, kH, dT, dW, dH) + parent.__init(self) + + dT = dT or 1 + dW = dW or 1 + dH = dH or 1 + + self.nInputPlane = nInputPlane + self.nOutputPlane = nOutputPlane + self.kT = kT + self.kW = kW + self.kH = kH + self.dT = dT + self.dW = dW + self.dH = dH + + self.weight = torch.Tensor(nOutputPlane, nInputPlane, kT, kH, kW) + self.bias = torch.Tensor(nOutputPlane) + self.gradWeight = torch.Tensor(nOutputPlane, nInputPlane, kT, kH, kW) + self.gradBias = torch.Tensor(nOutputPlane) + + self:reset() +end + +function VolumetricConvolution:reset(stdv) + if stdv then + stdv = stdv * math.sqrt(3) + else + stdv = 1/math.sqrt(self.kT*self.kW*self.kH*self.nInputPlane) + end + self.weight:apply(function() + return torch.uniform(-stdv, stdv) + end) + self.bias:apply(function() + return torch.uniform(-stdv, stdv) + end) +end + +function VolumetricConvolution:updateOutput(input) + return input.nn.VolumetricConvolution_updateOutput(self, input) +end + +function VolumetricConvolution:updateGradInput(input, gradOutput) + return input.nn.VolumetricConvolution_updateGradInput(self, input, gradOutput) +end + +function VolumetricConvolution:accGradParameters(input, gradOutput, scale) + return input.nn.VolumetricConvolution_accGradParameters(self, input, gradOutput, scale) +end diff --git a/WeightedEuclidean.lua b/WeightedEuclidean.lua new file mode 100644 index 0000000..2761228 --- /dev/null +++ b/WeightedEuclidean.lua @@ -0,0 +1,85 @@ +local WeightedEuclidean, parent = 
torch.class('nn.WeightedEuclidean', 'nn.Module') + +function WeightedEuclidean:__init(inputSize,outputSize) + parent.__init(self) + + self.templates = torch.Tensor(inputSize,outputSize) + self.gradTemplates = torch.Tensor(inputSize,outputSize) + + self.diagCov = torch.Tensor(inputSize,outputSize) + self.gradDiagCov = torch.Tensor(inputSize,outputSize) + + self.gradInput:resize(inputSize) + self.output:resize(outputSize) + self.temp = torch.Tensor(inputSize) + + -- for compat with Torch's modules (it's bad we have to do that) + do + self.weight = self.templates + self.gradWeight = self.gradTemplates + self.bias = self.diagCov + self.gradBias = self.gradDiagCov + end + + self:reset() +end + +function WeightedEuclidean:reset(stdv) + if stdv then + stdv = stdv * math.sqrt(3) + else + stdv = 1./math.sqrt(self.templates:size(1)) + end + + for i=1,self.templates:size(2) do + self.templates:select(2, i):apply(function() + return torch.uniform(-stdv, stdv) + end) + end + + self.diagCov:fill(1) +end + +function WeightedEuclidean:updateOutput(input) + self.output:zero() + for o = 1,self.templates:size(2) do + self.temp:copy(input):add(-1,self.templates:select(2,o)) + self.temp:cmul(self.temp) + self.temp:cmul(self.diagCov:select(2,o)):cmul(self.diagCov:select(2,o)) + self.output[o] = math.sqrt(self.temp:sumall()) + end + return self.output +end + +function WeightedEuclidean:updateGradInput(input, gradOutput) + self:forward(input) + self.gradInput:zero() + for o = 1,self.templates:size(2) do + if self.output[o] ~= 0 then + self.temp:copy(input):add(-1,self.templates:select(2,o)) + self.temp:cmul(self.diagCov:select(2,o)):cmul(self.diagCov:select(2,o)) + self.temp:mul(gradOutput[o]/self.output[o]) + self.gradInput:add(self.temp) + end + end + return self.gradInput +end + +function WeightedEuclidean:accGradParameters(input, gradOutput, scale) + self:forward(input) + scale = scale or 1 + for o = 1,self.templates:size(2) do + if self.output[o] ~= 0 then + self.temp:copy(self.templates:select(2,o)):add(-1,input) + self.temp:cmul(self.diagCov:select(2,o)):cmul(self.diagCov:select(2,o)) + self.temp:mul(gradOutput[o]/self.output[o]) + self.gradTemplates:select(2,o):add(self.temp) + + self.temp:copy(self.templates:select(2,o)):add(-1,input) + self.temp:cmul(self.temp) + self.temp:cmul(self.diagCov:select(2,o)) + self.temp:mul(gradOutput[o]/self.output[o]) + self.gradDiagCov:select(2,o):add(self.temp) + end + end +end diff --git a/dok/abs.png b/dok/abs.png new file mode 100644 index 0000000..fa7f470 Binary files /dev/null and b/dok/abs.png differ diff --git a/dok/exp.png b/dok/exp.png new file mode 100644 index 0000000..07d28d4 Binary files /dev/null and b/dok/exp.png differ diff --git a/dok/hshrink.png b/dok/hshrink.png new file mode 100644 index 0000000..7f96292 Binary files /dev/null and b/dok/hshrink.png differ diff --git a/dok/htanh.png b/dok/htanh.png new file mode 100644 index 0000000..c8e6084 Binary files /dev/null and b/dok/htanh.png differ diff --git a/dok/index.dok b/dok/index.dok new file mode 100644 index 0000000..ded5265 --- /dev/null +++ b/dok/index.dok @@ -0,0 +1,3053 @@ +====== Neural Network Package ======= +{{anchor:nn.dok}} + +This package provides an easy way to build and train simple or complex +neural networks. 
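All the modules and criterions described below live in the ''nn'' table; a script would typically start by loading the package (the exact line below assumes a standard Torch7 installation):

require 'nn'   -- makes nn.Linear, nn.Sequential, nn.MSECriterion, ... available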
+ +Each module of a network is composed of [[#nn.Modules|Modules]] and there +are several sub-classes of ''Module'' available: container classes like +[[#nn.Sequential|Sequential]], [[#nn.Parallel|Parallel]] and +[[#nn.Concat|Concat]], which can contain simple layers like +[[#nn.Linear|Linear]], [[#nn.Mean|Mean]], [[#nn.Max|Max]] and +[[#nn.Reshape|Reshape]], as well as convolutional layers, and transfer +functions like [[#nn.Tanh|Tanh]]. + +Loss functions are implemented as sub-classes of +[[#nn.Criterions|Criterion]]. They are helpful to train neural networks on +classical tasks. Common criterions are the Mean Squared Error +criterion implemented in [[#nn.MSECriterion|MSECriterion]] and the +cross-entropy criterion implemented in +[[#nn.ClassNLLCriterion|ClassNLLCriterion]]. + +Finally, the [[#nn.StochasticGradient|StochasticGradient]] class provides a +high-level way to train the neural network of choice, even though it is +easy with a simple for loop to [[#nn.DoItYourself|train a neural network yourself]]. + +For those who want to implement their own modules, we suggest using +the ''nn.Jacobian'' class for testing the derivatives of their class, +together with the [[..:torch:tester|torch.Tester]] class. The sources +of the ''nn'' package contain many examples of such tests. + + +====== Detailed Overview of the Neural Network Package ====== +{{anchor:nn.overview.dok}} + +**Module** + +A neural network is called a [[#nn.Module|Module]] (or simply +//module// in this documentation) in Torch. ''Module'' is an abstract +class which defines four main methods: + * [[#nn.Module.forward|forward(input)]] which computes the output of the module given the ''input'' [[..:torch:tensor|Tensor]]. + * [[#nn.Module.backward|backward(input, gradOutput)]] which computes the gradients of the module with respect to its own parameters, and its own inputs. + * [[#nn.Module.zeroGradParameters|zeroGradParameters()]] which zeroes the gradient with respect to the parameters of the module. + * [[#nn.Module.updateParameters|updateParameters(learningRate)]] which updates the parameters after one has computed the gradients with ''backward()''. + +It also declares two members: + * [[#nn.Module.output|output]] which is the output returned by ''forward()''. + * [[#nn.Module.gradInput|gradInput]] which contains the gradients with respect to the input of the module, computed in a ''backward()''. + +Two other perhaps less used but handy methods are also defined: + * [[#nn.Module.share|share(mlp,s1,s2,...,sn)]] which makes this module share the parameters s1,..sn of the module ''mlp''. This is useful if you want to have modules that share the same weights. + * [[#nn.Module.clone|clone(...)]] which produces a deep copy of (i.e. not just a pointer to) this Module, including the current state of its parameters (if any). + +Some important remarks: + * ''output'' contains only valid values after a [[#nn.Module.forward|forward(input)]]. + * ''gradInput'' contains only valid values after a [[#nn.Module.backward|backward(input, gradOutput)]]. + * [[#nn.Module.backward|backward(input, gradOutput)]] uses certain computations obtained during [[#nn.Module.forward|forward(input)]]. You //must// call ''forward()'' before calling a ''backward()'', on the //same// ''input'', or your gradients are going to be incorrect! + + +**Plug and play** + +Building a simple neural network can be achieved by constructing an available layer. +A linear neural network (perceptron!) 
is built in only one line: + +module = nn.Linear(10,1) -- perceptron with 10 inputs + + +More complex neural networks are easily built using the container classes +[[#nn.Sequential|Sequential]] and [[#nn.Concat|Concat]]. ''Sequential'' plugs +layers together in a feed-forward fully connected manner. ''Concat'' concatenates in +one layer several modules: they take the same inputs, and their output is +concatenated. + +Creating a one hidden-layer multi-layer perceptron is thus just as easy as: + +mlp = nn.Sequential() +mlp:add( nn.Linear(10, 25) ) -- 10 input, 25 hidden units +mlp:add( nn.Tanh() ) -- some hyperbolic tangent transfer function +mlp:add( nn.Linear(25, 1) ) -- 1 output + + +Of course, ''Sequential'' and ''Concat'' can contain other +''Sequential'' or ''Concat'', allowing you to try the craziest neural +networks you ever dreamt of! See the [[#nn.Modules|complete list of +available modules]]. + +**Training a neural network** + +Once you have built your neural network, you have to choose a particular +[[#nn.Criterions|Criterion]] to train it. A criterion is a class which +describes the cost to be minimized during training. + +You can then train the neural network by using the +[[#nn.StochasticGradient|StochasticGradient]] class. + + + criterion = nn.MSECriterion() -- Mean Squared Error criterion + trainer = nn.StochasticGradient(mlp, criterion) + trainer:train(dataset) -- train using some examples + + +StochasticGradient expects as a ''dataset'' an object which implements +the operator ''dataset[index]'' and implements the method +''dataset:size()''. The ''size()'' method returns the number of +examples and ''dataset[i]'' has to return the i-th example. + +An ''example'' has to be an object which implements the operator +''example[field]'', where ''field'' might take the value ''1'' (input +features) or ''2'' (corresponding label which will be given to the +criterion). The input is usually a Tensor (except if you use special +kinds of modules, like [[#nn.TableLayers|table layers]]). The +label type depends on the criterion. For example, the +[[#nn.MSECriterion|MSECriterion]] expects a Tensor, but the +[[#nn.ClassNLLCriterion|ClassNLLCriterion]] expects an integer number (the +class). + +Such a dataset is easily constructed by using Lua tables, but it could be +any ''C'' object for example, as long as the required operators/methods +are implemented. [[#nn.DoItStochasticGradient|See an example]]. + +''StochasticGradient'' being written in ''Lua'', it is extremely easy +to cut-and-paste it and create a variant adapted to your needs +(if the constraints of ''StochasticGradient'' do not satisfy you). + +**Low Level Training Of a Neural Network** + +If you want to program the ''StochasticGradient'' by hand, you +essentially need to control the use of forwards and backwards through +the network yourself. For example, here is the code fragment one +would need to make a gradient step given an input ''x'', a desired +output ''y'', a network ''mlp'' and a given criterion ''criterion'' +and learning rate ''learningRate'': + + +function gradUpdate(mlp, x, y, criterion, learningRate) + local pred = mlp:forward(x) + local err = criterion:forward(pred, y) + local gradCriterion = criterion:backward(pred, y) + mlp:zeroGradParameters() + mlp:backward(x, gradCriterion) + mlp:updateParameters(learningRate) +end + +For example, if you wish to use your own criterion you can simply replace +''gradCriterion'' with the gradient vector of your criterion of choice.
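To make the fragment above concrete, here is a sketch of how ''gradUpdate'' could be driven on a toy problem (the layer sizes, the target value and the learning rate are arbitrary choices for illustration):

require 'nn'
require "lab"

mlp = nn.Sequential()
mlp:add( nn.Linear(10, 25) )
mlp:add( nn.Tanh() )
mlp:add( nn.Linear(25, 1) )

criterion = nn.MSECriterion()

-- one gradient step on a random (input, target) pair,
-- using the gradUpdate function defined above
x = lab.randn(10)             -- a random 10-dimensional input
y = torch.Tensor(1):fill(0.5) -- an arbitrary target
gradUpdate(mlp, x, y, criterion, 0.01)

Calling such a step repeatedly on examples drawn from a dataset is essentially what ''StochasticGradient'' does for you.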
+ + +====== Modules ====== +{{anchor:nn.Modules}} + +Modules are the bricks used to build neural networks. A [[#nn.Module|Module]] is a neural network +by itself, but it can be combined with other networks using [[#nn.Containers|container classes]] to create +complex neural networks. + +===== Module ===== +{{anchor:nn.Module}} + +''Module'' is an abstract class which defines the fundamental methods necessary +for training a neural network. Modules are [[..:torch:file#torch.file.serialization|serializable]]. + +Modules contain two state variables: [[#nn.ModuleOutput|output]] and +[[#nn.ModuleGradInput|gradInput]]. + +==== [output] forward(input) ==== +{{anchor:nn.Module.forward}} + +Takes an ''input'' object, and computes the corresponding ''output'' of the +module. In general ''input'' and ''output'' are +[[..:torch:tensor|Tensors]]. However, some special sub-classes +like [[#nn.TableLayers|table layers]] might expect something else. Please +refer to each module's specification for further information. + +After a ''forward()'', the [[#nn.ModuleOutput|output]] state variable should +have been updated to the new value. + +It is not advised to override this function. Instead, one should +implement the [[#nn.Module.updateOutput|updateOutput(input)]] +function. The ''forward'' method in the abstract parent class +[[#nn.Module|Module]] will call ''updateOutput(input)''. + +==== [gradInput] backward(input, gradOutput) ==== +{{anchor:nn.Module.backward}} + +Performs a //backpropagation step// through the module, with respect to the +given ''input''. In general this method makes the assumption that +[[#nn.Module.forward|forward(input)]] has been called before, //with the same input//. +This is necessary for optimization reasons. If you do not respect +this rule, ''backward()'' will compute incorrect gradients. + +In general ''input'', ''gradOutput'' and ''gradInput'' are +[[..:torch:tensor|Tensors]]. However, some special sub-classes +like [[#nn.TableLayers|table layers]] might expect something else. Please +refer to each module's specification for further information. + +A //backpropagation step// consists in computing two kinds of gradients +at ''input'' given ''gradOutput'' (gradients with respect to the +output of the module). This function simply performs this task using +two function calls: + + - A function call to [[#nn.Module.updateGradInput|updateGradInput(input, gradOutput)]]. + - A function call to [[#nn.Module.accGradParameters|accGradParameters(input,gradOutput)]]. + +It is not advised to override this function in custom classes. It +is better to override the +[[#nn.Module.updateGradInput|updateGradInput(input, gradOutput)]] and +[[#nn.Module.accGradParameters|accGradParameters(input, gradOutput)]] +functions. + +==== updateOutput(input) ==== +{{anchor:nn.Module.updateOutput}} + +Computes the output using the current parameter set of the class and the +input. This function returns the result which is stored in the +[[#nn.Module.output|output]] field. + +==== updateGradInput(input, gradOutput) ==== +{{anchor:nn.Module.updateGradInput}} + +Computes the gradient of the module with respect to its own +input. This is returned in ''gradInput''. Also, the +[[#nn.Module.gradInput|gradInput]] state variable is updated +accordingly. + +==== accGradParameters(input, gradOutput) ==== +{{anchor:nn.Module.accGradParameters}} + +Computes the gradient of the module with respect to its +own parameters. Many modules do not perform this step as they do not +have any parameters. 
The state variable name for the parameters is +module dependent. The module is expected to //accumulate// the +gradients with respect to the parameters in some variable. + +Zeroing this accumulation is achieved with +[[#nn.Module.zeroGradParameters|zeroGradParameters()]] and updating +the parameters according to this accumulation is done with +[[#nn.Module.updateParameters|updateParameters()]]. + +==== zeroGradParameters() ==== +{{anchor:nn.Module.zeroGradParameters}} + +If the module has parameters, this will zero the accumulation of the +gradients with respect to these parameters, accumulated through +[[#nn.Module.accGradParameters|accGradParameters(input, gradOutput)]] +calls. Otherwise, it does nothing. + +==== updateParameters(learningRate) ==== +{{anchor:nn.Module.updateParameters}} + +If the module has parameters, this will update these parameters, according +to the accumulation of the gradients with respect to these parameters, +accumulated through [[#nn.Module.backward|backward()]] calls. + +The update is basically: + +parameters = parameters - learningRate * gradients_wrt_parameters + +If the module does not have parameters, it does nothing. + +==== accUpdateGradParameters(input, gradOutput, learningRate) ==== +{{anchor:nn.Module.accUpdateGradParameters}} + +This is a convenience method that performs two operations at +once. It calculates and accumulates the gradients with respect to the +weights after multiplying them with the negative of the learning rate +''learningRate''. Performing these two operations at once is more +efficient and it might be advantageous in certain +situations. + +Keep in mind that this function uses a simple trick to achieve its +goal and it might not be valid for a custom module. + + +function Module:accUpdateGradParameters(input, gradOutput, lr) + local gradWeight = self.gradWeight + local gradBias = self.gradBias + self.gradWeight = self.weight + self.gradBias = self.bias + self:accGradParameters(input, gradOutput, -lr) + self.gradWeight = gradWeight + self.gradBias = gradBias +end + + +As can be seen, the gradients are accumulated directly into the +weights. This assumption may not be true for a module that computes a +nonlinear operation. + +==== share(mlp,s1,s2,...,sn) ==== +{{anchor:nn.Module.share}} + +This function modifies the parameters named +''s1'',..''sn'' of the module (if they exist) so that they are shared with (pointers +to) the parameters with the same names in the given module ''mlp''. + +The parameters have to be Tensors. This function is typically used if +you want to have modules that share the same weights or biases. + +Note that this function, if called on a [[#nn.Containers|Container]] +module, will share the same parameters for all the contained modules as +well. + +Example: + + +-- make an mlp +mlp1=nn.Sequential(); +mlp1:add(nn.Linear(100,10)); + +-- make a second mlp +mlp2=nn.Sequential(); +mlp2:add(nn.Linear(100,10)); + +-- the second mlp shares the bias of the first +mlp2:share(mlp1,'bias'); + +-- we change the bias of the first +mlp1:get(1).bias[1]=99; + +-- and see that the second one's bias has also changed.. +print(mlp2:get(1).bias[1]) + + + + +==== clone(mlp,...) ==== +{{anchor:nn.Module.clone}} + +Creates a deep copy of (i.e. not just a pointer to) the module, +including the current state of its parameters (e.g. weight, biases +etc., if any). 
+ +If arguments are provided to the ''clone(...)'' function it also calls +[[#nn.Module.share|share(...)]] with those arguments on the cloned +module after creating it, hence making a deep copy of this module with +some shared parameters. + +Example: + +-- make an mlp +mlp1=nn.Sequential(); +mlp1:add(nn.Linear(100,10)); + +-- make a copy that shares the weights and biases +mlp2=mlp1:clone('weight','bias'); + +-- we change the bias of the first mlp +mlp1:get(1).bias[1]=99; + +-- and see that the second one's bias has also changed.. +print(mlp2:get(1).bias[1]) + + + +==== type(type) ==== +{{anchor:nn.Module.type}} + +This function converts all the parameters of a module to the given +''type''. The ''type'' can be one of the types defined for +[[..:torch:tensor|torch.Tensor]]. + +==== float() ==== +{{anchor:nn.Module.float}} + +Convenience method for calling [[#nn.Module.type|module:type('torch.FloatTensor')]] + +==== double() ==== +{{anchor:nn.Module.double}} + +Convenience method for calling [[#nn.Module.type|module:type('torch.DoubleTensor')]] + +==== cuda() ==== +{{anchor:nn.Module.cuda}} + +Convenience method for calling [[#nn.Module.type|module:type('torch.CudaTensor')]] + +==== State Variables ==== +{{anchor:nn.statevars.dok}} + +These state variables are useful objects if one wants to check the guts of +a ''Module''. The object pointer is //never// supposed to change. However, its +contents (including its size if it is a Tensor) are supposed to change. + +In general state variables are +[[..:torch:tensor|Tensors]]. However, some special sub-classes +like [[#nn.TableLayers|table layers]] contain something else. Please, +refer to each module specification for further information. + +=== output === +{{anchor:nn.Module.output}} + +This contains the output of the module, computed with the last call of +[[#nn.Module.forward|forward(input)]]. + +=== gradInput === +{{anchor:nn.Module.gradInput}} + +This contains the gradients with respect to the inputs of the module, computed with the last call of +[[#nn.Module.updateGradInput|updateGradInput(input, gradOutput)]]. + +==== Parameters and gradients w.r.t parameters ==== + +Some modules contain parameters (the ones that we actually want to +train!). The name of these parameters, and gradients w.r.t these parameters +are module dependent. + +==== [{weights}, {gradWeights}] parameters() ==== +{{anchor:nn.Module.parameters}} + +This function should returns two tables. One for the learnable +parameters ''{weights}'' and another for the gradients of the energy +wrt to the learnable parameters ''{gradWeights}''. + +For custom modules, it is a good idea to also override this +function. By default none of the built-in functions/modules use this +function call, but it is especialy useful when one wants to obtain a +global view of the whole network. + +===== Containers ===== +{{anchor:nn.Containers}} + +==== Concat ==== +{{anchor:nn.Concat}} + + +module = nn.Concat(dim) + +Concat concatenates the output of one layer of "parallel" modules along the +provided dimension ''dim'': they take the same inputs, and their output is +concatenated. + +mlp=nn.Concat(1); +mlp:add(nn.Linear(5,3)) +mlp:add(nn.Linear(5,7)) +require "lab" +print(mlp:forward(lab.randn(5))) + +which gives the output: + + 0.7486 + 0.1349 + 0.7924 +-0.0371 +-0.4794 + 0.3044 +-0.0835 +-0.7928 + 0.7856 +-0.1815 +[torch.Tensor of dimension 10] + + + +==== Sequential ==== +{{anchor:nn.Sequential}} + +Sequential provides a means to plug layers together +in a feed-forward fully connected manner. 
+ +E.g. +creating a one hidden-layer multi-layer perceptron is thus just as easy as: + +mlp = nn.Sequential() +mlp:add( nn.Linear(10, 25) ) -- 10 input, 25 hidden units +mlp:add( nn.Tanh() ) -- some hyperbolic tangent transfer function +mlp:add( nn.Linear(25, 1) ) -- 1 output + +require "lab" +print(mlp:forward(lab.randn(10))) + +which gives the output: + +-0.1815 +[torch.Tensor of dimension 1] + + +==== Parallel ==== +{{anchor:nn.Parallel}} + +''module'' = ''Parallel(inputDimension,outputDimension)'' + +Creates a container module that applies its ''ith'' child module to the ''ith'' slice of the input Tensor by using [[..:torch:tensor#torch.tensor.select|select]] +on dimension ''inputDimension''. It concatenates the results of its contained modules together along dimension ''outputDimension''. + +Example: + + require "lab" + mlp=nn.Parallel(2,1); -- iterate over dimension 2 of input + mlp:add(nn.Linear(10,3)); -- apply to first slice + mlp:add(nn.Linear(10,2)) -- apply to first second slice + print(mlp:forward(lab.randn(10,2))) + +gives the output: + +-0.5300 +-1.1015 + 0.7764 + 0.2819 +-0.6026 +[torch.Tensor of dimension 5] + + +A more complicated example: + +require "lab" + +mlp=nn.Sequential(); +c=nn.Parallel(1,2) +for i=1,10 do + local t=nn.Sequential() + t:add(nn.Linear(3,2)) + t:add(nn.Reshape(2,1)) + c:add(t) +end +mlp:add(c) + +pred=mlp:forward(lab.randn(10,3)) +print(pred) + +for i=1,10000 do -- Train for a few iterations + x=lab.randn(10,3); + y=lab.ones(2,10); + pred=mlp:forward(x) + + criterion= nn.MSECriterion() + local err=criterion:forward(pred,y) + local gradCriterion = criterion:backward(pred,y); + mlp:zeroGradParameters(); + mlp:backward(x, gradCriterion); + mlp:updateParameters(0.01); + print(err) +end + +===== Simple layers ===== +{{anchor:nn.simplelayers.dok}} +==== Linear ==== +{{anchor:nn.Linear}} + +''module'' = ''Linear(inputDimension,outputDimension)'' + +Applies a linear transformation to the incoming data, i.e. //y= +Ax+b//. The ''input'' tensor given in ''forward(input)'' must be +either a vector (1D tensor) or matrix (2D tensor). If the input is a +matrix, then each row is assumed to be an input sample of given batch. + +You can create a layer in the following way: + + module= nn.Linear(10,5) -- 10 inputs, 5 outputs + +Usually this would be added to a network of some kind, e.g.: + + mlp = nn.Sequential(); + mlp:add(module) + +The weights and biases (//A// and //b//) can be viewed with: + + print(module.weight) + print(module.bias) + +The gradients for these weights can be seen with: + + print(module.gradWeight) + print(module.gradBias) + +As usual with ''nn'' modules, +applying the linear transformation is performed with: + + x=torch.Tensor(10) -- 10 inputs + y=module:forward(x) + + +==== SparseLinear ==== +{{anchor:nn.SparseLinear}} + +''module'' = ''SparseLinear(inputDimension,outputDimension)'' + +Applies a linear transformation to the incoming sparse data, i.e. +//y= Ax+b//. The ''input'' tensor given in ''forward(input)'' must +be a sparse vector represented as 2D tensor of the form +torch.Tensor(N, 2) where the pairs represent indices and values. +The SparseLinear layer is useful when the number of input +dimensions is very large and the input data is sparse. 
+ +You can create a sparse linear layer in the following way: + + + module= nn.SparseLinear(10000,2) -- 10000 inputs, 2 outputs + +The sparse linear module may be used as part of a larger network, +and apart from the form of the input, +[[#nn.SparseLinear|SparseLinear]] +operates in exactly the same way as the [[#nn.Linear|Linear]] layer. + +A sparse input vector may be created as so.. + + + x=lab.new({1, 0.1},{2, 0.3},{10, 0.3},{31, 0.2}) + + print(x) + + 1.0000 0.1000 + 2.0000 0.3000 + 10.0000 0.3000 + 31.0000 0.2000 +[torch.Tensor of dimension 4x2] + + + +The first column contains indices, the second column contains +values in a a vector where all other elements are zeros. The +indices should not exceed the stated dimesions of the input to the +layer (10000 in the example). + +==== Abs ==== +{{anchor:nn.Abs}} + +''module'' = ''Abs()'' + +''output = abs(input)''. + + +m=nn.Abs() +ii=lab.linspace(-5,5) +oo=m:forward(ii) +go=lab.ones(100) +gi=m:backward(ii,go) +gnuplot.plot({'f(x)',ii,oo,'+-'},{'df/dx',ii,gi,'+-'}) +gnuplot.grid(true) + + +{{abs.png?400}} + +==== Add ==== +{{anchor:nn.Add }} + +''module'' = ''Add(inputDimension,scalar)'' + +Applies a bias term to the incoming data, i.e. +//y_i= x_i + b_i, or if _scalar=true// then uses a single bias term, +_y_i= x_i + b. + +Example: + +y=torch.Tensor(5); +mlp=nn.Sequential() +mlp:add(nn.Add(5)) + +function gradUpdate(mlp, x, y, criterion, learningRate) + local pred = mlp:forward(x) + local err = criterion:forward(pred, y) + local gradCriterion = criterion:backward(pred, y) + mlp:zeroGradParameters() + mlp:backward(x, gradCriterion) + mlp:updateParameters(learningRate) + return err +end + +for i=1,10000 do + x=lab.rand(5) + y:copy(x); + for i=1,5 do y[i]=y[i]+i; end + err=gradUpdate(mlp,x,y,nn.MSECriterion(),0.01) +end +print(mlp:get(1).bias) + +gives the output: + + 1.0000 + 2.0000 + 3.0000 + 4.0000 + 5.0000 +[torch.Tensor of dimension 5] + +i.e. the network successfully learns the input //x// has been shifted +to produce the output //y//. + + +==== Mul ==== +{{anchor:nn.Mul}} + +''module'' = ''Mul(inputDimension)'' + +Applies a //single// scaling factor to the incoming data, i.e. +//y= w x//, where //w// is a scalar. + +Example: + +y=torch.Tensor(5); +mlp=nn.Sequential() +mlp:add(nn.Mul(5)) + +function gradUpdate(mlp, x, y, criterion, learningRate) + local pred = mlp:forward(x) + local err = criterion:forward(pred,y) + local gradCriterion = criterion:backward(pred,y); + mlp:zeroGradParameters(); + mlp:backward(x, gradCriterion); + mlp:updateParameters(learningRate); + return err +end + + +for i=1,10000 do + x=lab.rand(5) + y:copy(x); y:mul(math.pi); + err=gradUpdate(mlp,x,y,nn.MSECriterion(),0.01) +end +print(mlp:get(1).weight) + +gives the output: + + 3.1416 +[torch.Tensor of dimension 1] + +i.e. the network successfully learns the input ''x'' has been scaled by +pi. + +==== CMul ==== +{{anchor:nn.CMul }} + +''module'' = ''CMul(inputDimension)'' + +Applies a component-wise multiplication to the incoming data, i.e. +''y_i'' = ''w_i'' =x_i=. 
+ +Example: + +mlp=nn.Sequential() +mlp:add(nn.CMul(5)) + +y=torch.Tensor(5); +sc=torch.Tensor(5); for i=1,5 do sc[i]=i; end -- scale input with this + +function gradUpdate(mlp,x,y,criterion,learningRate) + local pred = mlp:forward(x) + local err = criterion:forward(pred,y) + local gradCriterion = criterion:backward(pred,y); + mlp:zeroGradParameters(); + mlp:backward(x, gradCriterion); + mlp:updateParameters(learningRate); + return err +end + +for i=1,10000 do + x=lab.rand(5) + y:copy(x); y:cmul(sc); + err=gradUpdate(mlp,x,y,nn.MSECriterion(),0.01) +end +print(mlp:get(1).weight) + +gives the output: + + 1.0000 + 2.0000 + 3.0000 + 4.0000 + 5.0000 +[torch.Tensor of dimension 5] + +i.e. the network successfully learns the input //x// has been scaled by +those scaling factors to produce the output //y//. + + +==== Max ==== +{{anchor:nn.Max}} + +''module'' = ''Max(dimension)'' + +Applies a max operation over dimension ''dimension''. +Hence, if an ''nxpxq'' Tensor was given as input, and ''dimension'' = ''2'' +then an ''nxq'' matrix would be output. + + +==== Min ==== +{{anchor:nn.Min}} + +''module'' = ''Min(dimension)'' + +Applies a min operation over dimension ''dimension''. +Hence, if an ''nxpxq'' Tensor was given as input, and ''dimension'' = ''2'' +then an ''nxq'' matrix would be output. + + +==== Mean ==== +{{anchor:nn.Mean}} + +''module'' = ''Mean(dimension)'' + +Applies a mean operation over dimension ''dimension''. +Hence, if an ''nxpxq'' Tensor was given as input, and ''dimension'' = ''2'' +then an ''nxq'' matrix would be output. + +==== Sum ==== +{{anchor:nn.Sum}} + +''module'' = ''Sum(dimension)'' + +Applies a sum operation over dimension ''dimension''. +Hence, if an ''nxpxq'' Tensor was given as input, and ''dimension'' = ''2'' +then an ''nxq'' matrix would be output. + + +==== Euclidean ==== +{{anchor:nn.Euclidean}} + +''module'' = ''Euclidean(inputDimension,outputDimension)'' + +Outputs the Euclidean distance of the input to ''outputDimension'' centers, +i.e. this layer has the weights ''c_i'', ''i'' = ''1'',..,''outputDimension'', where +''c_i'' are vectors of dimension ''inputDimension''. Output dimension ''j'' is +''|| c_i - x||^2'', where ''x'' is the input. + +==== WeightedEuclidean ==== +{{anchor:nn.WeightedEuclidean}} + +''module'' = ''WeightedEuclidean(inputDimension,outputDimension)'' + +This module is similar to [[#nn.Euclidian|Euclidian]], but +additionally learns a separate diagonal covariance matrix across the +features of the input space for each center. + + +==== Copy ==== +{{anchor:nn.Copy}} + +''module'' = ''Copy(inputType,outputType)'' + +This layer copies the input to output with type casting from input +type from ''inputType'' to ''outputType''. + + +==== Narrow ==== +{{anchor:nn.Narrow}} + +''module'' = ''Narrow(dimension, offset, length)'' + +Narrow is application of +[[..:torch:tensor:#torch.Tensor.narrow|narrow]] operation in a +module. + +==== Replicate ==== +{{anchor:nn.Replicate}} + +''module'' = ''Replicate(nFeature)'' + +This class creates an output where the input is replicated +''nFeature'' times along its first dimension. There is no memory +allocation or memory copy in this module. It sets the +[[..:torch:tensor#torch.Tensor.stride|stride]] along the first +dimension to zero. 
+ + +torch> x=lab.linspace(1,5,5) +torch> =x + 1 + 2 + 3 + 4 + 5 +[torch.DoubleTensor of dimension 5] + +torch> m=nn.Replicate(3) +torch> o=m:forward(x) +torch> =o + 1 2 3 4 5 + 1 2 3 4 5 + 1 2 3 4 5 +[torch.DoubleTensor of dimension 3x5] + +torch> x:fill(13) +torch> =x + 13 + 13 + 13 + 13 + 13 +[torch.DoubleTensor of dimension 5] + +torch> =o + 13 13 13 13 13 + 13 13 13 13 13 + 13 13 13 13 13 +[torch.DoubleTensor of dimension 3x5] + + + + +==== Reshape ==== +{{anchor:nn.Reshape}} + +''module'' = ''Reshape(dimension1, dimension2, ..)'' + +Reshapes an ''nxpxqx..'' Tensor into a ''dimension1xdimension2x...'' Tensor, +taking the elements column-wise. + +Example: + +> x=torch.Tensor(4,4) +> for i=1,4 do +> for j=1,4 do +> x[i][j]=(i-1)*4+j; +> end +> end +> print(x) + + 1 2 3 4 + 5 6 7 8 + 9 10 11 12 + 13 14 15 16 +[torch.Tensor of dimension 4x4] + +> print(nn.Reshape(2,8):forward(x)) + + 1 9 2 10 3 11 4 12 + 5 13 6 14 7 15 8 16 +[torch.Tensor of dimension 2x8] + +> print(nn.Reshape(8,2):forward(x)) + + 1 3 + 5 7 + 9 11 + 13 15 + 2 4 + 6 8 + 10 12 + 14 16 +[torch.Tensor of dimension 8x2] + +> print(nn.Reshape(16):forward(x)) + + 1 + 5 + 9 + 13 + 2 + 6 + 10 + 14 + 3 + 7 + 11 + 15 + 4 + 8 + 12 + 16 +[torch.Tensor of dimension 16] + + + + + +==== Select ==== +{{anchor:nn.Select}} + +Selects a dimension and index of a ''nxpxqx..'' Tensor. + +Example: + +mlp=nn.Sequential(); +mlp:add(nn.Select(1,3)) + +require "lab" +x=lab.randn(10,5) +print(x) +print(mlp:forward(x)) + +gives the output: + + 0.9720 -0.0836 0.0831 -0.2059 -0.0871 + 0.8750 -2.0432 -0.1295 -2.3932 0.8168 + 0.0369 1.1633 0.6483 1.2862 0.6596 + 0.1667 -0.5704 -0.7303 0.3697 -2.2941 + 0.4794 2.0636 0.3502 0.3560 -0.5500 +-0.1898 -1.1547 0.1145 -1.1399 0.1711 +-1.5130 1.4445 0.2356 -0.5393 -0.6222 +-0.6587 0.4314 1.1916 -1.4509 1.9400 + 0.2733 1.0911 0.7667 0.4002 0.1646 + 0.5804 -0.5333 1.1621 1.5683 -0.1978 +[torch.Tensor of dimension 10x5] + + 0.0369 + 1.1633 + 0.6483 + 1.2862 + 0.6596 +[torch.Tensor of dimension 5] + + +This can be used in conjunction with [[#nn.Concat|Concat]] +to emulate the behavior +of [[#nn.Parallel|Parallel]], or to select various parts of an input Tensor to +perform operations on. Here is a fairly complicated example: + +require "lab" + +mlp=nn.Sequential(); +c=nn.Concat(2) +for i=1,10 do + local t=nn.Sequential() + t:add(nn.Select(1,i)) + t:add(nn.Linear(3,2)) + t:add(nn.Reshape(2,1)) + c:add(t) +end +mlp:add(c) + +pred=mlp:forward(lab.randn(10,3)) +print(pred) + +for i=1,10000 do -- Train for a few iterations + x=lab.randn(10,3); + y=lab.ones(2,10); + pred=mlp:forward(x) + + criterion= nn.MSECriterion() + err=criterion:forward(pred,y) + gradCriterion = criterion:backward(pred,y); + mlp:zeroGradParameters(); + mlp:backward(x, gradCriterion); + mlp:updateParameters(0.01); + print(err) +end + + +==== Exp ==== +{{anchor:nn.Exp}} + +Applies the ''exp'' function element-wise to the input Tensor, +thus outputting a Tensor of the same dimension. + +ii=lab.linspace(-2,2) +m=nn.Exp() +oo=m:forward(ii) +go=lab.ones(100) +gi=m:backward(ii,go) +gnuplot.plot({'f(x)',ii,oo,'+-'},{'df/dx',ii,gi,'+-'}) +gnuplot.grid(true) + +{{exp.png?400}} + + +==== Square ==== +{{anchor:nn.Square}} + +Takes the square of each element. + + +ii=lab.linspace(-5,5) +m=nn.Square() +oo=m:forward(ii) +go=lab.ones(100) +gi=m:backward(ii,go) +gnuplot.plot({'f(x)',ii,oo,'+-'},{'df/dx',ii,gi,'+-'}) +gnuplot.grid(true) + +{{square.png?400}} + +==== Sqrt ==== +{{anchor:nn.Sqrt}} + +Takes the square root of each element. 
+
+
+ii=lab.linspace(0,5)
+m=nn.Sqrt()
+oo=m:forward(ii)
+go=lab.ones(100)
+gi=m:backward(ii,go)
+gnuplot.plot({'f(x)',ii,oo,'+-'},{'df/dx',ii,gi,'+-'})
+gnuplot.grid(true)
+
+{{sqrt.png?400}}
+
+==== Power ====
+{{anchor:nn.Power}}
+
+''module'' = ''Power(p)''
+
+Raises each element to its ''pth'' power.
+
+
+ii=lab.linspace(0,2)
+m=nn.Power(1.25)
+oo=m:forward(ii)
+go=lab.ones(100)
+gi=m:backward(ii,go)
+gnuplot.plot({'f(x)',ii,oo,'+-'},{'df/dx',ii,gi,'+-'})
+gnuplot.grid(true)
+
+{{power.png?400}}
+
+===== Transfer Function Layers =====
+{{anchor:nn.transfer.dok}}
+
+==== HardTanh ====
+{{anchor:nn.HardTanh}}
+
+Applies the ''HardTanh'' function element-wise to the input Tensor,
+thus outputting a Tensor of the same dimension.
+
+''HardTanh'' is defined as:
+
+  * ''f(x) = 1, if x > 1''
+  * ''f(x) = -1, if x < -1''
+  * ''f(x) = x, otherwise''
+
+
+ii=lab.linspace(-2,2)
+m=nn.HardTanh()
+oo=m:forward(ii)
+go=lab.ones(100)
+gi=m:backward(ii,go)
+gnuplot.plot({'f(x)',ii,oo,'+-'},{'df/dx',ii,gi,'+-'})
+gnuplot.grid(true)
+
+{{htanh.png?400}}
+
+
+==== HardShrink ====
+{{anchor:nn.HardShrink}}
+
+''module = nn.HardShrink(lambda)''
+
+Applies the hard shrinkage function element-wise to the input
+[[..:torch:Tensor|Tensor]]. The output is the same size as the input.
+
+The ''HardShrinkage'' operator is defined as:
+
+  * ''f(x) = x, if x > lambda''
+  * ''f(x) = x, if x < -lambda''
+  * ''f(x) = 0, otherwise''
+
+
+ii=lab.linspace(-2,2)
+m=nn.HardShrink(0.85)
+oo=m:forward(ii)
+go=lab.ones(100)
+gi=m:backward(ii,go)
+gnuplot.plot({'f(x)',ii,oo,'+-'},{'df/dx',ii,gi,'+-'})
+gnuplot.grid(true)
+
+{{hshrink.png?400}}
+
+==== SoftShrink ====
+{{anchor:nn.SoftShrink}}
+
+''module = nn.SoftShrink(lambda)''
+
+Applies the soft shrinkage function element-wise to the input
+[[..:torch:Tensor|Tensor]]. The output is the same size as the input.
+
+The ''SoftShrinkage'' operator is defined as:
+
+  * ''f(x) = x-lambda, if x > lambda''
+  * ''f(x) = x+lambda, if x < -lambda''
+  * ''f(x) = 0, otherwise''
+
+
+ii=lab.linspace(-2,2)
+m=nn.SoftShrink(0.85)
+oo=m:forward(ii)
+go=lab.ones(100)
+gi=m:backward(ii,go)
+gnuplot.plot({'f(x)',ii,oo,'+-'},{'df/dx',ii,gi,'+-'})
+gnuplot.grid(true)
+
+{{sshrink.png?400}}
+
+
+==== SoftMax ====
+{{anchor:nn.SoftMax}}
+
+Applies the ''Softmax'' function to an n-dimensional input Tensor,
+rescaling the input so that the elements of the n-dimensional output Tensor
+lie in the range (0,1) and sum to 1.
+
+''Softmax'' is defined as ''f_i(x)'' = ''exp(x_i-shift) / sum_j exp(x_j-shift)'',
+where ''shift'' = ''max_i x_i''.
+
+
+
+ii=lab.exp(lab.abs(lab.randn(10)))
+m=nn.SoftMax()
+oo=m:forward(ii)
+gnuplot.plot({'Input',ii,'+-'},{'Output',oo,'+-'})
+gnuplot.grid(true)
+
+{{softmax.png?400}}
+
+==== SoftMin ====
+{{anchor:nn.SoftMin}}
+
+Applies the ''Softmin'' function to an n-dimensional input Tensor,
+rescaling the input so that the elements of the n-dimensional output Tensor
+lie in the range (0,1) and sum to 1.
+
+''Softmin'' is defined as ''f_i(x)'' = ''exp(-x_i-shift) / sum_j exp(-x_j-shift)'',
+where ''shift'' = ''max_i (-x_i)''.
+
+
+
+ii=lab.exp(lab.abs(lab.randn(10)))
+m=nn.SoftMin()
+oo=m:forward(ii)
+gnuplot.plot({'Input',ii,'+-'},{'Output',oo,'+-'})
+gnuplot.grid(true)
+
+{{softmin.png?400}}
+
+==== SoftPlus ====
+{{anchor:nn.SoftPlus}}
+
+Applies the ''SoftPlus'' function to an n-dimensional input Tensor.
+Can be used to constrain the output of a machine to always be positive.
+
+''SoftPlus'' is defined as ''f_i(x)'' = ''log(1 + exp(x_i))''.
+
+
+ii=lab.randn(10)
+m=nn.SoftPlus()
+oo=m:forward(ii)
+go=lab.ones(10)
+gi=m:backward(ii,go)
+gnuplot.plot({'Input',ii,'+-'},{'Output',oo,'+-'},{'gradInput',gi,'+-'})
+gnuplot.grid(true)
+
+{{softplus.png?400}}
+
+==== SoftSign ====
+{{anchor:nn.SoftSign}}
+
+Applies the ''SoftSign'' function to an n-dimensional input Tensor.
+
+''SoftSign'' is defined as ''f_i(x) = x_i / (1+|x_i|)''.
+
+
+ii=lab.linspace(-5,5)
+m=nn.SoftSign()
+oo=m:forward(ii)
+go=lab.ones(100)
+gi=m:backward(ii,go)
+gnuplot.plot({'f(x)',ii,oo,'+-'},{'df/dx',ii,gi,'+-'})
+gnuplot.grid(true)
+
+{{softsign.png?400}}
+
+==== LogSigmoid ====
+{{anchor:nn.LogSigmoid}}
+
+Applies the ''LogSigmoid'' function to an n-dimensional input Tensor.
+
+''LogSigmoid'' is defined as ''f_i(x)'' = ''log(1/(1+ exp(-x_i)))''.
+
+
+
+ii=lab.randn(10)
+m=nn.LogSigmoid()
+oo=m:forward(ii)
+go=lab.ones(10)
+gi=m:backward(ii,go)
+gnuplot.plot({'Input',ii,'+-'},{'Output',oo,'+-'},{'gradInput',gi,'+-'})
+gnuplot.grid(true)
+
+{{logsigmoid.png?400}}
+
+
+==== LogSoftMax ====
+{{anchor:nn.LogSoftMax}}
+
+Applies the ''LogSoftmax'' function to an n-dimensional input Tensor.
+
+''LogSoftmax'' is defined as ''f_i(x)'' = ''log(1/a exp(x_i))'',
+where ''a'' = ''sum_j exp(x_j)''.
+
+
+ii=lab.randn(10)
+m=nn.LogSoftMax()
+oo=m:forward(ii)
+go=lab.ones(10)
+gi=m:backward(ii,go)
+gnuplot.plot({'Input',ii,'+-'},{'Output',oo,'+-'},{'gradInput',gi,'+-'})
+gnuplot.grid(true)
+
+{{logsoftmax.png?400}}
+
+==== Sigmoid ====
+{{anchor:nn.Sigmoid}}
+
+Applies the ''Sigmoid'' function element-wise to the input Tensor,
+thus outputting a Tensor of the same dimension.
+
+''Sigmoid'' is defined as ''f(x)'' = ''1/(1+exp(-x))''.
+
+
+ii=lab.linspace(-5,5)
+m=nn.Sigmoid()
+oo=m:forward(ii)
+go=lab.ones(100)
+gi=m:backward(ii,go)
+gnuplot.plot({'f(x)',ii,oo,'+-'},{'df/dx',ii,gi,'+-'})
+gnuplot.grid(true)
+
+{{sigmoid.png?400}}
+
+==== Tanh ====
+{{anchor:nn.Tanh}}
+
+Applies the ''Tanh'' function element-wise to the input Tensor,
+thus outputting a Tensor of the same dimension.
+
+
+ii=lab.linspace(-3,3)
+m=nn.Tanh()
+oo=m:forward(ii)
+go=lab.ones(100)
+gi=m:backward(ii,go)
+gnuplot.plot({'f(x)',ii,oo,'+-'},{'df/dx',ii,gi,'+-'})
+gnuplot.grid(true)
+
+{{tanh.png?400}}
+
+===== Convolutional layers =====
+{{anchor:nn.convlayers.dok}}
+
+SpatialConvolution and SpatialSubSampling apply to inputs with
+two-dimensional relationships (e.g. images). TemporalConvolution and
+TemporalSubSampling apply to sequences with a one-dimensional
+relationship (e.g. strings of some kind).
+
+For spatial convolutional layers, the input is supposed to be 3D. The
+first dimension is the number of features, the last two dimensions
+are spatial.
+
+==== SpatialConvolution ====
+{{anchor:nn.SpatialConvolution}}
+
+
+module = nn.SpatialConvolution(nInputPlane, nOutputPlane, kW, kH, [dW], [dH])
+
+
+Applies a 2D convolution over an input image composed of several input planes. The ''input'' tensor in
+''forward(input)'' is expected to be a 3D tensor (''nInputPlane x width x height'').
+
+The parameters are the following:
+  * ''nInputPlane'': The number of expected input planes in the image given into ''forward()''.
+  * ''nOutputPlane'': The number of output planes the convolution layer will produce.
+  * ''kW'': The kernel width of the convolution
+  * ''kH'': The kernel height of the convolution
+  * ''dW'': The step of the convolution in the width dimension. Default is ''1''.
+  * ''dH'': The step of the convolution in the height dimension. Default is ''1''.
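+
+For example, a small usage sketch (the sizes chosen here are arbitrary):
+
+
+require "lab"
+-- 3 input planes (e.g. an RGB image), 16 output planes, 5x5 kernels,
+-- default steps dW=dH=1
+m = nn.SpatialConvolution(3, 16, 5, 5)
+res = m:forward(lab.randn(3, 32, 32))
+print(res:size()) -- should print 16x28x28, see the output size formula below
+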
+
+Note that depending on the size of your kernel, several (of the last)
+columns or rows of the input image might be lost. It is up to the user to
+add proper padding in images.
+
+If the input image is a 3D tensor ''nInputPlane x width x height'', the output image size
+will be ''nOutputPlane x owidth x oheight'' where
+
+owidth  = (width  - kW) / dW + 1
+oheight = (height - kH) / dH + 1 .
+
+
+The parameters of the convolution can be found in ''self.weight'' (Tensor of
+size ''nOutputPlane x nInputPlane x kH x kW'') and ''self.bias'' (Tensor of
+size ''nOutputPlane''). The corresponding gradients can be found in
+''self.gradWeight'' and ''self.gradBias''.
+
+The output value of the layer can be precisely described as:
+
+output[k][i][j] = bias[k]
+  + sum_l sum_{s=1}^kW sum_{t=1}^kH weight[k][l][t][s]
+                                    * input[l][dW*(i-1)+s][dH*(j-1)+t]
+
+
+==== SpatialConvolutionMap ====
+{{anchor:nn.SpatialConvolutionMap}}
+
+
+module = nn.SpatialConvolutionMap(connectionMatrix, kW, kH, [dW], [dH])
+
+
+This class is a generalization of
+[[#nn.SpatialConvolution|nn.SpatialConvolution]]. It uses a generic
+connection table between input and output features. The
+[[#nn.SpatialConvolution|nn.SpatialConvolution]] is equivalent to
+using a [[#nn.tables.full|full connection table]]. One can specify
+different types of connection tables.
+
+=== Full Connection Table ===
+{{anchor:nn.tables.full}}
+
+''table = nn.tables.full(nin,nout)''
+
+This is a precomputed table that specifies connections between every
+input and output node.
+
+=== One to One Connection Table ===
+{{anchor:nn.tables.onetoone}}
+
+''table = nn.tables.oneToOne(n)''
+
+This is a precomputed table that specifies a single connection to each
+output node from the corresponding input node.
+
+=== Random Connection Table ===
+{{anchor:nn.tables.random}}
+
+''table = nn.tables.random(nin,nout, nto)''
+
+This table is randomly populated such that each output unit has
+''nto'' incoming connections. The algorithm tries to assign a uniform
+number of outgoing connections to each input node if possible.
+
+==== SpatialLPPooling ====
+{{anchor:nn.SpatialLPPooling}}
+
+
+module = nn.SpatialLPPooling(nInputPlane, pnorm, kW, kH, [dW], [dH])
+
+
+Computes the ''p'' norm in a convolutional manner on a set of 2D input planes.
+
+==== SpatialMaxPooling ====
+{{anchor:nn.SpatialMaxPooling}}
+
+
+module = nn.SpatialMaxPooling(kW, kH [, dW, dH])
+
+
+Applies a 2D max-pooling operation in ''kWxkH'' regions by step size
+''dWxdH''. The number of output features is equal to the number of
+input planes.
+
+==== SpatialSubSampling ====
+{{anchor:nn.SpatialSubSampling}}
+
+
+module = nn.SpatialSubSampling(nInputPlane, kW, kH, [dW], [dH])
+
+
+Applies a 2D sub-sampling over an input image composed of several input planes. The ''input'' tensor in
+''forward(input)'' is expected to be a 3D tensor (''nInputPlane x width x height''). The number of output
+planes will be the same as ''nInputPlane''.
+
+The parameters are the following:
+  * ''nInputPlane'': The number of expected input planes in the image given into ''forward()''.
+  * ''kW'': The kernel width of the sub-sampling
+  * ''kH'': The kernel height of the sub-sampling
+  * ''dW'': The step of the sub-sampling in the width dimension. Default is ''1''.
+  * ''dH'': The step of the sub-sampling in the height dimension. Default is ''1''.
+
+Note that depending on the size of your kernel, several (of the last)
+columns or rows of the input image might be lost. It is up to the user to
+add proper padding in images.
+
+If the input image is a 3D tensor ''nInputPlane x width x height'', the output image size
+will be ''nInputPlane x owidth x oheight'' where
+
+owidth  = (width  - kW) / dW + 1
+oheight = (height - kH) / dH + 1 .
+
+
+The parameters of the sub-sampling can be found in ''self.weight'' (Tensor of
+size ''nInputPlane'') and ''self.bias'' (Tensor of size ''nInputPlane''). The
+corresponding gradients can be found in ''self.gradWeight'' and
+''self.gradBias''.
+
+The output value of the layer can be precisely described as:
+
+output[k][i][j] = bias[k]
+  + weight[k] sum_{s=1}^kW sum_{t=1}^kH input[k][dW*(i-1)+s][dH*(j-1)+t]
+
+
+==== SpatialZeroPadding ====
+{{anchor:nn.SpatialZeroPadding}}
+
+
+module = nn.SpatialZeroPadding(padLeft, padRight, padTop, padBottom)
+
+
+Each feature map of a given input is padded with the specified number of
+zeros. If padding values are negative, then the input is cropped.
+
+==== SpatialSubtractiveNormalization ====
+{{anchor:nn.SpatialSubtractiveNormalization}}
+
+
+module = nn.SpatialSubtractiveNormalization(ninputplane, kernel)
+
+
+Applies a spatial subtraction operation on a series of 2D inputs using
+''kernel'' for computing the weighted average in a neighborhood. The
+neighborhood is defined for a local spatial region that is the same size as
+the kernel and across all features. For an input image with a single feature
+(e.g. a gray-scale image), the region is only spatial. For an RGB image, the
+weighted average is taken over RGB channels and a spatial region.
+
+If the ''kernel'' is 1D, then it will be used for constructing a separable
+2D kernel. The operations will be much more efficient in this case.
+
+The kernel is generally chosen as a Gaussian when it is believed that
+the correlation of two pixel locations decreases with increasing
+distance. On the feature dimension, a uniform average is used since
+the weighting across features is not known.
+
+For this example we use an external package
+[[http://www.github.com/clementfarabet/lua---image/|image]]
+
+
+require 'image'
+require 'nn'
+lena = image.rgb2y(image.lena())
+ker = lab.ones(11)
+m=nn.SpatialSubtractiveNormalization(1,ker)
+processed = m:forward(lena)
+w1=image.display(lena)
+w2=image.display(processed)
+
+{{lena.jpg?300}}{{lenap.jpg?300}}
+
+==== TemporalConvolution ====
+{{anchor:nn.TemporalConvolution}}
+
+
+module = nn.TemporalConvolution(inputFrameSize, outputFrameSize, kW, [dW])
+
+
+Applies a 1D convolution over an input sequence composed of ''nInputFrame'' frames. The ''input'' tensor in
+''forward(input)'' is expected to be a 2D tensor (''nInputFrame x inputFrameSize'').
+
+The parameters are the following:
+  * ''inputFrameSize'': The input frame size expected in sequences given into ''forward()''.
+  * ''outputFrameSize'': The output frame size the convolution layer will produce.
+  * ''kW'': The kernel width of the convolution
+  * ''dW'': The step of the convolution. Default is ''1''.
+
+Note that depending on the size of your kernel, several (of the last)
+frames of the sequence might be lost. It is up to the user to add proper padding frames in the input
+sequences.
+
+If the input sequence is a 2D tensor ''nInputFrame x inputFrameSize'', the output sequence will be
+''nOutputFrame x outputFrameSize'' where
+
+nOutputFrame = (nInputFrame - kW) / dW + 1
+
+
+The parameters of the convolution can be found in ''self.weight'' (Tensor of
+size ''outputFrameSize x (inputFrameSize x kW) '') and ''self.bias'' (Tensor of
+size ''outputFrameSize'').
The corresponding gradients can be found in +''self.gradWeight'' and ''self.gradBias''. + +The output value of the layer can be precisely described as: + +output[i][t] = bias[i] + + sum_j sum_{k=1}^kW weight[j][k][i] + * input[j][dW*(t-1)+k)] + + +Here is a simple example: + + +inp=5; -- dimensionality of one sequence element +outp=1; -- number of derived features for one sequence element +kw=1; -- kernel only operates on one sequence element at once +dw=1; -- we step once and go on to the next sequence element + +mlp=nn.TemporalConvolution(inp,outp,kw,dw) + +require "lab" +x=lab.rand(7,inp) -- a sequence of 7 elements +print(mlp:forward(x)) + +which gives: + +-0.9109 +-0.9872 +-0.6808 +-0.9403 +-0.9680 +-0.6901 +-0.6387 +[torch.Tensor of dimension 7x1] + + +This is equivalent to: + +weights=lab.reshape(mlp.weight,inp) -- weights applied to all +bias= mlp.bias[1]; +for i=1,x:size(1) do -- for each sequence element + element= x[i]; -- features of ith sequence element + print(element:dot(weights) + bias) +end + +which gives: + +-0.91094998687717 +-0.98721705771773 +-0.68075004276185 +-0.94030132495887 +-0.96798754116609 +-0.69008470895581 +-0.63871422284166 + + + +==== TemporalSubSampling ==== +{{anchor:nn.TemporalSubSampling}} + + +module = nn.TemporalSubSampling(inputFrameSize, kW, [dW]) + + +Applies a 1D sub-sampling over an input sequence composed of ''nInputFrame'' frames. The ''input'' tensor in +''forward(input)'' is expected to be a 2D tensor (''nInputFrame x inputFrameSize''). The output frame size +will be the same as the input one (''inputFrameSize''). + +The parameters are the following: + * ''inputFrameSize'': The input frame size expected in sequences given into ''forward()''. + * ''kW'': The kernel width of the sub-sampling + * ''dW'': The step of the sub-sampling. Default is ''1''. + +Note that depending of the size of your kernel, several (of the last) +frames of the sequence might be lost. It is up to the user to add proper padding frames in the input +sequences. + +If the input sequence is a 2D tensor ''nInputFrame x inputFrameSize'', the output sequence will be +''inputFrameSize x nOutputFrame'' where + +nOutputFrame = (nInputFrame - kW) / dW + 1 + + +The parameters of the sub-sampling can be found in ''self.weight'' (Tensor of +size ''inputFrameSize'') and ''self.bias'' (Tensor of +size ''inputFrameSize''). The corresponding gradients can be found in +''self.gradWeight'' and ''self.gradBias''. + +The output value of the layer can be precisely described as: + +output[i][t] = bias[i] + weight[i] * sum_{k=1}^kW input[i][dW*(t-1)+k)] + + +==== LookupTable ==== +{{anchor:nn.LookupTable}} + + +module = nn.LookupTable(nIndex, sizes) + +or + +module = nn.LookupTable(nIndex, size1, [size2], [size3], ...) + + +This layer is a particular case of a convolution, where the width of the convolution would be ''1''. +When calling ''forward(input)'', it assumes ''input'' is a 1D tensor filled with indices. Indices start +at ''1'' and can go up to ''nIndex''. For each index, it outputs a corresponding ''Tensor'' of size +specified by ''sizes'' (an ''LongStorage'') or ''size1 x size2 x...''. + +The output tensors are concatenated, generating a ''size1 x size2 x ... x sizeN x n'' tensor, where ''n'' +is the size of the ''input'' tensor. 
+ +When only ''size1'' is provided, this is equivalent to do the following matrix-matrix multiplication +in an efficient manner: + +M P + +where ''M'' is a 2D matrix ''size1 x nIndex'' containing the parameters of the lookup-table and +''P'' is a 2D matrix, where each column vector ''i'' is a zero vector except at index ''input[i]'' where it is ''1''. + +Example: + + -- a lookup table containing 10 tensors of size 3 + module = nn.LookupTable(10, 3) + + input = torch.Tensor(4) + input[1] = 1; input[2] = 2; input[3] = 1; input[4] = 10; + print(module:forward(input)) + + +Outputs something like: + +-0.1784 2.2045 -0.1784 -0.2475 +-1.0120 0.0537 -1.0120 -0.2148 +-1.2840 0.8685 -1.2840 -0.2792 +[torch.Tensor of dimension 3x4] + +Note that the first column vector is the same than the 3rd one! + +===== Layers for manipulating tables ===== +{{anchor:nn.TableLayers}} + +This set of modules allows the manipulation of Tables +through the layers of a neural network. +This allows one to build very rich architectures. + +Table-based modules work by supporting forward and backward methods that can accept +tables as inputs. It turns out that the usual [[#nn.Sequential|Sequential]] module can do this, so all that is needed is other child modules that take advantage of such tables. + +mlp = nn.Sequential(); +t={x,y,z} +pred=mlp:forward(t) +pred=mlp:forward{x,y,z} -- This is equivalent to the line before + + +==== ConcatTable ==== +{{anchor:nn.ConcatTable}} + +ConcatTable is a container module that applies each member module to +the same input Tensor. + +Example: + +mlp= nn.ConcatTable() +mlp:add(nn.Linear(5,2)) +mlp:add(nn.Linear(5,3)) + +require "lab" +pred=mlp:forward(lab.randn(5)); +for i,k in pairs(pred) do print(i,k); end + +which gives the output: + +1 +-0.4073 + 0.0110 +[torch.Tensor of dimension 2] + +2 + 0.0027 +-0.0598 +-0.1189 +[torch.Tensor of dimension 3] + + +==== ParallelTable ==== +{{anchor:nn.ParallelTable}} + +ParallelTable is a container module that, in its ''forward'' method, applies the ''ith'' member module to the ''ith'' input, and outputs a table of the set of outputs. + +Example: + +mlp= nn.ParallelTable() +mlp:add(nn.Linear(10,2)) +mlp:add(nn.Linear(5,3)) + +require "lab" +x=lab.randn(10) +y=lab.rand(5) + +pred=mlp:forward{x,y} +for i,k in pairs(pred) do print(i,k); end + +which gives the output: + +1 + 0.0331 + 0.7003 +[torch.Tensor of dimension 2] + +2 + 0.0677 +-0.1657 +-0.7383 +[torch.Tensor of dimension 3] + + +==== SplitTable ==== +{{anchor:nn.SplitTable}} + +''module'' = ''SplitTable(dimension)'' + +Creates a module that takes a Tensor as input and outputs several tables, splitting the Tensor along dimension ''dimension''. 
+ +Example 1: + +require "lab" +mlp=nn.SplitTable(2) +x=lab.randn(4,3) +pred=mlp:forward(x) +for i,k in pairs(pred) do print(i,k); end + +gives the output: + +1 + 1.3885 + 1.3295 + 0.4281 +-1.0171 +[torch.Tensor of dimension 4] + +2 +-1.1565 +-0.8556 +-1.0717 +-0.8316 +[torch.Tensor of dimension 4] + +3 +-1.3678 +-0.1709 +-0.0191 +-2.5871 +[torch.Tensor of dimension 4] + + +Example 2: + +require "lab" +mlp=nn.SplitTable(1) +pred=mlp:forward(lab.randn(10,3)) +for i,k in pairs(pred) do print(i,k); end + +gives the output: + +1 + 1.6114 + 0.9038 + 0.8419 +[torch.Tensor of dimension 3] + +2 + 2.4742 + 0.2208 + 1.6043 +[torch.Tensor of dimension 3] + +3 + 1.3415 + 0.2984 + 0.2260 +[torch.Tensor of dimension 3] + +4 + 2.0889 + 1.2309 + 0.0983 +[torch.Tensor of dimension 3] + + +A more complicated example: + +require "lab" + +mlp=nn.Sequential(); --Create a network that takes a Tensor as input +mlp:add(nn.SplitTable(2)) + c=nn.ParallelTable() --The two Tensors go through two different Linear + c:add(nn.Linear(10,3)) --Layers in Parallel + c:add(nn.Linear(10,7)) +mlp:add(c) --Outputing a table with 2 elements + p=nn.ParallelTable() --These tables go through two more linear layers + p:add(nn.Linear(3,2)) -- separately. + p:add(nn.Linear(7,1)) +mlp:add(p) +mlp:add(nn.JoinTable(1)) --Finally, the tables are joined together and output. + +pred=mlp:forward(lab.randn(10,2)) +print(pred) + +for i=1,100 do -- A few steps of training such a network.. + x=lab.ones(10,2); + y=torch.Tensor(3); y:copy(x:select(2,1,1):narrow(1,1,3)) + pred=mlp:forward(x) + + criterion= nn.MSECriterion() + local err=criterion:forward(pred,y) + local gradCriterion = criterion:backward(pred,y); + mlp:zeroGradParameters(); + mlp:backward(x, gradCriterion); + mlp:updateParameters(0.05); + + print(err) +end + + +==== JoinTable ==== +{{anchor:nn.JoinTable}} + +''module'' = ''JoinTable(dimension)'' + +Creates a module that takes a list of Tensors as input and outputs a Tensor by joining them together along dimension ''dimension''. + +Example: + +require "lab" +x=lab.randn(5,1) +y=lab.randn(5,1) +z=lab.randn(2,1) + +print(nn.JoinTable(1):forward{x,y}) +print(nn.JoinTable(2):forward{x,y}) +print(nn.JoinTable(1):forward{x,z}) + +gives the output: + +1.3965 + 0.5146 +-1.5244 +-0.9540 + 0.4256 + 0.1575 + 0.4491 + 0.6580 + 0.1784 +-1.7362 + + 1.3965 0.1575 + 0.5146 0.4491 +-1.5244 0.6580 +-0.9540 0.1784 + 0.4256 -1.7362 + + 1.3965 + 0.5146 +-1.5244 +-0.9540 + 0.4256 +-1.2660 + 1.0869 +[torch.Tensor of dimension 7x1] + + +A more complicated example: + +require "lab" + +mlp=nn.Sequential(); --Create a network that takes a Tensor as input + c=nn.ConcatTable() --The same Tensor goes through two different Linear + c:add(nn.Linear(10,3)) --Layers in Parallel + c:add(nn.Linear(10,7)) +mlp:add(c) --Outputing a table with 2 elements + p=nn.ParallelTable() --These tables go through two more linear layers + p:add(nn.Linear(3,2)) -- separately. + p:add(nn.Linear(7,1)) +mlp:add(p) +mlp:add(nn.JoinTable(1)) --Finally, the tables are joined together and output. + +pred=mlp:forward(lab.randn(10)) +print(pred) + +for i=1,100 do -- A few steps of training such a network.. 
+ x=lab.ones(10); + y=torch.Tensor(3); y:copy(x:narrow(1,1,3)) + pred=mlp:forward(x) + + criterion= nn.MSECriterion() + local err=criterion:forward(pred,y) + local gradCriterion = criterion:backward(pred,y); + mlp:zeroGradParameters(); + mlp:backward(x, gradCriterion); + mlp:updateParameters(0.05); + + print(err) +end + + +==== Identity ==== +{{anchor:nn.Identity}} + +''module'' = ''Identity()'' + +Creates a module that returns whatever is input to it as output. +This is useful when combined with the module +[[#nn.ParallelTable|ParallelTable]] +in case you do not wish to do anything to one of the input Tensors. +Example: + +require "lab" +mlp=nn.Identity() +print(mlp:forward(lab.ones(5,2))) + +gives the output: + + 1 1 + 1 1 + 1 1 + 1 1 + 1 1 +[torch.Tensor of dimension 5x2] + + +Here is a more useful example, where one can implement a network which also computes a Criterion using this module: + +pred_mlp=nn.Sequential(); -- A network that makes predictions given x. +pred_mlp:add(nn.Linear(5,4)) +pred_mlp:add(nn.Linear(4,3)) + +xy_mlp=nn.ParallelTable();-- A network for predictions and for keeping the +xy_mlp:add(pred_mlp) -- true label for comparison with a criterion +xy_mlp:add(nn.Identity()) -- by forwarding both x and y through the network. + +mlp=nn.Sequential(); -- The main network that takes both x and y. +mlp:add(xy_mlp) -- It feeds x and y to parallel networks; +cr=nn.MSECriterion(); +cr_wrap=nn.CriterionTable(cr) +mlp:add(cr_wrap) -- and then applies the criterion. + +for i=1,100 do -- Do a few training iterations + x=lab.ones(5); -- Make input features. + y=torch.Tensor(3); + y:copy(x:narrow(1,1,3)) -- Make output label. + err=mlp:forward{x,y} -- Forward both input and output. + print(err) -- Print error from criterion. + + mlp:zeroGradParameters(); -- Do backprop... + mlp:backward({x, y} ); + mlp:updateParameters(0.05); +end + + +==== PairwiseDistance ==== +{{anchor:nn.PairwiseDistance}} + +''module'' = ''PairwiseDistance(p)'' creates a module that takes a table of two vectors as input and outputs the distance between them using the ''p''-norm. + +Example: + +mlp_l1=nn.PairwiseDistance(1) +mlp_l2=nn.PairwiseDistance(2) +x=lab.new(1,2,3) +y=lab.new(4,5,6) +print(mlp_l1:forward({x,y})) +print(mlp_l2:forward({x,y})) + +gives the output: + + 9 +[torch.Tensor of dimension 1] + + 5.1962 +[torch.Tensor of dimension 1] + + +A more complicated example: + +-- imagine we have one network we are interested in, it is called "p1_mlp" +p1_mlp= nn.Sequential(); p1_mlp:add(nn.Linear(5,2)) + +-- But we want to push examples towards or away from each other +-- so we make another copy of it called p2_mlp +-- this *shares* the same weights via the set command, but has its own set of temporary gradient storage +-- that's why we create it again (so that the gradients of the pair don't wipe each other) +p2_mlp= nn.Sequential(); p2_mlp:add(nn.Linear(5,2)) +p2_mlp:get(1).weight:set(p1_mlp:get(1).weight) +p2_mlp:get(1).bias:set(p1_mlp:get(1).bias) + +-- we make a parallel table that takes a pair of examples as input. 
they both go through the same (cloned) mlp +prl = nn.ParallelTable() +prl:add(p1_mlp) +prl:add(p2_mlp) + +-- now we define our top level network that takes this parallel table and computes the pairwise distance betweem +-- the pair of outputs +mlp= nn.Sequential() +mlp:add(prl) +mlp:add(nn.PairwiseDistance(1)) + +-- and a criterion for pushing together or pulling apart pairs +crit=nn.HingeEmbeddingCriterion(1) + +-- lets make two example vectors +x=lab.rand(5) +y=lab.rand(5) + + +-- Use a typical generic gradient update function +function gradUpdate(mlp, x, y, criterion, learningRate) +local pred = mlp:forward(x) +local err = criterion:forward(pred, y) +local gradCriterion = criterion:backward(pred, y) +mlp:zeroGradParameters() +mlp:backward(x, gradCriterion) +mlp:updateParameters(learningRate) +end + +-- push the pair x and y together, notice how then the distance between them given +-- by print(mlp:forward({x,y})[1]) gets smaller +for i=1,10 do +gradUpdate(mlp,{x,y},1,crit,0.01) +print(mlp:forward({x,y})[1]) +end + + +-- pull apart the pair x and y, notice how then the distance between them given +-- by print(mlp:forward({x,y})[1]) gets larger + +for i=1,10 do +gradUpdate(mlp,{x,y},-1,crit,0.01) +print(mlp:forward({x,y})[1]) +end + + + +==== DotProduct ==== +{{anchor:nn.DotProduct}} + +''module'' = ''DotProduct()'' creates a module that takes a table of two vectors as input and outputs the dot product between them. + +Example: + +mlp=nn.DotProduct() +x=lab.new(1,2,3) +y=lab.new(4,5,6) +print(mlp:forward({x,y})) + +gives the output: + + 32 +[torch.Tensor of dimension 1] + + + +A more complicated example: + + +-- Train a ranking function so that mlp:forward({x,y},{x,z}) returns a number +-- which indicates whether x is better matched with y or z (larger score = better match), or vice versa. + +mlp1=nn.Linear(5,10) +mlp2=mlp1:clone('weight','bias') + +prl=nn.ParallelTable(); +prl:add(mlp1); prl:add(mlp2) + +mlp1=nn.Sequential() +mlp1:add(prl) +mlp1:add(nn.DotProduct()) + +mlp2=mlp1:clone('weight','bias') + +mlp=nn.Sequential() +prla=nn.ParallelTable() +prla:add(mlp1) +prla:add(mlp2) +mlp:add(prla) + +x=lab.rand(5); +y=lab.rand(5) +z=lab.rand(5) + + +print(mlp1:forward{x,x}) +print(mlp1:forward{x,y}) +print(mlp1:forward{y,y}) + + +crit=nn.MarginRankingCriterion(1); + +-- Use a typical generic gradient update function +function gradUpdate(mlp, x, y, criterion, learningRate) + local pred = mlp:forward(x) + local err = criterion:forward(pred, y) + local gradCriterion = criterion:backward(pred, y) + mlp:zeroGradParameters() + mlp:backward(x, gradCriterion) + mlp:updateParameters(learningRate) +end + +inp={{x,y},{x,z}} + +math.randomseed(1) + +-- make the pair x and y have a larger dot product than x and z + +for i=1,100 do + gradUpdate(mlp,inp,1,crit,0.05) + o1=mlp1:forward{x,y}[1]; + o2=mlp2:forward{x,z}[1]; + o=crit:forward(mlp:forward{{x,y},{x,z}},1) + print(o1,o2,o) +end + +print "******************" + +-- make the pair x and z have a larger dot product than x and y + +for i=1,100 do + gradUpdate(mlp,inp,-1,crit,0.05) + o1=mlp1:forward{x,y}[1]; + o2=mlp2:forward{x,z}[1]; + o=crit:forward(mlp:forward{{x,y},{x,z}},-1) + print(o1,o2,o) +end + + + +==== CosineDistance ==== +{{anchor:nn.CosineDistance}} + +''module'' = ''CosineDistance()'' creates a module that takes a table of two vectors as input and outputs the cosine distance between them. 
+ +Example: + +mlp=nn.CosineDistance() +x=lab.new(1,2,3) +y=lab.new(4,5,6) +print(mlp:forward({x,y})) + +gives the output: + + 0.9746 +[torch.Tensor of dimension 1] + + +A more complicated example: + + +-- imagine we have one network we are interested in, it is called "p1_mlp" +p1_mlp= nn.Sequential(); p1_mlp:add(nn.Linear(5,2)) + +-- But we want to push examples towards or away from each other +-- so we make another copy of it called p2_mlp +-- this *shares* the same weights via the set command, but has its own set of temporary gradient storage +-- that's why we create it again (so that the gradients of the pair don't wipe each other) +p2_mlp= p1_mlp:clone('weight','bias') + +-- we make a parallel table that takes a pair of examples as input. they both go through the same (cloned) mlp +prl = nn.ParallelTable() +prl:add(p1_mlp) +prl:add(p2_mlp) + +-- now we define our top level network that takes this parallel table and computes the cosine distance betweem +-- the pair of outputs +mlp= nn.Sequential() +mlp:add(prl) +mlp:add(nn.CosineDistance()) + + +-- lets make two example vectors +x=lab.rand(5) +y=lab.rand(5) + +-- Grad update function.. +function gradUpdate(mlp, x, y, learningRate) +local pred = mlp:forward(x) +if pred[1]*y < 1 then + gradCriterion=lab.new(-y) + mlp:zeroGradParameters() + mlp:backward(x, gradCriterion) + mlp:updateParameters(learningRate) +end +end + +-- push the pair x and y together, the distance should get larger.. +for i=1,1000 do + gradUpdate(mlp,{x,y},1,0.1) + if ((i%100)==0) then print(mlp:forward({x,y})[1]);end +end + + +-- pull apart the pair x and y, the distance should get smaller.. + +for i=1,1000 do + gradUpdate(mlp,{x,y},-1,0.1) + if ((i%100)==0) then print(mlp:forward({x,y})[1]);end +end + + + + +==== CriterionTable ==== +{{anchor:nn.CriterionTable}} + +''module'' = ''CriterionTable(criterion)'' + +Creates a module that wraps a Criterion module so that it can accept a Table of inputs. Typically the table would contain two elements: the input and output ''x'' and ''y'' that the Criterion compares. + +Example: + +mlp = nn.CriterionTable(nn.MSECriterion()) +require "lab" +x=lab.randn(5) +y=lab.randn(5) +print(mlp:forward{x,x}) +print(mlp:forward{x,y}) + +gives the output: + +0 +1.9028918413199 + + +Here is a more complex example of embedding the criterion into a network: + +require "lab" + +function table.print(t) + for i,k in pairs(t) do print(i,k); end +end + +mlp=nn.Sequential(); -- Create an mlp that takes input + main_mlp=nn.Sequential(); -- and output using ParallelTable + main_mlp:add(nn.Linear(5,4)) + main_mlp:add(nn.Linear(4,3)) + cmlp=nn.ParallelTable(); + cmlp:add(main_mlp) + cmlp:add(nn.Identity()) +mlp:add(cmlp) +mlp:add(nn.CriterionTable(nn.MSECriterion())) -- Apply the Criterion + +for i=1,20 do -- Train for a few iterations + x=lab.ones(5); + y=torch.Tensor(3); y:copy(x:narrow(1,1,3)) + err=mlp:forward{x,y} -- Pass in both input and output + print(err) + + mlp:zeroGradParameters(); + mlp:backward({x, y} ); + mlp:updateParameters(0.05); +end + + +==== CAddTable ==== +{{anchor:nn.CAddTable}} + +Takes a table of tensors and outputs summation of all tensors. 
+ + +ii = {lab.ones(5),lab.ones(5)*2,lab.ones(5)*3} +=ii[1] + 1 + 1 + 1 + 1 + 1 +[torch.DoubleTensor of dimension 5] + +return ii[2] + 2 + 2 + 2 + 2 + 2 +[torch.DoubleTensor of dimension 5] + +return ii[3] + 3 + 3 + 3 + 3 + 3 +[torch.DoubleTensor of dimension 5] + +m=nn.CAddTable() +=m:forward(ii) + 6 + 6 + 6 + 6 + 6 +[torch.DoubleTensor of dimension 5] + + +==== CSubTable ==== +{{anchor:nn.CSubTable}} + +Takes a table with two tensor and returns the component-wise +subtraction between them. + + +m=nn.CSubTable() +=m:forward({lab.ones(5)*2.2,lab.ones(5)}) + 1.2000 + 1.2000 + 1.2000 + 1.2000 + 1.2000 +[torch.DoubleTensor of dimension 5] + + +==== CMulTable ==== +{{anchor:nn.CMulTable}} + +Takes a table of tensors and outputs the multiplication of all of them. + + +ii = {lab.ones(5)*2,lab.ones(5)*3,lab.ones(5)*4} +m=nn.CMulTable() +=m:forward(ii) + 24 + 24 + 24 + 24 + 24 +[torch.DoubleTensor of dimension 5] + + + +==== CDivTable ==== +{{anchor:nn.CDivTable}} + +Takes a table with two tensor and returns the component-wise +division between them. + + +m=nn.CDivTable() +=m:forward({lab.ones(5)*2.2,lab.ones(5)*4.4}) + 0.5000 + 0.5000 + 0.5000 + 0.5000 + 0.5000 +[torch.DoubleTensor of dimension 5] + + +====== Criterions ====== +{{anchor:nn.Criterions}} + +Criterions are helpful to train a neural network. Given an input and a +target, they compute a gradient according to a given loss +function. [[#nn.AbsCriterion|AbsCriterion]] and +[[#nn.MSECriterion|MSECriterion]] are perfect for regression problems, while +[[#nn.ClassNLLCriterion|ClassNLLCriterion]] is the criterion of choice when +dealing with classification. + +Criterions are [[..:torch:file#torch.file.serialization|serializable]]. + +===== Criterion ===== +{{anchor:nn.Criterion}} + +This is an abstract class which declares methods defined in all criterions. +This class is [[..:torch:file#torch.file.serialization|serializable]]. + +==== [output] forward(input, target) ==== +{{anchor:nn.Criterion.forward}} + +Given an ''input'' and a ''target'', compute the loss function associated to the criterion and return the +result. In general ''input'' and ''target'' are [[..:torch:tensor|tensors]], but some specific criterions +might require some other type of object. + +The ''output'' returned should be a scalar in general. + +The state variable [[#nn.Criterion.output|self.output]] should be updated after a call to ''forward()''. + +==== [gradInput] backward(input, target) ==== +{{anchor:nn.Criterion.backward}} + +Given an ''input'' and a ''target'', compute the gradients of the loss function associated to the criterion and +return the result.In general ''input'', ''target'' and ''gradInput'' are [[..:torch:tensor|tensors]], but some specific criterions +might require some other type of object. + +The state variable [[#nn.Criterion.gradInput|self.gradInput]] should be updated after a call to ''backward()''. + +==== State variable: output ==== +{{anchor:nn.Criterion.output}} + +State variable which contains the result of the last [[#nn.Criterion.forward|forward(input, target)]] call. + +==== State variable: gradInput ==== +{{anchor:nn.Criterion.gradInput}} + +State variable which contains the result of the last [[#nn.Criterion.backward|backward(input, target)]] call. + +===== AbsCriterion ===== +{{anchor:nn.AbsCriterion}} + + +criterion = AbsCriterion() + + +Creates a criterion that +measures the mean absolute value between ''n'' elements in the input ''x'' +and output ''y'': + +''loss(x,y)'' = ''1/n \sum |x_i-y_i|''. 
+ +If ''x'' and ''y'' are ''d''-dimensional Tensors with a total of ''n'' elements, +the sum operation still operates over all the elements, and divides by ''n''. + +The division by ''n'' can be avoided if one sets the internal variable ''sizeAverage'' to ''false'': + +criterion = nn.AbsCriterion() +criterion.sizeAverage = false + + +===== ClassNLLCriterion ===== +{{anchor:nn.ClassNLLCriterion}} + + +criterion = ClassNLLCriterion() + + +The negative log likelihood criterion. It is useful to train a classication +problem with ''n'' classes. The ''input'' given through a ''forward()'' is +expected to contain //log-probabilities// of each class: ''input'' has to be a +1D tensor of size ''n''. Obtaining log-probabilities in a neural network is +easily achieved by adding a [[#nn.LogSoftMax|LogSoftMax]] layer in the last +layer of your neural network. + +This criterion expect a class index (1 to the number of class) as ''target'' +when calling [[#nn.CriterionForward|forward(input, target)]] and +[[#nn.CriterionBackward|backward(input, target)]]. + +The loss can be described as: + +loss(x, class) = forward(x, class) = -x[class] + + +The following is a code fragment showing how to make a gradient step +given an input ''x'', a desired output ''y'' (an integer ''1'' to ''n'', +in this case ''n'' = ''2'' classes), +a network ''mlp'' and a learning rate ''learningRate'': + +function gradUpdate(mlp,x,y,learningRate) + local criterion = nn.ClassNLLCriterion() + pred = mlp:forward(x) + local err = criterion:forward(pred, y); + mlp:zeroGradParameters(); + local t = criterion:backward(pred, y); + mlp:backward(x, t); + mlp:updateParameters(learningRate); +end + + +===== MarginCriterion ===== +{{anchor:nn.MarginCriterion}} + + +criterion = MarginCriterion() + + +Creates a criterion that optimizes a two-class classification hinge loss (margin-based loss) between input ''x'' (a Tensor of dimension 1) and output ''y'' (which is a scalar, either 1 or -1) : + + +loss(x,y) = forward(x,y) = max(0,m- y x). + + +''m'' is the margin, which is by default 1. + + +criterion = MarginCriterion(marginValue) + + +sets a different value of ''m''. + + +Example: + +require "nn" +require "lab" + +function gradUpdate(mlp, x, y, criterion, learningRate) + local pred = mlp:forward(x) + local err = criterion:forward(pred, y) + local gradCriterion = criterion:backward(pred, y) + mlp:zeroGradParameters() + mlp:backward(x, gradCriterion) + mlp:updateParameters(learningRate) +end + +mlp=nn.Sequential() +mlp:add(nn.Linear(5,1)) + +x1=lab.rand(5) +x2=lab.rand(5) +criterion=nn.MarginCriterion(1) + +for i=1,1000 do + gradUpdate(mlp,x1,1,criterion,0.01) + gradUpdate(mlp,x2,-1,criterion,0.01) +end + +print(mlp:forward(x1)) +print(mlp:forward(x2)) + +print(criterion:forward(mlp:forward(x1),1)) +print(criterion:forward(mlp:forward(x2),-1)) + +gives the output: + + 1.0043 +[torch.Tensor of dimension 1] + + +-1.0061 +[torch.Tensor of dimension 1] + +0 +0 + +i.e. the mlp successfully separates the two data points such that they both have a margin of 1, and hence a loss of 0. + +===== MSECriterion ===== +{{anchor:nn.MSECriterion}} + + +criterion = MSECriterion() + + +Creates a criterion that measures the mean squared error between ''n'' elements in the input ''x'' +and output ''y'': + + +loss(x,y) = forward(x,y) = 1/n \sum |x_i-y_i|^2 . + + +If ''x'' and ''y'' are ''d''-dimensional Tensors with a total of ''n'' elements, +the sum operation still operates over all the elements, and divides by ''n''. 
The two tensors must +have the same number of elements (but their sizes might be different...) + +The division by ''n'' can be avoided if one sets the internal variable ''sizeAverage'' to ''false'': + +criterion = nn.MSECriterion() +criterion.sizeAverage = false + + +===== MultiCriterion ===== +{{anchor:nn.MultiCriterion}} + + +criterion = MultiCriterion() + + +This returns a Criterion which is a weighted sum of other Criterion. +Criterions are added using the method: + +''criterion:add(singleCriterion, weight)'' + +where ''weight'' is a scalar. + + +===== HingeEmbeddingCriterion ===== +{{anchor:nn.HingeEmbeddingCriterion}} + + +criterion = HingeEmbeddingCriterion() + + +Creates a criterion that measures the loss given an input +''x'' which is a 1-dimensional vector and a label ''y'' (1 or -1). +This is usually used for measuring whether two inputs are similar +or dissimilar, e.g. using the L1 pairwise distance, +and is typically used for +learning nonlinear embeddings or semi-supervised learning. + + +loss(x,y) = forward(x,y) = x, if y=1 += max(0,margin - x), if y=-1 + + +The ''margin'' has a default value of 1, or can be set in the constructor: + +criterion = HingeEmbeddingCriterion(marginValue) + + +Example use: + +-- imagine we have one network we are interested in, it is called "p1_mlp" +p1_mlp= nn.Sequential(); p1_mlp:add(nn.Linear(5,2)) + +-- But we want to push examples towards or away from each other +-- so we make another copy of it called p2_mlp +-- this *shares* the same weights via the set command, but has its own set of temporary gradient storage +-- that's why we create it again (so that the gradients of the pair don't wipe each other) +p2_mlp= nn.Sequential(); p2_mlp:add(nn.Linear(5,2)) +p2_mlp:get(1).weight:set(p1_mlp:get(1).weight) +p2_mlp:get(1).bias:set(p1_mlp:get(1).bias) + +-- we make a parallel table that takes a pair of examples as input. 
+-- they both go through the same (cloned) mlp
+prl = nn.ParallelTable()
+prl:add(p1_mlp)
+prl:add(p2_mlp)
+
+-- now we define our top-level network that takes this parallel table
+-- and computes the pairwise distance between the pair of outputs
+mlp = nn.Sequential()
+mlp:add(prl)
+mlp:add(nn.PairwiseDistance(1))
+
+-- and a criterion for pushing together or pulling apart pairs
+crit = nn.HingeEmbeddingCriterion(1)
+
+-- let's make two example vectors
+x = lab.rand(5)
+y = lab.rand(5)
+
+
+-- Use a typical generic gradient update function
+function gradUpdate(mlp, x, y, criterion, learningRate)
+   local pred = mlp:forward(x)
+   local err = criterion:forward(pred, y)
+   local gradCriterion = criterion:backward(pred, y)
+   mlp:zeroGradParameters()
+   mlp:backward(x, gradCriterion)
+   mlp:updateParameters(learningRate)
+end
+
+-- push the pair x and y together; notice how the distance between them, given
+-- by print(mlp:forward({x,y})[1]), gets smaller
+for i = 1, 10 do
+   gradUpdate(mlp, {x, y}, 1, crit, 0.01)
+   print(mlp:forward({x, y})[1])
+end
+
+
+-- pull apart the pair x and y; notice how the distance between them, given
+-- by print(mlp:forward({x,y})[1]), gets larger
+
+for i = 1, 10 do
+   gradUpdate(mlp, {x, y}, -1, crit, 0.01)
+   print(mlp:forward({x, y})[1])
+end
+
+
+
+===== L1HingeEmbeddingCriterion =====
+{{anchor:nn.L1HingeEmbeddingCriterion}}
+
+
+criterion = L1HingeEmbeddingCriterion(margin)
+
+
+Creates a criterion that measures the loss given an input
+''x'' = ''{x1, x2}'', a table of two tensors, and a label ''y'' (1 or -1).
+This is used for measuring whether two inputs are similar
+or dissimilar, using the L1 distance, and is typically used for
+learning nonlinear embeddings or semi-supervised learning.
+
+
+loss(x, y) = forward(x, y) = ||x1 - x2||_1,                   if y =  1
+                           = max(0, margin - ||x1 - x2||_1),  if y = -1
+
+
+The ''margin'' has a default value of 1, or can be set in the constructor:
+
+criterion = L1HingeEmbeddingCriterion(marginValue)
+
+
+===== CosineEmbeddingCriterion =====
+{{anchor:nn.CosineEmbeddingCriterion}}
+
+
+criterion = nn.CosineEmbeddingCriterion(margin)
+
+
+Creates a criterion that measures the loss given an input
+''x'' = ''{x1, x2}'', a table of two tensors, and a label ''y'' (1 or -1).
+This is used for measuring whether two inputs are similar
+or dissimilar, using the cosine distance, and is typically used for
+learning nonlinear embeddings or semi-supervised learning.
+
+''margin'' should be a number from -1 to 1; 0 to 0.5 is suggested.
+Forward and Backward have to be used alternately. If ''margin'' is missing, the default value is 0.
+
+The loss function is:
+
+loss(x, y) = forward(x, y) = 1 - cos(x1, x2),              if y =  1
+                           = max(0, cos(x1, x2) - margin), if y = -1
+
+
+===== MarginRankingCriterion =====
+{{anchor:nn.MarginRankingCriterion}}
+
+
+criterion = nn.MarginRankingCriterion(margin)
+
+
+Creates a criterion that measures the loss given an input
+''x'' = ''{x1, x2}'', a table of two Tensors of size 1 (they contain only scalars),
+and a label ''y'' (1 or -1).
+
+If ''y'' = ''1'' then it is assumed that the first input should be ranked higher (have a larger value)
+than the second input, and vice-versa for ''y'' = ''-1''.
+
+The loss function is:
+
+loss(x, y) = forward(x, y) = max(0, -y*(x[1] - x[2]) + margin)
+
+
+Example:
+
+
+p1_mlp = nn.Linear(5, 2)
+p2_mlp = p1_mlp:clone('weight', 'bias')
+
+prl = nn.ParallelTable()
+prl:add(p1_mlp)
+prl:add(p2_mlp)
+
+mlp1 = nn.Sequential()
+mlp1:add(prl)
+mlp1:add(nn.DotProduct())
+
+mlp2 = mlp1:clone('weight', 'bias')
+
+mlpa = nn.Sequential()
+prla = nn.ParallelTable()
+prla:add(mlp1)
+prla:add(mlp2)
+mlpa:add(prla)
+
+crit = nn.MarginRankingCriterion(0.1)
+
+x = lab.randn(5)
+y = lab.randn(5)
+z = lab.randn(5)
+
+
+-- Use a typical generic gradient update function
+function gradUpdate(mlp, x, y, criterion, learningRate)
+   local pred = mlp:forward(x)
+   local err = criterion:forward(pred, y)
+   local gradCriterion = criterion:backward(pred, y)
+   mlp:zeroGradParameters()
+   mlp:backward(x, gradCriterion)
+   mlp:updateParameters(learningRate)
+end
+
+for i = 1, 100 do
+   gradUpdate(mlpa, {{x, y}, {x, z}}, 1, crit, 0.01)
+   if true then
+      o1 = mlp1:forward{x, y}[1]
+      o2 = mlp2:forward{x, z}[1]
+      o = crit:forward(mlpa:forward{{x, y}, {x, z}}, 1)
+      print(o1, o2, o)
+   end
+end
+
+print "--"
+
+for i = 1, 100 do
+   gradUpdate(mlpa, {{x, y}, {x, z}}, -1, crit, 0.01)
+   if true then
+      o1 = mlp1:forward{x, y}[1]
+      o2 = mlp2:forward{x, z}[1]
+      o = crit:forward(mlpa:forward{{x, y}, {x, z}}, -1)
+      print(o1, o2, o)
+   end
+end
+
+
+====== Training a neural network ======
+{{anchor:nn.traningneuralnet.dok}}
+
+Training a neural network is easy with a [[#nn.DoItYourself|simple ''for'' loop]].
+While doing your own loop provides great flexibility, you might
+sometimes want a quick way of training neural
+networks. [[#nn.StochasticGradient|StochasticGradient]], a simple class
+which does the job for you, is provided as standard.
+
+===== StochasticGradient =====
+{{anchor:nn.StochasticGradient.dok}}
+
+''StochasticGradient'' is a high-level class for training [[#nn.Module|neural networks]], using a stochastic gradient
+algorithm. This class is [[..:torch:file#torch.file.serialization|serializable]].
+
+==== StochasticGradient(module, criterion) ====
+{{anchor:nn.StochasticGradient}}
+
+Create a ''StochasticGradient'' class, using the given [[#nn.Module|Module]] and [[#nn.Criterion|Criterion]].
+The class contains [[#nn.StochasticGradientParameters|several parameters]] you might want to set after initialization.
+
+==== train(dataset) ====
+{{anchor:nn.StochasticGradientTrain}}
+
+Train the module and criterion given in the
+[[#nn.StochasticGradient|constructor]] over ''dataset'', using the
+internal [[#nn.StochasticGradientParameters|parameters]].
+
+StochasticGradient expects as a ''dataset'' an object which implements the operator
+''dataset[index]'' and the method ''dataset:size()''. The ''size()'' method
+returns the number of examples and ''dataset[i]'' has to return the i-th example.
+
+An ''example'' has to be an object which implements the operator
+''example[field]'', where ''field'' might take the value ''1'' (input features)
+or ''2'' (corresponding label which will be given to the criterion).
+The input is usually a Tensor (except if you use special kinds of gradient modules,
+like [[#nn.TableLayers|table layers]]). The label type depends on the criterion.
+For example, the [[#nn.MSECriterion|MSECriterion]] expects a Tensor, but the
+[[#nn.ClassNLLCriterion|ClassNLLCriterion]] expects an integer number (the class).
+
+Such a dataset is easily constructed by using Lua tables, but it could be any ''C'' object,
+for example, as long as the required operators/methods are implemented.
+[[#nn.DoItStochasticGradient|See an example]]; a minimal sketch of such an object follows below.
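+As a rough sketch of these conventions (''lazyDataset'' and the generated data below are
+purely illustrative, not part of the package), a dataset does not even have to store its
+examples in memory; the ''__index'' metamethod can produce them on demand:
+
+require "nn"
+require "lab"
+
+lazyDataset = {}
+function lazyDataset:size() return 100 end
+setmetatable(lazyDataset, {__index = function(self, i)
+   local input = lab.randn(2)
+   local target = torch.Tensor(1)
+   target[1] = (input[1] * input[2] > 0) and -1 or 1   -- XOR-like label, as in the example further down
+   return {input, target}   -- example[1] = input features, example[2] = label
+end})
+
+-- any object supporting these operators can be passed to trainer:train(lazyDataset)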
+
+==== Parameters ====
+{{anchor:nn.StochasticGradientParameters}}
+
+''StochasticGradient'' has several fields which have an impact on a call to [[#nn.StochasticGradientTrain|train()]].
+
+  * ''learningRate'': This is the learning rate used during training. The update of the parameters will be ''parameters = parameters - learningRate * parameters_gradient''. Default value is ''0.01''.
+  * ''learningRateDecay'': The learning rate decay. If non-zero, the learning rate (note: the field ''learningRate'' will not change its value) will be computed after each iteration (pass over the dataset) with: ''current_learning_rate = learningRate / (1 + iteration * learningRateDecay)''
+  * ''maxIteration'': The maximum number of iterations (passes over the dataset). Default is ''25''.
+  * ''shuffleIndices'': Boolean which says if the examples will be randomly sampled or not. Default is ''true''. If ''false'', the examples will be taken in the order of the dataset.
+  * ''hookExample'': A possible hook function which will be called (if non-nil) during training after each example has been forwarded and backwarded through the network. The function takes ''(self, example)'' as parameters. Default is ''nil''.
+  * ''hookIteration'': A possible hook function which will be called (if non-nil) during training after a complete pass over the dataset. The function takes ''(self, iteration)'' as parameters. Default is ''nil''.
+
+===== Example of training using StochasticGradient =====
+{{anchor:nn.DoItStochasticGradient}}
+
+We show an example here on a classical XOR problem.
+
+**Dataset**
+
+We first need to create a dataset, following the conventions described in
+[[#nn.StochasticGradientTrain|StochasticGradient]].
+
+require "lab"
+dataset = {}
+function dataset:size() return 100 end -- 100 examples
+for i = 1, dataset:size() do
+   local input = lab.randn(2)      -- normally distributed example in 2d
+   local output = torch.Tensor(1)
+   if input[1] * input[2] > 0 then -- calculate label for XOR function
+      output[1] = -1
+   else
+      output[1] = 1
+   end
+   dataset[i] = {input, output}
+end
+
+
+**Neural Network**
+
+We create a simple neural network with one hidden layer.
+
+require "nn"
+mlp = nn.Sequential()              -- make a multi-layer perceptron
+inputs = 2; outputs = 1; HUs = 20  -- parameters
+mlp:add(nn.Linear(inputs, HUs))
+mlp:add(nn.Tanh())
+mlp:add(nn.Linear(HUs, outputs))
+
+
+**Training**
+
+We choose the Mean Squared Error criterion and train the beast.
+
+criterion = nn.MSECriterion()
+trainer = nn.StochasticGradient(mlp, criterion)
+trainer.learningRate = 0.01
+trainer:train(dataset)
+
+
+**Test the network**
+
+
+x = torch.Tensor(2)
+x[1] =  0.5; x[2] =  0.5; print(mlp:forward(x))
+x[1] =  0.5; x[2] = -0.5; print(mlp:forward(x))
+x[1] = -0.5; x[2] =  0.5; print(mlp:forward(x))
+x[1] = -0.5; x[2] = -0.5; print(mlp:forward(x))
+
+
+You should see something like:
+
+> x = torch.Tensor(2)
+> x[1] = 0.5; x[2] = 0.5; print(mlp:forward(x))
+
+-0.3490
+[torch.Tensor of dimension 1]
+
+> x[1] = 0.5; x[2] = -0.5; print(mlp:forward(x))
+
+ 1.0561
+[torch.Tensor of dimension 1]
+
+> x[1] = -0.5; x[2] = 0.5; print(mlp:forward(x))
+
+ 0.8640
+[torch.Tensor of dimension 1]
+
+> x[1] = -0.5; x[2] = -0.5; print(mlp:forward(x))
+
+-0.2941
+[torch.Tensor of dimension 1]
+
+
+===== Example of manual training of a neural network =====
+{{anchor:nn.DoItYourself}}
+
+We show the same classical XOR problem here, but this time with the training loop written by hand.
+
+**Neural Network**
+
+We create a simple neural network with one hidden layer.
+ +require "nn" +mlp = nn.Sequential(); -- make a multi-layer perceptron +inputs = 2; outputs = 1; HUs = 20; -- parameters +mlp:add(nn.Linear(inputs, HUs)) +mlp:add(nn.Tanh()) +mlp:add(nn.Linear(HUs, outputs)) + + +**Loss function** + +We choose the Mean Squared Error criterion. + +criterion = nn.MSECriterion() + + +**Training** + +We create data //on the fly// and feed it to the neural network. + + +require "lab" +for i = 1,2500 do + -- random sample + local input= lab.randn(2); -- normally distributed example in 2d + local output= torch.Tensor(1); + if input[1]*input[2] > 0 then -- calculate label for XOR function + output[1] = -1 + else + output[1] = 1 + end + + -- feed it to the neural network and the criterion + criterion:forward(mlp:forward(input), output) + + -- train over this example in 3 steps + -- (1) zero the accumulation of the gradients + mlp:zeroGradParameters() + -- (2) accumulate gradients + mlp:backward(input, criterion:backward(mlp.output, output)) + -- (3) update parameters with a 0.01 learning rate + mlp:updateParameters(0.01) +end + + +**Test the network** + + +x = torch.Tensor(2) +x[1] = 0.5; x[2] = 0.5; print(mlp:forward(x)) +x[1] = 0.5; x[2] = -0.5; print(mlp:forward(x)) +x[1] = -0.5; x[2] = 0.5; print(mlp:forward(x)) +x[1] = -0.5; x[2] = -0.5; print(mlp:forward(x)) + + +You should see something like: + +> x = torch.Tensor(2) +> x[1] = 0.5; x[2] = 0.5; print(mlp:forward(x)) + +-0.6140 +[torch.Tensor of dimension 1] + +> x[1] = 0.5; x[2] = -0.5; print(mlp:forward(x)) + + 0.8878 +[torch.Tensor of dimension 1] + +> x[1] = -0.5; x[2] = 0.5; print(mlp:forward(x)) + + 0.8548 +[torch.Tensor of dimension 1] + +> x[1] = -0.5; x[2] = -0.5; print(mlp:forward(x)) + +-0.5498 +[torch.Tensor of dimension 1] + diff --git a/dok/lena.jpg b/dok/lena.jpg new file mode 100644 index 0000000..d4a8c36 Binary files /dev/null and b/dok/lena.jpg differ diff --git a/dok/lenap.jpg b/dok/lenap.jpg new file mode 100644 index 0000000..0e6916d Binary files /dev/null and b/dok/lenap.jpg differ diff --git a/dok/logsigmoid.png b/dok/logsigmoid.png new file mode 100644 index 0000000..f632ed8 Binary files /dev/null and b/dok/logsigmoid.png differ diff --git a/dok/logsoftmax.png b/dok/logsoftmax.png new file mode 100644 index 0000000..dec5be5 Binary files /dev/null and b/dok/logsoftmax.png differ diff --git a/dok/power.png b/dok/power.png new file mode 100644 index 0000000..958eeb4 Binary files /dev/null and b/dok/power.png differ diff --git a/dok/sigmmoid.png b/dok/sigmmoid.png new file mode 100644 index 0000000..48aad7e Binary files /dev/null and b/dok/sigmmoid.png differ diff --git a/dok/sigmoid.png b/dok/sigmoid.png new file mode 100644 index 0000000..48aad7e Binary files /dev/null and b/dok/sigmoid.png differ diff --git a/dok/softmax.png b/dok/softmax.png new file mode 100644 index 0000000..29c5534 Binary files /dev/null and b/dok/softmax.png differ diff --git a/dok/softmin.png b/dok/softmin.png new file mode 100644 index 0000000..d1807a4 Binary files /dev/null and b/dok/softmin.png differ diff --git a/dok/softplus.png b/dok/softplus.png new file mode 100644 index 0000000..a5ee028 Binary files /dev/null and b/dok/softplus.png differ diff --git a/dok/softsign.png b/dok/softsign.png new file mode 100644 index 0000000..0805433 Binary files /dev/null and b/dok/softsign.png differ diff --git a/dok/sqrt.png b/dok/sqrt.png new file mode 100644 index 0000000..29b1d42 Binary files /dev/null and b/dok/sqrt.png differ diff --git a/dok/square.png b/dok/square.png new file mode 100644 index 0000000..c191eaf 
Binary files /dev/null and b/dok/square.png differ diff --git a/dok/sshrink.png b/dok/sshrink.png new file mode 100644 index 0000000..99c5d11 Binary files /dev/null and b/dok/sshrink.png differ diff --git a/dok/tanh.png b/dok/tanh.png new file mode 100644 index 0000000..d2f77aa Binary files /dev/null and b/dok/tanh.png differ diff --git a/generic/Abs.c b/generic/Abs.c new file mode 100644 index 0000000..8c65813 --- /dev/null +++ b/generic/Abs.c @@ -0,0 +1,43 @@ +#ifndef TH_GENERIC_FILE +#define TH_GENERIC_FILE "generic/Abs.c" +#else + +static int nn_(Abs_updateOutput)(lua_State *L) +{ + THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id)); + THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_(Tensor_id)); + + THTensor_(resizeAs)(output, input); + + TH_TENSOR_APPLY2(real, output, real, input, \ + *output_data = fabs(*input_data);) + return 1; +} + +static int nn_(Abs_updateGradInput)(lua_State *L) +{ + THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id)); + THTensor *gradOutput = luaT_checkudata(L, 3, torch_(Tensor_id)); + THTensor *gradInput = luaT_getfieldcheckudata(L, 1, "gradInput", torch_(Tensor_id)); + + THTensor_(resizeAs)(gradInput, input); + TH_TENSOR_APPLY3(real, gradInput, real, gradOutput, real, input, \ + real z = *input_data; \ + *gradInput_data = *gradOutput_data * (z >= 0 ? 1 : -1);) + return 1; +} + +static const struct luaL_Reg nn_(Abs__) [] = { + {"Abs_updateOutput", nn_(Abs_updateOutput)}, + {"Abs_updateGradInput", nn_(Abs_updateGradInput)}, + {NULL, NULL} +}; + +static void nn_(Abs_init)(lua_State *L) +{ + luaT_pushmetaclass(L, torch_(Tensor_id)); + luaT_registeratname(L, nn_(Abs__), "nn"); + lua_pop(L,1); +} + +#endif diff --git a/generic/AbsCriterion.c b/generic/AbsCriterion.c new file mode 100644 index 0000000..b9b948d --- /dev/null +++ b/generic/AbsCriterion.c @@ -0,0 +1,54 @@ +#ifndef TH_GENERIC_FILE +#define TH_GENERIC_FILE "generic/AbsCriterion.c" +#else + +static int nn_(AbsCriterion_updateOutput)(lua_State *L) +{ + THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id)); + THTensor *target = luaT_checkudata(L, 3, torch_(Tensor_id)); + int sizeAverage = luaT_getfieldcheckboolean(L, 1, "sizeAverage"); + real sum; + + sum = 0; + TH_TENSOR_APPLY2(real, input, real, target, + sum += fabs(*input_data - *target_data);) + + if(sizeAverage) + sum /= THTensor_(nElement)(input); + + lua_pushnumber(L, sum); + lua_setfield(L, 1, "output"); + + lua_pushnumber(L, sum); + return 1; +} + +static int nn_(AbsCriterion_updateGradInput)(lua_State *L) +{ + THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id)); + THTensor *target = luaT_checkudata(L, 3, torch_(Tensor_id)); + int sizeAverage = luaT_getfieldcheckboolean(L, 1, "sizeAverage"); + THTensor *gradInput = luaT_getfieldcheckudata(L, 1, "gradInput", torch_(Tensor_id)); + real norm = (sizeAverage ? 1./((real)THTensor_(nElement)(input)) : 1.); + + THTensor_(resizeAs)(gradInput, input); + TH_TENSOR_APPLY3(real, gradInput, real, input, real, target, + *gradInput_data = ( (*input_data - *target_data) >= 0 ? 
norm : -norm);) + + return 1; +} + +static const struct luaL_Reg nn_(AbsCriterion__) [] = { + {"AbsCriterion_updateOutput", nn_(AbsCriterion_updateOutput)}, + {"AbsCriterion_updateGradInput", nn_(AbsCriterion_updateGradInput)}, + {NULL, NULL} +}; + +static void nn_(AbsCriterion_init)(lua_State *L) +{ + luaT_pushmetaclass(L, torch_(Tensor_id)); + luaT_registeratname(L, nn_(AbsCriterion__), "nn"); + lua_pop(L,1); +} + +#endif diff --git a/generic/Exp.c b/generic/Exp.c new file mode 100644 index 0000000..b56f379 --- /dev/null +++ b/generic/Exp.c @@ -0,0 +1,43 @@ +#ifndef TH_GENERIC_FILE +#define TH_GENERIC_FILE "generic/Exp.c" +#else + +static int nn_(Exp_updateOutput)(lua_State *L) +{ + THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id)); + THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_(Tensor_id)); + + THTensor_(resizeAs)(output, input); + + TH_TENSOR_APPLY2(real, output, real, input, \ + *output_data = exp(*input_data);) + + return 1; +} + +static int nn_(Exp_updateGradInput)(lua_State *L) +{ + THTensor *gradOutput = luaT_checkudata(L, 3, torch_(Tensor_id)); + THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_(Tensor_id)); + THTensor *gradInput = luaT_getfieldcheckudata(L, 1, "gradInput", torch_(Tensor_id)); + + THTensor_(resizeAs)(gradInput, output); + TH_TENSOR_APPLY3(real, gradInput, real, gradOutput, real, output, \ + *gradInput_data = *gradOutput_data * *output_data;); + return 1; +} + +static const struct luaL_Reg nn_(Exp__) [] = { + {"Exp_updateOutput", nn_(Exp_updateOutput)}, + {"Exp_updateGradInput", nn_(Exp_updateGradInput)}, + {NULL, NULL} +}; + +static void nn_(Exp_init)(lua_State *L) +{ + luaT_pushmetaclass(L, torch_(Tensor_id)); + luaT_registeratname(L, nn_(Exp__), "nn"); + lua_pop(L,1); +} + +#endif diff --git a/generic/HardShrink.c b/generic/HardShrink.c new file mode 100644 index 0000000..be98ddc --- /dev/null +++ b/generic/HardShrink.c @@ -0,0 +1,50 @@ +#ifndef TH_GENERIC_FILE +#define TH_GENERIC_FILE "generic/HardShrink.c" +#else + +static int nn_(HardShrink_updateOutput)(lua_State *L) +{ + THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id)); + real lambda = luaT_getfieldchecknumber(L, 1, "lambda"); + THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_(Tensor_id)); + + THTensor_(resizeAs)(output, input); + + TH_TENSOR_APPLY2(real, output, real, input, \ + if ((*input_data) > lambda) *output_data = *input_data; \ + else if ((*input_data) < -lambda) *output_data = *input_data; \ + else *output_data = 0;); + return 1; +} + +static int nn_(HardShrink_updateGradInput)(lua_State *L) +{ + THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id)); + real lambda = luaT_getfieldchecknumber(L, 1, "lambda"); + THTensor *gradOutput = luaT_checkudata(L, 3, torch_(Tensor_id)); + THTensor *gradInput = luaT_getfieldcheckudata(L, 1, "gradInput", torch_(Tensor_id)); + + THTensor_(resizeAs)(gradInput, input); + TH_TENSOR_APPLY3(real, gradInput, real, gradOutput, real, input, \ + if ((*input_data) > lambda || (*input_data) < -lambda) \ + *gradInput_data = (*gradOutput_data); \ + else \ + *gradInput_data = 0; \ + ); + return 1; +} + +static const struct luaL_Reg nn_(HardShrink__) [] = { + {"HardShrink_updateOutput", nn_(HardShrink_updateOutput)}, + {"HardShrink_updateGradInput", nn_(HardShrink_updateGradInput)}, + {NULL, NULL} +}; + +static void nn_(HardShrink_init)(lua_State *L) +{ + luaT_pushmetaclass(L, torch_(Tensor_id)); + luaT_registeratname(L, nn_(HardShrink__), "nn"); + lua_pop(L,1); +} + +#endif diff --git 
a/generic/HardTanh.c b/generic/HardTanh.c new file mode 100644 index 0000000..3764095 --- /dev/null +++ b/generic/HardTanh.c @@ -0,0 +1,50 @@ +#ifndef TH_GENERIC_FILE +#define TH_GENERIC_FILE "generic/HardTanh.c" +#else + +static int nn_(HardTanh_updateOutput)(lua_State *L) +{ + THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id)); + THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_(Tensor_id)); + + THTensor_(resizeAs)(output, input); + + TH_TENSOR_APPLY2(real, output, real, input, \ + if(*input_data < -1) \ + *output_data = -1; \ + else if(*input_data <= 1) \ + *output_data = *input_data; \ + else \ + *output_data = 1;) + return 1; +} + +static int nn_(HardTanh_updateGradInput)(lua_State *L) +{ + THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id)); + THTensor *gradOutput = luaT_checkudata(L, 3, torch_(Tensor_id)); + THTensor *gradInput = luaT_getfieldcheckudata(L, 1, "gradInput", torch_(Tensor_id)); + + THTensor_(resizeAs)(gradInput, input); + TH_TENSOR_APPLY3(real, gradInput, real, gradOutput, real, input, \ + if(*input_data < -1 || *input_data > 1) \ + *gradInput_data = 0; \ + else \ + *gradInput_data = *gradOutput_data;); + return 1; +} + +static const struct luaL_Reg nn_(HardTanh__) [] = { + {"HardTanh_updateOutput", nn_(HardTanh_updateOutput)}, + {"HardTanh_updateGradInput", nn_(HardTanh_updateGradInput)}, + {NULL, NULL} +}; + +static void nn_(HardTanh_init)(lua_State *L) +{ + luaT_pushmetaclass(L, torch_(Tensor_id)); + luaT_registeratname(L, nn_(HardTanh__), "nn"); + lua_pop(L,1); +} + +#endif diff --git a/generic/LogSigmoid.c b/generic/LogSigmoid.c new file mode 100644 index 0000000..b5bdae4 --- /dev/null +++ b/generic/LogSigmoid.c @@ -0,0 +1,49 @@ +#ifndef TH_GENERIC_FILE +#define TH_GENERIC_FILE "generic/LogSigmoid.c" +#else + +static int nn_(LogSigmoid_updateOutput)(lua_State *L) +{ + THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id)); + THTensor *buffer = luaT_getfieldcheckudata(L, 1, "buffer", torch_(Tensor_id)); + THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_(Tensor_id)); + + THTensor_(resizeAs)(output, input); + THTensor_(resizeAs)(buffer, input); + + TH_TENSOR_APPLY3(real, output, real, input, real, buffer, \ + real z = exp(-*input_data); \ + *buffer_data = z; \ + *output_data = -log(1. + z);) + + return 1; +} + +static int nn_(LogSigmoid_updateGradInput)(lua_State *L) +{ + THTensor *gradOutput = luaT_checkudata(L, 3, torch_(Tensor_id)); + THTensor *buffer = luaT_getfieldcheckudata(L, 1, "buffer", torch_(Tensor_id)); + THTensor *gradInput = luaT_getfieldcheckudata(L, 1, "gradInput", torch_(Tensor_id)); + + THTensor_(resizeAs)(gradInput, buffer); + TH_TENSOR_APPLY3(real, gradInput, real, gradOutput, real, buffer, \ + real z = *buffer_data; \ + *gradInput_data = *gradOutput_data * z / (1. 
+ z);) + + return 1; +} + +static const struct luaL_Reg nn_(LogSigmoid__) [] = { + {"LogSigmoid_updateOutput", nn_(LogSigmoid_updateOutput)}, + {"LogSigmoid_updateGradInput", nn_(LogSigmoid_updateGradInput)}, + {NULL, NULL} +}; + +static void nn_(LogSigmoid_init)(lua_State *L) +{ + luaT_pushmetaclass(L, torch_(Tensor_id)); + luaT_registeratname(L, nn_(LogSigmoid__), "nn"); + lua_pop(L,1); +} + +#endif diff --git a/generic/LogSoftMax.c b/generic/LogSoftMax.c new file mode 100644 index 0000000..5d4dbfc --- /dev/null +++ b/generic/LogSoftMax.c @@ -0,0 +1,111 @@ +#ifndef TH_GENERIC_FILE +#define TH_GENERIC_FILE "generic/LogSoftMax.c" +#else + +static int nn_(LogSoftMax_updateOutput)(lua_State *L) +{ + THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id)); + THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_(Tensor_id)); + real *input_data, *output_data; + long nframe = 0, dim = 0; + long t, d; + + if(input->nDimension == 1) + { + nframe = 1; + dim = input->size[0]; + } + else if(input->nDimension == 2) + { + nframe = input->size[0]; + dim = input->size[1]; + } + else + THArgCheck(0, 2, "vector or matrix expected"); + + input = THTensor_(newContiguous)(input); + THTensor_(resizeAs)(output, input); + + input_data = THTensor_(data)(input); + output_data = THTensor_(data)(output); + for(t = 0; t < nframe; t++) + { + accreal logsum = 0; + real maxInput = -THInf; + + for(d = 0; d < dim; d++) + maxInput = THMax(maxInput, input_data[d]); + + for(d = 0; d < dim; d++) + logsum += THExpMinusApprox(maxInput-input_data[d]); + logsum = maxInput + log(logsum); + + for(d = 0; d < dim; d++) + output_data[d] = input_data[d] - logsum; + + input_data += dim; + output_data += dim; + } + + THTensor_(free)(input); + + return 1; +} + +static int nn_(LogSoftMax_updateGradInput)(lua_State *L) +{ + THTensor *gradOutput = luaT_checkudata(L, 3, torch_(Tensor_id)); + THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_(Tensor_id)); + THTensor *gradInput = luaT_getfieldcheckudata(L, 1, "gradInput", torch_(Tensor_id)); + real *gradInput_data, *gradOutput_data, *output_data; + long nframe = 0, dim = 0; + long t, d; + + if(output->nDimension == 1) + { + nframe = 1; + dim = output->size[0]; + } + else if(output->nDimension == 2) + { + nframe = output->size[0]; + dim = output->size[1]; + } + else + THError("vector or matrix expected"); + + THTensor_(resizeAs)(gradInput, output); + gradInput_data = THTensor_(data)(gradInput); + output_data = THTensor_(data)(output); + gradOutput_data = THTensor_(data)(gradOutput); + for(t = 0; t < nframe; t++) + { + accreal sum = 0; + for(d = 0; d < dim; d++) + sum += gradOutput_data[d]; + + for(d = 0; d < dim; d++) + gradInput_data[d] = gradOutput_data[d] - exp(output_data[d])*sum; + + gradInput_data += dim; + output_data += dim; + gradOutput_data += dim; + } + + return 1; +} + +static const struct luaL_Reg nn_(LogSoftMax__) [] = { + {"LogSoftMax_updateOutput", nn_(LogSoftMax_updateOutput)}, + {"LogSoftMax_updateGradInput", nn_(LogSoftMax_updateGradInput)}, + {NULL, NULL} +}; + +void nn_(LogSoftMax_init)(lua_State *L) +{ + luaT_pushmetaclass(L, torch_(Tensor_id)); + luaT_registeratname(L, nn_(LogSoftMax__), "nn"); + lua_pop(L,1); +} + +#endif diff --git a/generic/MSECriterion.c b/generic/MSECriterion.c new file mode 100644 index 0000000..c53735c --- /dev/null +++ b/generic/MSECriterion.c @@ -0,0 +1,54 @@ +#ifndef TH_GENERIC_FILE +#define TH_GENERIC_FILE "generic/MSECriterion.c" +#else + +static int nn_(MSECriterion_updateOutput)(lua_State *L) +{ + THTensor *input 
= luaT_checkudata(L, 2, torch_(Tensor_id)); + THTensor *target = luaT_checkudata(L, 3, torch_(Tensor_id)); + int sizeAverage = luaT_getfieldcheckboolean(L, 1, "sizeAverage"); + real sum; + + sum = 0; + TH_TENSOR_APPLY2(real, input, real, target, + real z = (*input_data - *target_data); + sum += z*z;) + + if(sizeAverage) + sum /= THTensor_(nElement)(input); + + lua_pushnumber(L, sum); + lua_setfield(L, 1, "output"); + + lua_pushnumber(L, sum); + return 1; +} + +static int nn_(MSECriterion_updateGradInput)(lua_State *L) +{ + THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id)); + THTensor *target = luaT_checkudata(L, 3, torch_(Tensor_id)); + int sizeAverage = luaT_getfieldcheckboolean(L, 1, "sizeAverage"); + THTensor *gradInput = luaT_getfieldcheckudata(L, 1, "gradInput", torch_(Tensor_id)); + real norm = (sizeAverage ? 2./((real)THTensor_(nElement)(input)) : 2.); + + THTensor_(resizeAs)(gradInput, input); + TH_TENSOR_APPLY3(real, gradInput, real, input, real, target, + *gradInput_data = norm * (*input_data - *target_data);) + return 1; +} + +static const struct luaL_Reg nn_(MSECriterion__) [] = { + {"MSECriterion_updateOutput", nn_(MSECriterion_updateOutput)}, + {"MSECriterion_updateGradInput", nn_(MSECriterion_updateGradInput)}, + {NULL, NULL} +}; + +static void nn_(MSECriterion_init)(lua_State *L) +{ + luaT_pushmetaclass(L, torch_(Tensor_id)); + luaT_registeratname(L, nn_(MSECriterion__), "nn"); + lua_pop(L,1); +} + +#endif diff --git a/generic/Max.c b/generic/Max.c new file mode 100644 index 0000000..87f52f1 --- /dev/null +++ b/generic/Max.c @@ -0,0 +1,100 @@ +#ifndef TH_GENERIC_FILE +#define TH_GENERIC_FILE "generic/Max.c" +#else + +static int nn_(Max_updateOutput)(lua_State *L) +{ + THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id)); + int dimension = luaT_getfieldcheckint(L, 1, "dimension")-1; + THTensor *indices = luaT_getfieldcheckudata(L, 1, "indices", torch_(Tensor_id)); + THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_(Tensor_id)); + + THLongStorage *dim; + long i; + + luaL_argcheck(L, dimension >= 0 && dimension < input->nDimension, 2, "dimension out of range"); + + dim = THLongStorage_newWithSize(input->nDimension); + for(i = 0; i < input->nDimension; i++) + dim->data[i] = input->size[i]; + dim->data[dimension] = 1; + THTensor_(resize)(output, dim, NULL); + THTensor_(resize)(indices, dim, NULL); + THLongStorage_free(dim); + + TH_TENSOR_DIM_APPLY3(real, output, real, input, real, indices, dimension, + long theIndex = 0; + real theMax = input_data[0]; + for(i = 1; i < input_size; i++) + { + if(input_data[i*input_stride] > theMax) + { + theIndex = i; + theMax = input_data[i*input_stride]; + } + } + *indices_data = theIndex+1; + *output_data = theMax;) + + THTensor_(select)(output, NULL, dimension, 0); + + return 1; +} + +static int nn_(Max_updateGradInput)(lua_State *L) +{ + THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id)); + THTensor *gradOutput = luaT_checkudata(L, 3, torch_(Tensor_id)); + THTensor *indices = luaT_getfieldcheckudata(L, 1, "indices", torch_(Tensor_id)); + int dimension = luaT_getfieldcheckint(L, 1, "dimension")-1; + THTensor *gradInput = luaT_getfieldcheckudata(L, 1, "gradInput", torch_(Tensor_id)); + + THTensor *gradOutputPlusOneDim; + THLongStorage *dim, *str; + int i, j; + + THTensor_(resizeAs)(gradInput, input); + THTensor_(zero)(gradInput); + + dim = THLongStorage_newWithSize(gradOutput->nDimension+1); + str = THLongStorage_newWithSize(gradOutput->nDimension+1); + for(i = 0, j = 0; j < gradOutput->nDimension+1; j++) 
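+  /* build size/stride arrays for a view of gradOutput with the reduced
+     dimension re-inserted: that dimension gets stride 0, so each gradOutput
+     value is seen at every position along input->size[dimension] when the
+     gradient is scattered back below */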
+ { + if(j == dimension) + { + dim->data[j] = input->size[dimension]; + str->data[j] = 0; + continue; + } + + dim->data[j] = gradOutput->size[i]; + str->data[j] = gradOutput->stride[i]; + i++; + } + + gradOutputPlusOneDim = THTensor_(newWithStorage)(gradOutput->storage, gradOutput->storageOffset, dim, str); + THLongStorage_free(dim); + THLongStorage_free(str); + + TH_TENSOR_DIM_APPLY3(real, gradInput, real, gradOutputPlusOneDim, real, indices, dimension, + gradInput_data[ ((long)(*indices_data)-1)*gradInput_stride ] = *gradOutputPlusOneDim_data;) + + THTensor_(free)(gradOutputPlusOneDim); + + return 1; +} + +static const struct luaL_Reg nn_(Max__) [] = { + {"Max_updateOutput", nn_(Max_updateOutput)}, + {"Max_updateGradInput", nn_(Max_updateGradInput)}, + {NULL, NULL} +}; + +static void nn_(Max_init)(lua_State *L) +{ + luaT_pushmetaclass(L, torch_(Tensor_id)); + luaT_registeratname(L, nn_(Max__), "nn"); + lua_pop(L,1); +} + +#endif diff --git a/generic/Min.c b/generic/Min.c new file mode 100644 index 0000000..d3309df --- /dev/null +++ b/generic/Min.c @@ -0,0 +1,100 @@ +#ifndef TH_GENERIC_FILE +#define TH_GENERIC_FILE "generic/Min.c" +#else + +static int nn_(Min_updateOutput)(lua_State *L) +{ + THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id)); + int dimension = luaT_getfieldcheckint(L, 1, "dimension")-1; + THTensor *indices = luaT_getfieldcheckudata(L, 1, "indices", torch_(Tensor_id)); + THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_(Tensor_id)); + + THLongStorage *dim; + long i; + + luaL_argcheck(L, dimension >= 0 && dimension < input->nDimension, 2, "dimension out of range"); + + dim = THLongStorage_newWithSize(input->nDimension); + for(i = 0; i < input->nDimension; i++) + dim->data[i] = input->size[i]; + dim->data[dimension] = 1; + THTensor_(resize)(output, dim, NULL); + THTensor_(resize)(indices, dim, NULL); + THLongStorage_free(dim); + + TH_TENSOR_DIM_APPLY3(real, output, real, input, real, indices, dimension, + long theIndex = 0; + real theMin = input_data[0]; + for(i = 1; i < input_size; i++) + { + if(input_data[i*input_stride] < theMin) + { + theIndex = i; + theMin = input_data[i*input_stride]; + } + } + *indices_data = theIndex+1; + *output_data = theMin;) + + THTensor_(select)(output, NULL, dimension, 0); + + return 1; +} + +static int nn_(Min_updateGradInput)(lua_State *L) +{ + THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id)); + THTensor *gradOutput = luaT_checkudata(L, 3, torch_(Tensor_id)); + THTensor *indices = luaT_getfieldcheckudata(L, 1, "indices", torch_(Tensor_id)); + int dimension = luaT_getfieldcheckint(L, 1, "dimension")-1; + THTensor *gradInput = luaT_getfieldcheckudata(L, 1, "gradInput", torch_(Tensor_id)); + + THTensor *gradOutputPlusOneDim; + THLongStorage *dim, *str; + int i, j; + + THTensor_(resizeAs)(gradInput, input); + THTensor_(zero)(gradInput); + + dim = THLongStorage_newWithSize(gradOutput->nDimension+1); + str = THLongStorage_newWithSize(gradOutput->nDimension+1); + for(i = 0, j = 0; j < gradOutput->nDimension+1; j++) + { + if(j == dimension) + { + dim->data[j] = input->size[dimension]; + str->data[j] = 0; + continue; + } + + dim->data[j] = gradOutput->size[i]; + str->data[j] = gradOutput->stride[i]; + i++; + } + + gradOutputPlusOneDim = THTensor_(newWithStorage)(gradOutput->storage, gradOutput->storageOffset, dim, str); + THLongStorage_free(dim); + THLongStorage_free(str); + + TH_TENSOR_DIM_APPLY3(real, gradInput, real, gradOutputPlusOneDim, real, indices, dimension, + gradInput_data[ 
((long)(*indices_data)-1)*gradInput_stride ] = *gradOutputPlusOneDim_data;) + + THTensor_(free)(gradOutputPlusOneDim); + + return 1; +} + +static const struct luaL_Reg nn_(Min__) [] = { + {"Min_updateOutput", nn_(Min_updateOutput)}, + {"Min_updateGradInput", nn_(Min_updateGradInput)}, + {NULL, NULL} +}; + +static void nn_(Min_init)(lua_State *L) +{ + luaT_pushmetaclass(L, torch_(Tensor_id)); + luaT_registeratname(L, nn_(Min__), "nn"); + lua_pop(L,1); +} + +#endif diff --git a/generic/MultiLabelMarginCriterion.c b/generic/MultiLabelMarginCriterion.c new file mode 100644 index 0000000..f4c3914 --- /dev/null +++ b/generic/MultiLabelMarginCriterion.c @@ -0,0 +1,185 @@ +#ifndef TH_GENERIC_FILE +#define TH_GENERIC_FILE "generic/MultiLabelMarginCriterion.c" +#else + +static int nn_(MultiLabelMarginCriterion_updateOutput)(lua_State *L) +{ + THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id)); + int sizeAverage = luaT_getfieldcheckboolean(L, 1, "sizeAverage"); + real *input_data, *target_data; + long nframe, dim; + long t, d, dt, ddt; + THTensor *target; + real sum; + + THArgCheck((input->nDimension == 1) || (input->nDimension == 2), 2, "vector or matrix expected"); + + if(input->nDimension == 1) + { + nframe = 1; + dim = input->size[0]; + target = luaT_checkudata(L, 3, torch_(Tensor_id)); + THArgCheck((target->nDimension == 1) && (target->size[0] == dim), 3, "inconsistent target size"); + } + else + { + nframe = input->size[0]; + dim = input->size[1]; + target = luaT_checkudata(L, 3, torch_(Tensor_id)); + THArgCheck((target->nDimension == 2) && (target->size[0] == nframe) && (target->size[1] == dim), 3, "inconsistent target size"); + } + + THArgCheck(THTensor_(minall)(target) >= 0, 3, "target out of range"); + THArgCheck(THTensor_(maxall)(target) <= dim, 3, "target out of range"); + + target = THTensor_(newContiguous)(target); + input = THTensor_(newContiguous)(input); + input_data = THTensor_(data)(input); + target_data = THTensor_(data)(target); + + sum = 0; + for(t = 0; t < nframe; t++) + { + for(dt = 0; dt < dim; dt++) + { + long target_idx = (long)target_data[dt]-1; + real input_target; + if(target_idx < 0) + break; + + input_target = input_data[target_idx]; + for(d = 0; d < dim; d++) + { + int istarget = 0; + for(ddt = 0; ddt < dim; ddt++) + { + if(!target_data[ddt]) + break; + if(((long)target_data[ddt])-1 == d) + istarget = 1; + } + + if(!istarget) + { + real z = 1 - input_target + input_data[d]; + if(z > 0) + sum += z; + } + } + } + input_data += dim; + target_data += dim; + } + + if(sizeAverage) + sum /= dim; + + lua_pushnumber(L, sum); + lua_setfield(L, 1, "output"); + + THTensor_(free)(input); + THTensor_(free)(target); + lua_pushnumber(L, sum); + return 1; +} + +static int nn_(MultiLabelMarginCriterion_updateGradInput)(lua_State *L) +{ + THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id)); + int sizeAverage = luaT_getfieldcheckboolean(L, 1, "sizeAverage"); + THTensor *gradInput = luaT_getfieldcheckudata(L, 1, "gradInput", torch_(Tensor_id)); + real *input_data; + real *gradInput_data; + real *target_data; + long nframe, dim; + long t, d, dt, ddt; + THTensor *target; + real g; + + THArgCheck((input->nDimension == 1) || (input->nDimension == 2), 2, "vector or matrix expected"); + + if(input->nDimension == 1) + { + nframe = 1; + dim = input->size[0]; + target = luaT_checkudata(L, 3, torch_(Tensor_id)); + THArgCheck((target->nDimension == 1) && (target->size[0] == dim), 3, "inconsistent target size"); + } + else + { + nframe = input->size[0]; + dim = input->size[1]; + target 
= luaT_checkudata(L, 3, torch_(Tensor_id)); + THArgCheck((target->nDimension == 2) && (target->size[0] == nframe) && (target->size[1] == dim), 3, "inconsistent target size"); + } + + THArgCheck(THTensor_(minall)(target) >= 0, 3, "target out of range"); + THArgCheck(THTensor_(maxall)(target) <= dim, 3, "target out of range"); + + target = THTensor_(newContiguous)(target); + input = THTensor_(newContiguous)(input); + input_data = THTensor_(data)(input); + target_data = THTensor_(data)(target); + + g = (sizeAverage ? 1./((real)dim) : 1.); + + THTensor_(resizeAs)(gradInput, input); + THTensor_(zero)(gradInput); + gradInput_data = THTensor_(data)(gradInput); + + for(t = 0; t < nframe; t++) + { + for(dt = 0; dt < dim; dt++) + { + long target_idx = (long)target_data[dt]-1; + real input_target; + if(target_idx < 0) + break; + + input_target = input_data[target_idx]; + for(d = 0; d < dim; d++) + { + int istarget = 0; + for(ddt = 0; ddt < dim; ddt++) + { + if(!target_data[ddt]) + break; + if(((long)target_data[ddt])-1 == d) + istarget = 1; + } + + if(!istarget) + { + real z = 1 - input_target + input_data[d]; + if(z > 0) + { + gradInput_data[target_idx] -= g; + gradInput_data[d] += g; + } + } + } + } + input_data += dim; + target_data += dim; + gradInput_data += dim; + } + + THTensor_(free)(input); + THTensor_(free)(target); + return 1; +} + +static const struct luaL_Reg nn_(MultiLabelMarginCriterion__) [] = { + {"MultiLabelMarginCriterion_updateOutput", nn_(MultiLabelMarginCriterion_updateOutput)}, + {"MultiLabelMarginCriterion_updateGradInput", nn_(MultiLabelMarginCriterion_updateGradInput)}, + {NULL, NULL} +}; + +static void nn_(MultiLabelMarginCriterion_init)(lua_State *L) +{ + luaT_pushmetaclass(L, torch_(Tensor_id)); + luaT_registeratname(L, nn_(MultiLabelMarginCriterion__), "nn"); + lua_pop(L,1); +} + +#endif diff --git a/generic/MultiMarginCriterion.c b/generic/MultiMarginCriterion.c new file mode 100644 index 0000000..ca73bc9 --- /dev/null +++ b/generic/MultiMarginCriterion.c @@ -0,0 +1,162 @@ +#ifndef TH_GENERIC_FILE +#define TH_GENERIC_FILE "generic/MultiMarginCriterion.c" +#else + +static int nn_(MultiMarginCriterion_updateOutput)(lua_State *L) +{ + THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id)); + int sizeAverage = luaT_getfieldcheckboolean(L, 1, "sizeAverage"); + real *input_data, *target_data; + long nframe, dim; + long t, d; + real target_; + THTensor *target; + real sum; + + THArgCheck((input->nDimension == 1) || (input->nDimension == 2), 2, "vector or matrix expected"); + + if(input->nDimension == 1) + { + nframe = 1; + dim = input->size[0]; + target_ = luaL_checknumber(L, 3); + target = THTensor_(newWithSize1d)(1); + THTensor_(fill)(target, target_); + } + else + { + nframe = input->size[0]; + dim = input->size[1]; + target = luaT_checkudata(L, 3, torch_(Tensor_id)); + THArgCheck((target->nDimension == 1) && (target->size[0] == nframe), 3, "inconsistent target size"); + target = THTensor_(newContiguous)(target); + } + + for(t = 0; t < nframe; t++) + { + real idx = THTensor_(get1d)(target, t); + THArgCheck((idx >= 1) && (idx <= dim), 3, "target out of range"); + } + + input = THTensor_(newContiguous)(input); + input_data = THTensor_(data)(input); + target_data = THTensor_(data)(target); + + sum = 0; + for(t = 0; t < nframe; t++) + { + long target_idx = (long)(target_data[t]-1); + real input_target = input_data[target_idx]; + for(d = 0; d < dim; d++) + { + real z = 1 - input_target + input_data[d]; + if(d == target_idx) + continue; + + if(z > 0) + sum += z; + } + 
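+    /* the hinge terms max(0, 1 - x[target] + x[d]) for this frame have been
+       accumulated into sum; advance the input pointer to the next frame */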
input_data += dim; + } + + if(sizeAverage) + sum /= dim; + + lua_pushnumber(L, sum); + lua_setfield(L, 1, "output"); + + THTensor_(free)(input); + THTensor_(free)(target); + lua_pushnumber(L, sum); + return 1; +} + +static int nn_(MultiMarginCriterion_updateGradInput)(lua_State *L) +{ + THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id)); + int sizeAverage = luaT_getfieldcheckboolean(L, 1, "sizeAverage"); + THTensor *gradInput = luaT_getfieldcheckudata(L, 1, "gradInput", torch_(Tensor_id)); + real *input_data; + real *gradInput_data; + real *target_data; + THTensor *target; + long nframe, dim; + long t, d; + real target_; + real g; + real sum; + + THArgCheck((input->nDimension == 1) || (input->nDimension == 2), 2, "vector or matrix expected"); + + if(input->nDimension == 1) + { + nframe = 1; + dim = input->size[0]; + target_ = luaL_checknumber(L, 3); + target = THTensor_(newWithSize1d)(1); + THTensor_(fill)(target, target_); + } + else + { + nframe = input->size[0]; + dim = input->size[1]; + target = luaT_checkudata(L, 3, torch_(Tensor_id)); + THArgCheck((target->nDimension == 1) && (target->size[0] == nframe), 3, "inconsistent target size"); + target = THTensor_(newContiguous)(target); + } + + g = (sizeAverage ? 1./((real)dim) : 1.); + + input = THTensor_(newContiguous)(input); + input_data = THTensor_(data)(input); + + THTensor_(resizeAs)(gradInput, input); + gradInput_data = THTensor_(data)(gradInput); + + target_data = THTensor_(data)(target); + + for(t = 0; t < nframe; t++) + { + long target_idx = (long)(target_data[t])-1; + real input_target = input_data[target_idx]; + real gradInput_target = 0; + for(d = 0; d < dim; d++) + { + real z = 1 - input_target + input_data[d]; + if(d == target_idx) + continue; + + if(z > 0) + { + gradInput_target -= g; + gradInput_data[d] = g; + } + else + gradInput_data[d] = 0; + } + gradInput_data[target_idx] = gradInput_target; + + input_data += dim; + gradInput_data += dim; + } + + + THTensor_(free)(input); + THTensor_(free)(target); + return 1; +} + +static const struct luaL_Reg nn_(MultiMarginCriterion__) [] = { + {"MultiMarginCriterion_updateOutput", nn_(MultiMarginCriterion_updateOutput)}, + {"MultiMarginCriterion_updateGradInput", nn_(MultiMarginCriterion_updateGradInput)}, + {NULL, NULL} +}; + +static void nn_(MultiMarginCriterion_init)(lua_State *L) +{ + luaT_pushmetaclass(L, torch_(Tensor_id)); + luaT_registeratname(L, nn_(MultiMarginCriterion__), "nn"); + lua_pop(L,1); +} + +#endif diff --git a/generic/Sigmoid.c b/generic/Sigmoid.c new file mode 100644 index 0000000..20348b9 --- /dev/null +++ b/generic/Sigmoid.c @@ -0,0 +1,44 @@ +#ifndef TH_GENERIC_FILE +#define TH_GENERIC_FILE "generic/Sigmoid.c" +#else + +static int nn_(Sigmoid_updateOutput)(lua_State *L) +{ + THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id)); + THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_(Tensor_id)); + + THTensor_(resizeAs)(output, input); + + TH_TENSOR_APPLY2(real, output, real, input, \ + *output_data = 1./(1.+ exp(- *input_data));) + + return 1; +} + +static int nn_(Sigmoid_updateGradInput)(lua_State *L) +{ + THTensor *gradOutput = luaT_checkudata(L, 3, torch_(Tensor_id)); + THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_(Tensor_id)); + THTensor *gradInput = luaT_getfieldcheckudata(L, 1, "gradInput", torch_(Tensor_id)); + + THTensor_(resizeAs)(gradInput, output); + TH_TENSOR_APPLY3(real, gradInput, real, gradOutput, real, output, \ + real z = *output_data; \ + *gradInput_data = *gradOutput_data * (1. 
- z) * z;) + return 1; +} + +static const struct luaL_Reg nn_(Sigmoid__) [] = { + {"Sigmoid_updateOutput", nn_(Sigmoid_updateOutput)}, + {"Sigmoid_updateGradInput", nn_(Sigmoid_updateGradInput)}, + {NULL, NULL} +}; + +static void nn_(Sigmoid_init)(lua_State *L) +{ + luaT_pushmetaclass(L, torch_(Tensor_id)); + luaT_registeratname(L, nn_(Sigmoid__), "nn"); + lua_pop(L,1); +} + +#endif diff --git a/generic/SoftMax.c b/generic/SoftMax.c new file mode 100644 index 0000000..3aaae65 --- /dev/null +++ b/generic/SoftMax.c @@ -0,0 +1,114 @@ +#ifndef TH_GENERIC_FILE +#define TH_GENERIC_FILE "generic/SoftMax.c" +#else + +static int nn_(SoftMax_updateOutput)(lua_State *L) +{ + THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id)); + THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_(Tensor_id)); + real *input_data, *output_data; + long nframe = 0, dim = 0; + long t, d; + + if(input->nDimension == 1) + { + nframe = 1; + dim = input->size[0]; + } + else if(input->nDimension == 2) + { + nframe = input->size[0]; + dim = input->size[1]; + } + else + THArgCheck(0, 2, "vector or matrix expected"); + + input = THTensor_(newContiguous)(input); + THTensor_(resizeAs)(output, input); + + input_data = THTensor_(data)(input); + output_data = THTensor_(data)(output); + for(t = 0; t < nframe; t++) + { + real inputMax = -THInf; + for(d = 0; d < dim; d++) { + if (input_data[d] >= inputMax) inputMax = input_data[d]; + } + + accreal sum = 0; + for(d = 0; d < dim; d++) { + real z = THExpMinusApprox(inputMax - input_data[d]); + output_data[d] = z; + sum += z; + } + + for(d = 0; d < dim; d++) { + output_data[d] *= 1/sum; + } + + input_data += dim; + output_data += dim; + } + + THTensor_(free)(input); + + return 1; +} + +static int nn_(SoftMax_updateGradInput)(lua_State *L) +{ + THTensor *gradOutput = luaT_checkudata(L, 3, torch_(Tensor_id)); + THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_(Tensor_id)); + THTensor *gradInput = luaT_getfieldcheckudata(L, 1, "gradInput", torch_(Tensor_id)); + real *gradInput_data, *gradOutput_data, *output_data; + long nframe = 0, dim = 0; + long t, d; + + if(output->nDimension == 1) + { + nframe = 1; + dim = output->size[0]; + } + else if(output->nDimension == 2) + { + nframe = output->size[0]; + dim = output->size[1]; + } + else + THError("vector or matrix expected"); + + THTensor_(resizeAs)(gradInput, output); + gradInput_data = THTensor_(data)(gradInput); + output_data = THTensor_(data)(output); + gradOutput_data = THTensor_(data)(gradOutput); + for(t = 0; t < nframe; t++) + { + accreal sum = 0; + for(d = 0; d < dim; d++) + sum += (accreal)gradOutput_data[d] * output_data[d]; + + for(d = 0; d < dim; d++) + gradInput_data[d] = output_data[d] * (gradOutput_data[d] - sum); + + gradInput_data += dim; + output_data += dim; + gradOutput_data += dim; + } + + return 1; +} + +static const struct luaL_Reg nn_(SoftMax__) [] = { + {"SoftMax_updateOutput", nn_(SoftMax_updateOutput)}, + {"SoftMax_updateGradInput", nn_(SoftMax_updateGradInput)}, + {NULL, NULL} +}; + +static void nn_(SoftMax_init)(lua_State *L) +{ + luaT_pushmetaclass(L, torch_(Tensor_id)); + luaT_registeratname(L, nn_(SoftMax__), "nn"); + lua_pop(L,1); +} + +#endif diff --git a/generic/SoftPlus.c b/generic/SoftPlus.c new file mode 100644 index 0000000..7a097fb --- /dev/null +++ b/generic/SoftPlus.c @@ -0,0 +1,44 @@ +#ifndef TH_GENERIC_FILE +#define TH_GENERIC_FILE "generic/SoftPlus.c" +#else + +static int nn_(SoftPlus_updateOutput)(lua_State *L) +{ + THTensor *input = luaT_checkudata(L, 2, 
torch_(Tensor_id)); + THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_(Tensor_id)); + + THTensor_(resizeAs)(output, input); + + TH_TENSOR_APPLY2(real, output, real, input, \ + *output_data = log1p(exp(*input_data));) + + return 1; +} + +static int nn_(SoftPlus_updateGradInput)(lua_State *L) +{ + THTensor *gradOutput = luaT_checkudata(L, 3, torch_(Tensor_id)); + THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_(Tensor_id)); + THTensor *gradInput = luaT_getfieldcheckudata(L, 1, "gradInput", torch_(Tensor_id)); + + THTensor_(resizeAs)(gradInput, output); + TH_TENSOR_APPLY3(real, gradInput, real, gradOutput, real, output, \ + real z = exp(*output_data); \ + *gradInput_data = *gradOutput_data * (z - 1.)/z;) + return 1; +} + +static const struct luaL_Reg nn_(SoftPlus__) [] = { + {"SoftPlus_updateOutput", nn_(SoftPlus_updateOutput)}, + {"SoftPlus_updateGradInput", nn_(SoftPlus_updateGradInput)}, + {NULL, NULL} +}; + +static void nn_(SoftPlus_init)(lua_State *L) +{ + luaT_pushmetaclass(L, torch_(Tensor_id)); + luaT_registeratname(L, nn_(SoftPlus__), "nn"); + lua_pop(L,1); +} + +#endif diff --git a/generic/SoftShrink.c b/generic/SoftShrink.c new file mode 100644 index 0000000..0bc4075 --- /dev/null +++ b/generic/SoftShrink.c @@ -0,0 +1,50 @@ +#ifndef TH_GENERIC_FILE +#define TH_GENERIC_FILE "generic/SoftShrink.c" +#else + +static int nn_(SoftShrink_updateOutput)(lua_State *L) +{ + THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id)); + real lambda = luaT_getfieldchecknumber(L, 1, "lambda"); + THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_(Tensor_id)); + + THTensor_(resizeAs)(output, input); + + TH_TENSOR_APPLY2(real, output, real, input, \ + if ((*input_data) > lambda) *output_data = *input_data - lambda; \ + else if ((*input_data) < -lambda) *output_data = *input_data + lambda; \ + else *output_data = 0;); + return 1; +} + +static int nn_(SoftShrink_updateGradInput)(lua_State *L) +{ + THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id)); + real lambda = luaT_getfieldchecknumber(L, 1, "lambda"); + THTensor *gradOutput = luaT_checkudata(L, 3, torch_(Tensor_id)); + THTensor *gradInput = luaT_getfieldcheckudata(L, 1, "gradInput", torch_(Tensor_id)); + + THTensor_(resizeAs)(gradInput, input); + TH_TENSOR_APPLY3(real, gradInput, real, gradOutput, real, input, \ + if ((*input_data) > lambda || (*input_data) < -lambda) \ + *gradInput_data = (*gradOutput_data); \ + else \ + *gradInput_data = 0; \ + ); + return 1; +} + +static const struct luaL_Reg nn_(SoftShrink__) [] = { + {"SoftShrink_updateOutput", nn_(SoftShrink_updateOutput)}, + {"SoftShrink_updateGradInput", nn_(SoftShrink_updateGradInput)}, + {NULL, NULL} +}; + +static void nn_(SoftShrink_init)(lua_State *L) +{ + luaT_pushmetaclass(L, torch_(Tensor_id)); + luaT_registeratname(L, nn_(SoftShrink__), "nn"); + lua_pop(L,1); +} + +#endif diff --git a/generic/SparseLinear.c b/generic/SparseLinear.c new file mode 100644 index 0000000..d29a1aa --- /dev/null +++ b/generic/SparseLinear.c @@ -0,0 +1,130 @@ +#ifndef TH_GENERIC_FILE +#define TH_GENERIC_FILE "generic/SparseLinear.c" +#else + +static int nn_(SparseLinear_updateOutput)(lua_State *L) +{ + long i; + THTensor * input = luaT_checkudata(L, 2, torch_(Tensor_id)); + THTensor * weight = luaT_getfieldcheckudata(L, 1, "weight", torch_(Tensor_id)); + THTensor * bias = luaT_getfieldcheckudata(L, 1, "bias", torch_(Tensor_id)); + THTensor * output = luaT_getfieldcheckudata(L, 1, "output", torch_(Tensor_id)); + long dim = weight->size[0]; /* number 
of weights.. */ + + THTensor_(copy)(output, bias); + for(i = 0; i < input->size[1]; i++) + { + long offset = (long)(THTensor_(get2d)(input, 0, i))-1; + + if(offset >= 0 && offset < dim) /* make sure indices are in bounds.. */ + { + real val = THTensor_(get2d)(input, 1, i); + THBlas_(axpy)(output->size[0], + val, + THTensor_(data)(weight)+offset*weight->stride[0], + weight->stride[1], + THTensor_(data)(output), + output->stride[0]); + } + else + luaL_error(L, "index out of bound"); + } + return 1; +} + +static int nn_(SparseLinear_accGradParameters)(lua_State *L) +{ + long i; + THTensor * input = luaT_checkudata(L, 2, torch_(Tensor_id)); + THTensor * gradOutput = luaT_checkudata(L, 3, torch_(Tensor_id)); + real scale = luaL_optnumber(L, 4, 1); + THTensor * weight = luaT_getfieldcheckudata(L, 1, "weight", torch_(Tensor_id)); + THTensor * output = luaT_getfieldcheckudata(L, 1, "output", torch_(Tensor_id)); + THTensor * gradBias = luaT_getfieldcheckudata(L, 1, "gradBias", torch_(Tensor_id)); + THTensor * gradWeight = luaT_getfieldcheckudata(L, 1, "gradWeight", torch_(Tensor_id)); + THTensor * lastInput = luaT_getfieldcheckudata(L, 1, "lastInput", torch_(Tensor_id)); + real weightDecay = luaT_getfieldchecknumber(L, 1, "weightDecay"); + long dim = gradWeight->size[0]; /* number of weights.. */ + + for(i = 0; i < input->size[1]; i++) + { + long offset = (long)(THTensor_(get2d)(input, 0, i))-1; + + if(offset >= 0 && offset < dim) /* make sure indices are in bounds.. */ + { + real val = scale*THTensor_(get2d)(input, 1, i); + THBlas_(scal)(gradOutput->size[0], + 0, + THTensor_(data)(gradWeight)+offset*gradWeight->stride[0], + gradWeight->stride[1]); /* zero */ + + THBlas_(axpy)(gradOutput->size[0], + val, + THTensor_(data)(gradOutput), + gradOutput->stride[0], + THTensor_(data)(gradWeight)+offset*gradWeight->stride[0], + gradWeight->stride[1]); + } + else + luaL_error(L, "index out of bound"); + } + + THTensor_(cadd)(gradBias, gradBias, 1, gradOutput); + + if(weightDecay != 0) + THTensor_(cadd)(gradWeight, gradWeight, weightDecay, weight); + + THTensor_(resizeAs)(lastInput, input); + THTensor_(copy)(lastInput, input); + + return 0; +} + +int nn_(SparseLinear_updateParameters)(lua_State *L) +{ + long i; + real learningRate = luaL_checknumber(L, 2); + THTensor * weight = luaT_getfieldcheckudata(L, 1, "weight", torch_(Tensor_id)); + THTensor * output = luaT_getfieldcheckudata(L, 1, "output", torch_(Tensor_id)); + THTensor * bias = luaT_getfieldcheckudata(L, 1, "bias", torch_(Tensor_id)); + THTensor * gradBias = luaT_getfieldcheckudata(L, 1, "gradBias", torch_(Tensor_id)); + THTensor * gradWeight = luaT_getfieldcheckudata(L, 1, "gradWeight", torch_(Tensor_id)); + THTensor * lastInput = luaT_getfieldcheckudata(L, 1, "lastInput", torch_(Tensor_id)); + real weightDecay = luaT_getfieldchecknumber(L, 1, "weightDecay"); + + long dim = weight->size[0]; /* number of weights.. */ + THTensor_(cadd)(bias, bias, -learningRate, gradBias); + + for(i = 0; i < lastInput->size[1]; i++) + { + long offset = (long)(THTensor_(get2d)(lastInput, 0, i))-1; + + if(offset >= 0 && offset < dim) /* make sure indices are in bounds.. 
*/ + { + THBlas_(axpy)(bias->size[0], + -learningRate, + THTensor_(data)(gradWeight)+offset*gradWeight->stride[0], + gradWeight->stride[1], + THTensor_(data)(weight)+offset*weight->stride[0], + weight->stride[1]); + } + else + luaL_error(L, "index out of bound"); + } + return 0; +} + +static const struct luaL_Reg nn_(SparseLinear__) [] = { + {"SparseLinear_updateOutput", nn_(SparseLinear_updateOutput)}, + {"SparseLinear_updateParameters", nn_(SparseLinear_updateParameters)}, + {NULL, NULL} +}; + +void nn_(SparseLinear_init)(lua_State *L) +{ + luaT_pushmetaclass(L, torch_(Tensor_id)); + luaT_registeratname(L, nn_(SparseLinear__), "nn"); + lua_pop(L,1); +} + +#endif diff --git a/generic/SpatialConvolution.c b/generic/SpatialConvolution.c new file mode 100644 index 0000000..de0de1d --- /dev/null +++ b/generic/SpatialConvolution.c @@ -0,0 +1,201 @@ +#ifndef TH_GENERIC_FILE +#define TH_GENERIC_FILE "generic/SpatialConvolution.c" +#else + +static void nn_(convolution_updateOutput_)(THTensor *input, THTensor *output, THTensor *weight, THTensor *bias, int dH, int dW) +{ + /* add bias */ + long i; + THTensor *outn = THTensor_(new)(); + for (i=0; isize[0]; i++) { + THTensor_(select)(outn,output,0,i); + THTensor_(fill)(outn, THTensor_(get1d)(bias, i)); + } + THTensor_(free)(outn); + + /* do convolutions */ + THTensor_(conv2Dmv)(output, 1.0, 1.0, input, weight, dH, dW, "V","X"); +} + +static int nn_(SpatialConvolution_updateOutput)(lua_State *L) +{ + THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id)); + int dW = luaT_getfieldcheckint(L, 1, "dW"); + int dH = luaT_getfieldcheckint(L, 1, "dH"); + + THTensor *weight = luaT_getfieldcheckudata(L, 1, "weight", torch_(Tensor_id)); + THTensor *bias = luaT_getfieldcheckudata(L, 1, "bias", torch_(Tensor_id)); + THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_(Tensor_id)); + + luaL_argcheck(L, input->nDimension == 3 || input->nDimension == 4, 2, "3D or 4D(batch mode) tensor expected"); + + int dimw = 2; + int dimh = 1; + if (input->nDimension == 4) { + dimw++; + dimh++; + } + + long nOutputPlane = weight->size[0]; + long nInputPlane = weight->size[1]; + long kW = weight->size[3]; + long kH = weight->size[2]; + long inputWidth = input->size[dimw]; + long inputHeight = input->size[dimh]; + long outputWidth = (inputWidth - kW) / dW + 1; + long outputHeight = (inputHeight - kH) / dH + 1; + + if (input->nDimension == 3) + { + THTensor_(resize3d)(output, nOutputPlane, outputHeight, outputWidth); +/* printf("\n*************\nstochastic\n"); */ +/* printf("no=%d\n",output->nDimension); */ +/* printf("no=%ld,%ld,%ld\n",nOutputPlane,outputHeight,outputWidth); */ +/* printf("ni=%d\n",input->nDimension); */ + nn_(convolution_updateOutput_)(input,output,weight,bias,dH,dW); +/* printf("stochastic\n");*/ + } + else + { + THTensor_(resize4d)(output, input->size[0], nOutputPlane, outputHeight, outputWidth); + THTensor *outn = THTensor_(new)(); + THTensor *inpn = THTensor_(new)(); + long i; + for (i=0; isize[0]; i++) + { + THTensor_(select)(outn,output,0,i); + THTensor_(select)(inpn,input,0,i); + nn_(convolution_updateOutput_)(inpn,outn,weight,bias,dH,dW); + } + THTensor_(free)(outn); + THTensor_(free)(inpn); + } + +/* /\* add bias *\/ */ +/* long i; */ +/* THTensor *outn = THTensor_(new)(); */ +/* for (i=0; isize[0]; i++) { */ +/* THTensor_(select)(outn,output,0,i); */ +/* THTensor_(fill)(outn, THTensor_(get1d)(bias, i)); */ +/* } */ +/* THTensor_(free)(outn); */ + +/* /\* do convolutions *\/ */ +/* THTensor_(conv2Dmv)(output, 1.0, 1.0, input, weight, dH, 
dW, "vx"); */ + + return 1; +} + + +static int nn_(SpatialConvolution_updateGradInput)(lua_State *L) +{ + THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id)); + THTensor *gradOutput = luaT_checkudata(L, 3, torch_(Tensor_id)); + int dW = luaT_getfieldcheckint(L, 1, "dW"); + int dH = luaT_getfieldcheckint(L, 1, "dH"); + int nOutputPlane = luaT_getfieldcheckint(L, 1, "nOutputPlane"); + + THTensor *weight = luaT_getfieldcheckudata(L, 1, "weight", torch_(Tensor_id)); + THTensor *gradInput = luaT_getfieldcheckudata(L, 1, "gradInput", torch_(Tensor_id)); + + THArgCheck( nOutputPlane == gradOutput->size[input->nDimension == 4 ? 1 : 0], 1, "Number of output features is not equal to nOutputPlane" ); + + long k; + + /* gradient to input */ + THTensor *tweight = THTensor_(newTranspose)(weight,0,1); + + if(input->nDimension == 3) + { + THTensor_(conv2Dmv)(gradInput, 0.0, 1.0, gradOutput, tweight, dH, dW, "F", "C"); + } + else + { + + THTensor_(resizeAs)(gradInput,input); + THTensor *outn = THTensor_(new)(); + THTensor *inpn = THTensor_(new)(); + long i; + for (i=0; isize[0]; i++) + { + THTensor_(select)(outn,gradOutput,0,i); + THTensor_(select)(inpn,gradInput,0,i); + THTensor_(conv2Dmv)(inpn, 0.0, 1.0, outn, tweight, dH, dW, "F", "C"); + } + THTensor_(free)(outn); + THTensor_(free)(inpn); + } + THTensor_(free)(tweight); + + return 1; +} + +static void nn_(convolution_accGradParameters_)(THTensor *input, THTensor *gradOutput, THTensor *gradWeight, THTensor *gradBias, real scale, int dH, int dW) +{ + long k; + + /* gradient to bias */ + real *gradBias_data = THTensor_(data)(gradBias); + THTensor* gradOutSlice = THTensor_(new)(); + for(k = 0; k < gradOutput->size[0]; k++) + { + THTensor_(select)(gradOutSlice, gradOutput, 0, k); + gradBias_data[k] += scale*THTensor_(sumall)(gradOutSlice); + } + THTensor_(free)(gradOutSlice); + + /* gradient to kernels */ + THTensor_(conv2DRevger)(gradWeight, 1.0, scale, input, gradOutput, dH, dW); +} + +static int nn_(SpatialConvolution_accGradParameters)(lua_State *L) +{ + THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id)); + THTensor *gradOutput = luaT_checkudata(L, 3, torch_(Tensor_id)); + real scale = luaL_optnumber(L, 4, 1); + int dW = luaT_getfieldcheckint(L, 1, "dW"); + int dH = luaT_getfieldcheckint(L, 1, "dH"); + int nOutputPlane = luaT_getfieldcheckint(L, 1, "nOutputPlane"); + + THTensor *gradWeight = luaT_getfieldcheckudata(L, 1, "gradWeight", torch_(Tensor_id)); + THTensor *gradBias = luaT_getfieldcheckudata(L, 1, "gradBias", torch_(Tensor_id)); + + THArgCheck( nOutputPlane == gradOutput->size[input->nDimension == 4 ? 
1 : 0], 1, "Number of output features is not equal to nOutputPlane" ); + + if(input->nDimension == 3) + { + nn_(convolution_accGradParameters_)(input,gradOutput,gradWeight,gradBias,scale,dH,dW); + } + else + { + THTensor *outn = THTensor_(new)(); + THTensor *inpn = THTensor_(new)(); + long i; + for (i=0; isize[0]; i++) + { + THTensor_(select)(outn,gradOutput,0,i); + THTensor_(select)(inpn,input,0,i); + nn_(convolution_accGradParameters_)(inpn,outn,gradWeight,gradBias,scale,dH,dW); + } + THTensor_(free)(outn); + THTensor_(free)(inpn); + } + + return 0; +} + +static const struct luaL_Reg nn_(SpatialConvolution__) [] = { + {"SpatialConvolution_updateOutput", nn_(SpatialConvolution_updateOutput)}, + {"SpatialConvolution_updateGradInput", nn_(SpatialConvolution_updateGradInput)}, + {"SpatialConvolution_accGradParameters", nn_(SpatialConvolution_accGradParameters)}, + {NULL, NULL} +}; + +static void nn_(SpatialConvolution_init)(lua_State *L) +{ + luaT_pushmetaclass(L, torch_(Tensor_id)); + luaT_registeratname(L, nn_(SpatialConvolution__), "nn"); + lua_pop(L,1); +} + +#endif diff --git a/generic/SpatialConvolutionMap.c b/generic/SpatialConvolutionMap.c new file mode 100644 index 0000000..ff7d8ca --- /dev/null +++ b/generic/SpatialConvolutionMap.c @@ -0,0 +1,229 @@ +#ifndef TH_GENERIC_FILE +#define TH_GENERIC_FILE "generic/SpatialConvolutionMap.c" +#else + +static int nn_(SpatialConvolutionMap_updateOutput)(lua_State *L) +{ + THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id)); + int kW = luaT_getfieldcheckint(L, 1, "kW"); + int kH = luaT_getfieldcheckint(L, 1, "kH"); + int dW = luaT_getfieldcheckint(L, 1, "dW"); + int dH = luaT_getfieldcheckint(L, 1, "dH"); + int nInputPlane = luaT_getfieldcheckint(L, 1, "nInputPlane"); + int nOutputPlane = luaT_getfieldcheckint(L, 1, "nOutputPlane"); + + THTensor *connTable = luaT_getfieldcheckudata(L, 1, "connTable", torch_(Tensor_id)); + THTensor *weight = luaT_getfieldcheckudata(L, 1, "weight", torch_(Tensor_id)); + THTensor *bias = luaT_getfieldcheckudata(L, 1, "bias", torch_(Tensor_id)); + THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_(Tensor_id)); + + luaL_argcheck(L, input->nDimension == 3, 2, "3D tensor expected"); + luaL_argcheck(L, input->size[0] == nInputPlane, 2, "invalid number of input planes"); + luaL_argcheck(L, input->size[2] >= kW && input->size[1] >= kH, 2, "input image smaller than kernel size"); + + THTensor_(resize3d)(output, nOutputPlane, + (input->size[1] - kH) / dH + 1, + (input->size[2] - kW) / dW + 1); + + // contiguous + input = THTensor_(newContiguous)(input); + output = THTensor_(newContiguous)(output); + + // get raw pointers + real *input_data = THTensor_(data)(input); + real *output_data = THTensor_(data)(output); + real *weight_data = THTensor_(data)(weight); + + // and dims + long input_n = input->size[0]; + long input_h = input->size[1]; + long input_w = input->size[2]; + long output_n = output->size[0]; + long output_h = output->size[1]; + long output_w = output->size[2]; + long weight_n = weight->size[0]; + long weight_h = weight->size[1]; + long weight_w = weight->size[2]; + + // add bias + THTensor *outputPlane = THTensor_(new)(); + int k; + for (k = 0; k < nOutputPlane; k++) { + THTensor_(select)(outputPlane,output,0,k); + THTensor_(fill)(outputPlane, THTensor_(get1d)(bias, k)); + } + THTensor_(free)(outputPlane); + + // convolve all maps + int i,o; + int nweight = connTable->size[0]; + for (k = 0; k < nweight; k++) { + // get offsets for input/output + o = (int)THTensor_(get2d)(connTable,k,1)-1; 
+ i = (int)THTensor_(get2d)(connTable,k,0)-1; + + // convolve each map + THTensor_(validXCorr2Dptr)(output_data + o*output_w*output_h, + 1.0, + input_data + i*input_w*input_h, input_h, input_w, + weight_data + k*weight_w*weight_h, weight_h, weight_w, + dH, dW); + } + + // clean up + THTensor_(free)(input); + THTensor_(free)(output); + + return 1; +} + +static int nn_(SpatialConvolutionMap_updateGradInput)(lua_State *L) +{ + THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id)); + THTensor *gradOutput = luaT_checkudata(L, 3, torch_(Tensor_id)); + int kW = luaT_getfieldcheckint(L, 1, "kW"); + int kH = luaT_getfieldcheckint(L, 1, "kH"); + int dW = luaT_getfieldcheckint(L, 1, "dW"); + int dH = luaT_getfieldcheckint(L, 1, "dH"); + int nInputPlane = luaT_getfieldcheckint(L, 1, "nInputPlane"); + int nOutputPlane = luaT_getfieldcheckint(L, 1, "nOutputPlane"); + + THTensor *connTable = luaT_getfieldcheckudata(L, 1, "connTable", torch_(Tensor_id)); + THTensor *weight = luaT_getfieldcheckudata(L, 1, "weight", torch_(Tensor_id)); + THTensor *gradWeight = luaT_getfieldcheckudata(L, 1, "gradWeight", torch_(Tensor_id)); + THTensor *gradBias = luaT_getfieldcheckudata(L, 1, "gradBias", torch_(Tensor_id)); + THTensor *gradInput = luaT_getfieldcheckudata(L, 1, "gradInput", torch_(Tensor_id)); + + // contiguous + gradInput = THTensor_(newContiguous)(gradInput); + gradOutput = THTensor_(newContiguous)(gradOutput); + + // Resize/Zero + THTensor_(resizeAs)(gradInput, input); + THTensor_(zero)(gradInput); + + // get raw pointers + real *gradInput_data = THTensor_(data)(gradInput); + real *gradOutput_data = THTensor_(data)(gradOutput); + real *weight_data = THTensor_(data)(weight); + real *gradWeight_data = THTensor_(data)(gradWeight); + + // and dims + long input_n = input->size[0]; + long input_h = input->size[1]; + long input_w = input->size[2]; + long output_n = gradOutput->size[0]; + long output_h = gradOutput->size[1]; + long output_w = gradOutput->size[2]; + long weight_n = weight->size[0]; + long weight_h = weight->size[1]; + long weight_w = weight->size[2]; + + // updateGradInput all + int k; + int nkernel = connTable->size[0]; + for(k = 0; k < nkernel; k++) + { + int o = (int)THTensor_(get2d)(connTable,k,1)-1; + int i = (int)THTensor_(get2d)(connTable,k,0)-1; + + // gradient to input + THTensor_(fullConv2Dptr)(gradInput_data + i*input_w*input_h, + 1.0, + gradOutput_data + o*output_w*output_h, output_h, output_w, + weight_data + k*weight_w*weight_h, weight_h, weight_w, + dH, dW); + } + + // clean up + THTensor_(free)(gradInput); + THTensor_(free)(gradOutput); + + return 1; +} + +static int nn_(SpatialConvolutionMap_accGradParameters)(lua_State *L) +{ + THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id)); + THTensor *gradOutput = luaT_checkudata(L, 3, torch_(Tensor_id)); + int kW = luaT_getfieldcheckint(L, 1, "kW"); + int kH = luaT_getfieldcheckint(L, 1, "kH"); + int dW = luaT_getfieldcheckint(L, 1, "dW"); + int dH = luaT_getfieldcheckint(L, 1, "dH"); + int nInputPlane = luaT_getfieldcheckint(L, 1, "nInputPlane"); + int nOutputPlane = luaT_getfieldcheckint(L, 1, "nOutputPlane"); + real scale = luaL_optnumber(L, 4, 1); + + THTensor *connTable = luaT_getfieldcheckudata(L, 1, "connTable", torch_(Tensor_id)); + THTensor *weight = luaT_getfieldcheckudata(L, 1, "weight", torch_(Tensor_id)); + THTensor *gradWeight = luaT_getfieldcheckudata(L, 1, "gradWeight", torch_(Tensor_id)); + THTensor *gradBias = luaT_getfieldcheckudata(L, 1, "gradBias", torch_(Tensor_id)); + THTensor *gradInput = 
luaT_getfieldcheckudata(L, 1, "gradInput", torch_(Tensor_id)); + + // contiguous + input = THTensor_(newContiguous)(input); + gradOutput = THTensor_(newContiguous)(gradOutput); + + // get raw pointers + real *input_data = THTensor_(data)(input); + real *gradOutput_data = THTensor_(data)(gradOutput); + real *weight_data = THTensor_(data)(weight); + real *gradWeight_data = THTensor_(data)(gradWeight); + + // and dims + long input_n = input->size[0]; + long input_h = input->size[1]; + long input_w = input->size[2]; + long output_n = gradOutput->size[0]; + long output_h = gradOutput->size[1]; + long output_w = gradOutput->size[2]; + long weight_n = weight->size[0]; + long weight_h = weight->size[1]; + long weight_w = weight->size[2]; + + // gradients wrt bias + int k; + THTensor *gradOutputPlane = THTensor_(new)(); + real *gradBias_data = THTensor_(data)(gradBias); + for(k = 0; k < nOutputPlane; k++) { + THTensor_(select)(gradOutputPlane, gradOutput, 0, k); + gradBias_data[k] += scale * THTensor_(sumall)(gradOutputPlane); + } + THTensor_(free)(gradOutputPlane); + + // gradients wrt weight + int nkernel = connTable->size[0]; + for(k = 0; k < nkernel; k++) + { + int o = (int)THTensor_(get2d)(connTable,k,1)-1; + int i = (int)THTensor_(get2d)(connTable,k,0)-1; + + // gradient to kernel + THTensor_(validXCorr2DRevptr)(gradWeight_data + k*weight_w*weight_h, + scale, + input_data + i*input_w*input_h, input_h, input_w, + gradOutput_data + o*output_w*output_h, output_h, output_w, + dH, dW); + } + + // clean up + THTensor_(free)(input); + THTensor_(free)(gradOutput); + return 0; +} + +static const struct luaL_Reg nn_(SpatialConvolutionMap__) [] = { + {"SpatialConvolutionMap_updateOutput", nn_(SpatialConvolutionMap_updateOutput)}, + {"SpatialConvolutionMap_updateGradInput", nn_(SpatialConvolutionMap_updateGradInput)}, + {"SpatialConvolutionMap_accGradParameters", nn_(SpatialConvolutionMap_accGradParameters)}, + {NULL, NULL} +}; + +static void nn_(SpatialConvolutionMap_init)(lua_State *L) +{ + luaT_pushmetaclass(L, torch_(Tensor_id)); + luaT_registeratname(L, nn_(SpatialConvolutionMap__), "nn"); + lua_pop(L,1); +} + +#endif diff --git a/generic/SpatialMaxPooling.c b/generic/SpatialMaxPooling.c new file mode 100644 index 0000000..b9fab3b --- /dev/null +++ b/generic/SpatialMaxPooling.c @@ -0,0 +1,163 @@ +#ifndef TH_GENERIC_FILE +#define TH_GENERIC_FILE "generic/SpatialMaxPooling.c" +#else + +static int nn_(SpatialMaxPooling_updateOutput)(lua_State *L) +{ + THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id)); + int kW = luaT_getfieldcheckint(L, 1, "kW"); + int kH = luaT_getfieldcheckint(L, 1, "kH"); + int dW = luaT_getfieldcheckint(L, 1, "dW"); + int dH = luaT_getfieldcheckint(L, 1, "dH"); + THTensor *indices = luaT_getfieldcheckudata(L, 1, "indices", torch_(Tensor_id)); + THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_(Tensor_id)); + + luaL_argcheck(L, input->nDimension == 3, 2, "3D tensor expected"); + luaL_argcheck(L, input->size[2] >= kW && input->size[1] >= kH, 2, "input image smaller than kernel size"); + + // sizes + long nslices = input->size[0]; + long iheight = input->size[1]; + long iwidth = input->size[2]; + long oheight = (iheight - kH) / dH + 1; + long owidth = (iwidth - kW) / dW + 1; + + // get contiguous input + input = THTensor_(newContiguous)(input); + + // resize output + THTensor_(resize3d)(output, nslices, oheight, owidth); + + // indices will contain i,j locatyions for each output point + THTensor_(resize4d)(indices, 2, nslices, oheight, owidth); + + // get raw 
pointers + real *input_data = THTensor_(data)(input); + real *output_data = THTensor_(data)(output); + real *indices_data = THTensor_(data)(indices); + + // compute max pooling for each input slice + long k; + for (k = 0; k < nslices; k++) { + // pointers to slices + real *input_p = input_data + k*iwidth*iheight; + real *output_p = output_data + k*owidth*oheight; + real *indy_p = indices_data + k*owidth*oheight; + real *indx_p = indices_data + (k+nslices)*owidth*oheight; + + // loop over output + int i,j; + for(i = 0; i < oheight; i++) { + for(j = 0; j < owidth; j++) { + // local pointers + real *ip = input_p + i*iwidth*dH + j*dW; + real *op = output_p + i*owidth + j; + real *indyp = indy_p + i*owidth + j; + real *indxp = indx_p + i*owidth + j; + + // compute local max: + long maxindex = -1; + real maxval = -THInf; + long tcntr = 0; + int x,y; + for(y = 0; y < kH; y++) { + for(x = 0; x < kW; x++) { + real val = *(ip + y*iwidth + x); + if (val > maxval) { + maxval = val; + maxindex = tcntr; + } + tcntr++; + } + } + + // set output to local max + *op = maxval; + + // store location of max (x,y) + *indyp = (int)(maxindex / dW)+1; + *indxp = (maxindex % dW) +1; + } + } + } + + // cleanup + THTensor_(free)(input); + + return 1; +} + +static int nn_(SpatialMaxPooling_updateGradInput)(lua_State *L) +{ + THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id)); + THTensor *gradOutput = luaT_checkudata(L, 3, torch_(Tensor_id)); + int kW = luaT_getfieldcheckint(L, 1, "kW"); + int kH = luaT_getfieldcheckint(L, 1, "kH"); + int dW = luaT_getfieldcheckint(L, 1, "dW"); + int dH = luaT_getfieldcheckint(L, 1, "dH"); + THTensor *indices = luaT_getfieldcheckudata(L, 1, "indices", torch_(Tensor_id)); + THTensor *gradInput = luaT_getfieldcheckudata(L, 1, "gradInput", torch_(Tensor_id)); + + // get contiguous gradOutput + gradOutput = THTensor_(newContiguous)(gradOutput); + + // resize + THTensor_(resizeAs)(gradInput, input); + THTensor_(zero)(gradInput); + + // sizes + int ichannels = input->size[0]; + int iheight = input->size[1]; + int iwidth = input->size[2]; + int ochannels = ichannels; + int oheight = gradOutput->size[1]; + int owidth = gradOutput->size[2]; + + // get raw pointers + real *gradInput_data = THTensor_(data)(gradInput); + real *gradOutput_data = THTensor_(data)(gradOutput); + real *indices_data = THTensor_(data)(indices); + + // backprop + long k; + for (k = 0; k < input->size[0]; k++) { + // pointers to slices + real *gradOutput_p = gradOutput_data + k*owidth*oheight; + real *gradInput_p = gradInput_data + k*iwidth*iheight; + real *indy_p = indices_data + k*owidth*oheight; + real *indx_p = indices_data + (k+ochannels)*owidth*oheight; + + // calculate max points + int i,j; + for(i = 0; i < oheight; i++) { + for(j = 0; j < owidth; j++) { + // retrieve position of max + long maxi = *(indy_p + i*owidth + j) - 1 + i*dH; + long maxj = *(indx_p + i*owidth + j) - 1 + j*dW; + + // update gradient + *(gradInput_p + maxi*iwidth + maxj) += *(gradOutput_p + i*owidth + j); + } + } + } + + // cleanup + THTensor_(free)(gradOutput); + + return 1; +} + +static const struct luaL_Reg nn_(SpatialMaxPooling__) [] = { + {"SpatialMaxPooling_updateOutput", nn_(SpatialMaxPooling_updateOutput)}, + {"SpatialMaxPooling_updateGradInput", nn_(SpatialMaxPooling_updateGradInput)}, + {NULL, NULL} +}; + +static void nn_(SpatialMaxPooling_init)(lua_State *L) +{ + luaT_pushmetaclass(L, torch_(Tensor_id)); + luaT_registeratname(L, nn_(SpatialMaxPooling__), "nn"); + lua_pop(L,1); +} + +#endif diff --git 
a/generic/SpatialSubSampling.c b/generic/SpatialSubSampling.c new file mode 100644 index 0000000..705253f --- /dev/null +++ b/generic/SpatialSubSampling.c @@ -0,0 +1,278 @@ +#ifndef TH_GENERIC_FILE +#define TH_GENERIC_FILE "generic/SpatialSubSampling.c" +#else + +static int nn_(SpatialSubSampling_updateOutput)(lua_State *L) +{ + THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id)); + int kW = luaT_getfieldcheckint(L, 1, "kW"); + int kH = luaT_getfieldcheckint(L, 1, "kH"); + int dW = luaT_getfieldcheckint(L, 1, "dW"); + int dH = luaT_getfieldcheckint(L, 1, "dH"); + int nInputPlane = luaT_getfieldcheckint(L, 1, "nInputPlane"); + + THTensor *weight = luaT_getfieldcheckudata(L, 1, "weight", torch_(Tensor_id)); + THTensor *bias = luaT_getfieldcheckudata(L, 1, "bias", torch_(Tensor_id)); + THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_(Tensor_id)); + + real *weight_data = THTensor_(data)(weight); + real *bias_data = THTensor_(data)(bias); + real *output_data; + real *input_data; + + + luaL_argcheck(L, input->nDimension == 3 || input->nDimension == 4, 2, "3D or 4D(batch mode) tensor expected"); + + int dimw = 2; + int dimh = 1; + if (input->nDimension == 4) { + dimw++; + dimh++; + } + + long inputWidth = input->size[dimw]; + long inputHeight = input->size[dimh]; + long outputWidth = (inputWidth - kW) / dW + 1; + long outputHeight = (inputHeight - kH) / dH + 1; + + + luaL_argcheck(L, input->size[dimh-1] == nInputPlane, 2, "invalid number of input planes"); + luaL_argcheck(L, inputWidth >= kW && inputHeight >= kH, 2, "input image smaller than kernel size"); + + input = THTensor_(newContiguous)(input); + input_data = THTensor_(data)(input); + + long nbatch = 1; + if (input->nDimension == 3) + { + THTensor_(resize3d)(output, nInputPlane, outputHeight, outputWidth); + } + else + { + nbatch = input->size[0]; + THTensor_(resize4d)(output, nbatch, nInputPlane, outputHeight, outputWidth); + } + + output_data = THTensor_(data)(output); + + long i, k, p; + + for(p = 0; p < nbatch; p++) + { + //input_data += p*nInputPlane*inputWidth*inputHeight; + //output_data += p*nInputPlane*outputHeight*outputWidth; + for(k = 0; k < nInputPlane; k++) + { + real *ptr_output; + long xx, yy; + + /* Get the good mask for (k,i) (k out, i in) */ + real the_weight = weight_data[k]; + + /* Initialize to the bias */ + real z = bias_data[k]; + for(i = 0; i < outputWidth*outputHeight; i++) + output_data[i] = z; + + /* For all output pixels... */ + ptr_output = output_data; + for(yy = 0; yy < outputHeight; yy++) + { + for(xx = 0; xx < outputWidth; xx++) + { + // Compute the mean of the input image... 
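// (Inferred from the surrounding code: despite the "mean" wording above, the kW x kH window is
// summed rather than averaged; bias[k] was written into the output plane just before this loop,
// and the learned per-plane scalar weight[k] scales the window sum, so each output pixel ends up
// as bias[k] + weight[k] * sum(window).)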
+ real *ptr_input = input_data+yy*dH*inputWidth+xx*dW; + real sum = 0; + long kx, ky; + + for(ky = 0; ky < kH; ky++) + { + for(kx = 0; kx < kW; kx++) + sum += ptr_input[kx]; + ptr_input += inputWidth; // next input line + } + + // Update output + *ptr_output++ += the_weight*sum; + } + } + + // Next input/output plane + output_data += outputWidth*outputHeight; + input_data += inputWidth*inputHeight; + } + } + + THTensor_(free)(input); + + return 1; +} + +static int nn_(SpatialSubSampling_updateGradInput)(lua_State *L) +{ + THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id)); + THTensor *gradOutput = luaT_checkudata(L, 3, torch_(Tensor_id)); + int kW = luaT_getfieldcheckint(L, 1, "kW"); + int kH = luaT_getfieldcheckint(L, 1, "kH"); + int dW = luaT_getfieldcheckint(L, 1, "dW"); + int dH = luaT_getfieldcheckint(L, 1, "dH"); + int nInputPlane = luaT_getfieldcheckint(L, 1, "nInputPlane"); + + THTensor *weight = luaT_getfieldcheckudata(L, 1, "weight", torch_(Tensor_id)); + THTensor *gradInput = luaT_getfieldcheckudata(L, 1, "gradInput", torch_(Tensor_id)); + + int dimw = 2; + int dimh = 1; + long nbatch = 1; + if (input->nDimension == 4) { + dimw++; + dimh++; + nbatch = input->size[0]; + } + + long inputWidth = input->size[dimw]; + long inputHeight = input->size[dimh]; + long outputWidth = (inputWidth - kW) / dW + 1; + long outputHeight = (inputHeight - kH) / dH + 1; + + real *weight_data = THTensor_(data)(weight); + real *gradOutput_data = THTensor_(data)(gradOutput); + real *gradInput_data; + + THTensor_(resizeAs)(gradInput, input); + THTensor_(zero)(gradInput); + gradInput_data = THTensor_(data)(gradInput); + gradOutput_data = THTensor_(data)(gradOutput); + + long i, k, p; + + for(p = 0; p < nbatch; p++) + { + //gradInput_data += p*nInputPlane*inputWidth*inputHeight; + //gradOutput_data += p*nInputPlane*outputWidth*outputHeight; + for(k = 0; k < nInputPlane; k++) + { + real the_weight = weight_data[k]; + real *ptr_gradOutput = gradOutput_data; + long xx, yy; + + for(yy = 0; yy < outputHeight; yy++) + { + for(xx = 0; xx < outputWidth; xx++) + { + real *ptr_gradInput = gradInput_data+yy*dH*inputWidth+xx*dW; + real z = *ptr_gradOutput++ * the_weight; + long kx, ky; + + for(ky = 0; ky < kH; ky++) + { + for(kx = 0; kx < kW; kx++) + ptr_gradInput[kx] += z; + ptr_gradInput += inputWidth; + } + } + } + gradOutput_data += outputWidth*outputHeight; + gradInput_data += inputWidth*inputHeight; + } + } + + return 1; +} + +static int nn_(SpatialSubSampling_accGradParameters)(lua_State *L) +{ + THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id)); + THTensor *gradOutput = luaT_checkudata(L, 3, torch_(Tensor_id)); + real scale = luaL_optnumber(L, 4, 1); + int kW = luaT_getfieldcheckint(L, 1, "kW"); + int kH = luaT_getfieldcheckint(L, 1, "kH"); + int dW = luaT_getfieldcheckint(L, 1, "dW"); + int dH = luaT_getfieldcheckint(L, 1, "dH"); + int nInputPlane = luaT_getfieldcheckint(L, 1, "nInputPlane"); + + THTensor *gradWeight = luaT_getfieldcheckudata(L, 1, "gradWeight", torch_(Tensor_id)); + THTensor *gradBias = luaT_getfieldcheckudata(L, 1, "gradBias", torch_(Tensor_id)); + + int dimw = 2; + int dimh = 1; + long nbatch = 1; + if (input->nDimension == 4) { + dimw++; + dimh++; + nbatch = input->size[0]; + } + + long inputWidth = input->size[dimw]; + long inputHeight = input->size[dimh]; + long outputWidth = (inputWidth - kW) / dW + 1; + long outputHeight = (inputHeight - kH) / dH + 1; + + real *gradWeight_data = THTensor_(data)(gradWeight); + real *gradBias_data = THTensor_(data)(gradBias); + real 
*gradOutput_data = THTensor_(data)(gradOutput); + real *input_data; + + input = THTensor_(newContiguous)(input); + input_data = THTensor_(data)(input); + + long i, k, p; + for(p = 0; p < nbatch; p++) + { + //input_data += p*nInputPlane*inputWidth*inputHeight; + //gradOutput_data += p*nInputPlane*inputWidth*inputHeight; + for(k = 0; k < nInputPlane; k++) + { + real *ptr_gradOutput = gradOutput_data; + real sum; + long xx, yy; + + sum = 0; + for(i = 0; i < outputWidth*outputHeight; i++) + sum += gradOutput_data[i]; + gradBias_data[k] += scale*sum; + + sum = 0; + for(yy = 0; yy < outputHeight; yy++) + { + for(xx = 0; xx < outputWidth; xx++) + { + real *ptr_input = input_data+yy*dH*inputWidth+xx*dW; + real z = *ptr_gradOutput++; + long kx, ky; + + for(ky = 0; ky < kH; ky++) + { + for(kx = 0; kx < kW; kx++) + sum += z * ptr_input[kx]; + ptr_input += inputWidth; + } + } + } + gradWeight_data[k] += scale*sum; + gradOutput_data += outputWidth*outputHeight; + input_data += inputWidth*inputHeight; + } + } + + + THTensor_(free)(input); + + return 0; +} + +static const struct luaL_Reg nn_(SpatialSubSampling__) [] = { + {"SpatialSubSampling_updateOutput", nn_(SpatialSubSampling_updateOutput)}, + {"SpatialSubSampling_updateGradInput", nn_(SpatialSubSampling_updateGradInput)}, + {"SpatialSubSampling_accGradParameters", nn_(SpatialSubSampling_accGradParameters)}, + {NULL, NULL} +}; + +static void nn_(SpatialSubSampling_init)(lua_State *L) +{ + luaT_pushmetaclass(L, torch_(Tensor_id)); + luaT_registeratname(L, nn_(SpatialSubSampling__), "nn"); + lua_pop(L,1); +} + +#endif diff --git a/generic/Sqrt.c b/generic/Sqrt.c new file mode 100644 index 0000000..a739e96 --- /dev/null +++ b/generic/Sqrt.c @@ -0,0 +1,46 @@ +#ifndef TH_GENERIC_FILE +#define TH_GENERIC_FILE "generic/Sqrt.c" +#else + +static int nn_(Sqrt_updateOutput)(lua_State *L) +{ + THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id)); + THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_(Tensor_id)); + + THTensor_(resizeAs)(output, input); + + TH_TENSOR_APPLY2(real, output, real, input, \ + *output_data = sqrt(*input_data);); + + return 1; +} + +static int nn_(Sqrt_updateGradInput)(lua_State *L) +{ + THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id)); + THTensor *gradOutput = luaT_checkudata(L, 3, torch_(Tensor_id)); + THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_(Tensor_id)); + THTensor *gradInput = luaT_getfieldcheckudata(L, 1, "gradInput", torch_(Tensor_id)); + + THTensor_(resizeAs)(gradInput, input); + + TH_TENSOR_APPLY3(real, gradInput, real, gradOutput, real, output, \ + *gradInput_data = 0.5 * (*gradOutput_data / *output_data);); + + return 1; +} + +static const struct luaL_Reg nn_(Sqrt__) [] = { + {"Sqrt_updateOutput", nn_(Sqrt_updateOutput)}, + {"Sqrt_updateGradInput", nn_(Sqrt_updateGradInput)}, + {NULL, NULL} +}; + +static void nn_(Sqrt_init)(lua_State *L) +{ + luaT_pushmetaclass(L, torch_(Tensor_id)); + luaT_registeratname(L, nn_(Sqrt__), "nn"); + lua_pop(L,1); +} + +#endif diff --git a/generic/Square.c b/generic/Square.c new file mode 100644 index 0000000..409055d --- /dev/null +++ b/generic/Square.c @@ -0,0 +1,45 @@ +#ifndef TH_GENERIC_FILE +#define TH_GENERIC_FILE "generic/Square.c" +#else + +static int nn_(Square_updateOutput)(lua_State *L) +{ + THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id)); + THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_(Tensor_id)); + + THTensor_(resizeAs)(output, input); + + TH_TENSOR_APPLY2(real, output, real, input, \ + *output_data 
= *input_data * *input_data;); + + return 1; +} + +static int nn_(Square_updateGradInput)(lua_State *L) +{ + THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id)); + THTensor *gradOutput = luaT_checkudata(L, 3, torch_(Tensor_id)); + THTensor *gradInput = luaT_getfieldcheckudata(L, 1, "gradInput", torch_(Tensor_id)); + + THTensor_(resizeAs)(gradInput, input); + + TH_TENSOR_APPLY3(real, gradInput, real, gradOutput, real, input, \ + *gradInput_data = 2.0 * (*gradOutput_data) * (*input_data);); + + return 1; +} + +static const struct luaL_Reg nn_(Square__) [] = { + {"Square_updateOutput", nn_(Square_updateOutput)}, + {"Square_updateGradInput", nn_(Square_updateGradInput)}, + {NULL, NULL} +}; + +static void nn_(Square_init)(lua_State *L) +{ + luaT_pushmetaclass(L, torch_(Tensor_id)); + luaT_registeratname(L, nn_(Square__), "nn"); + lua_pop(L,1); +} + +#endif diff --git a/generic/Tanh.c b/generic/Tanh.c new file mode 100644 index 0000000..5c24d15 --- /dev/null +++ b/generic/Tanh.c @@ -0,0 +1,45 @@ +#ifndef TH_GENERIC_FILE +#define TH_GENERIC_FILE "generic/Tanh.c" +#else + +static int nn_(Tanh_updateOutput)(lua_State *L) +{ + THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id)); + THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_(Tensor_id)); + + THTensor_(resizeAs)(output, input); + + TH_TENSOR_APPLY2(real, output, real, input, \ + *output_data = tanh(*input_data);) + + return 1; +} + +static int nn_(Tanh_updateGradInput)(lua_State *L) +{ + THTensor *gradOutput = luaT_checkudata(L, 3, torch_(Tensor_id)); + THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_(Tensor_id)); + THTensor *gradInput = luaT_getfieldcheckudata(L, 1, "gradInput", torch_(Tensor_id)); + + THTensor_(resizeAs)(gradInput, output); + TH_TENSOR_APPLY3(real, gradInput, real, gradOutput, real, output, \ + real z = *output_data; \ + *gradInput_data = *gradOutput_data * (1. 
- z*z);); + return 1; +} + +static const struct luaL_Reg nn_(Tanh__) [] = { + {"Tanh_updateOutput", nn_(Tanh_updateOutput)}, + {"Tanh_updateGradInput", nn_(Tanh_updateGradInput)}, + {NULL, NULL} +}; + +static void nn_(Tanh_init)(lua_State *L) +{ + luaT_pushmetaclass(L, torch_(Tensor_id)); + luaT_registeratname(L, nn_(Tanh__), "nn"); + lua_pop(L,1); + +} + +#endif diff --git a/generic/TemporalConvolution.c b/generic/TemporalConvolution.c new file mode 100644 index 0000000..fa14a22 --- /dev/null +++ b/generic/TemporalConvolution.c @@ -0,0 +1,194 @@ +#ifndef TH_GENERIC_FILE +#define TH_GENERIC_FILE "generic/TemporalConvolution.c" +#else + +static int nn_(TemporalConvolution_updateOutput)(lua_State *L) +{ + THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id)); + int kW = luaT_getfieldcheckint(L, 1, "kW"); + int dW = luaT_getfieldcheckint(L, 1, "dW"); + int inputFrameSize = luaT_getfieldcheckint(L, 1, "inputFrameSize"); + int outputFrameSize = luaT_getfieldcheckint(L, 1, "outputFrameSize"); + + THTensor *weight = luaT_getfieldcheckudata(L, 1, "weight", torch_(Tensor_id)); + THTensor *bias = luaT_getfieldcheckudata(L, 1, "bias", torch_(Tensor_id)); + THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_(Tensor_id)); + + THTensor *outputWindow, *inputWindow; + int nInputFrame, nOutputFrame; + long k; + + luaL_argcheck(L, input->nDimension == 2, 2, "2D tensor expected"); + luaL_argcheck(L, input->size[1] == inputFrameSize, 2, "invalid input frame size"); + luaL_argcheck(L, input->size[0] >= kW, 2, "input sequence smaller than kernel size"); + + input = THTensor_(newContiguous)(input); + outputWindow = THTensor_(new)(); + inputWindow = THTensor_(new)(); + + nInputFrame = input->size[0]; + nOutputFrame = (nInputFrame - kW) / dW + 1; + + THTensor_(resize2d)(output, + nOutputFrame, + outputFrameSize); + + /* bias first */ + for(k = 0; k < nOutputFrame; k++) + { + THTensor_(select)(outputWindow, output, 0, k); + THTensor_(copy)(outputWindow, bias); + } + + /* ouch */ + for(k = 0; nOutputFrame > 0; k++) + { + long outputFrameStride = (kW-1)/dW+1; + long inputFrameStride = outputFrameStride*dW; + long nFrame = (nInputFrame-k*dW-kW)/inputFrameStride + 1; + nOutputFrame -= nFrame; + + THTensor_(setStorage2d)(inputWindow, input->storage, + input->storageOffset+k*dW*input->size[1], + nFrame, inputFrameStride*input->size[1], + kW*input->size[1], 1); + + THTensor_(setStorage2d)(outputWindow, output->storage, + output->storageOffset + k*output->size[1], + nFrame, outputFrameStride*output->size[1], + output->size[1], 1); + + THTensor_(transpose)(weight, NULL, 0, 1); + THTensor_(addmm)(outputWindow, 1, outputWindow, 1, inputWindow, weight); + THTensor_(transpose)(weight, NULL, 0, 1); + } + + THTensor_(free)(outputWindow); + THTensor_(free)(inputWindow); + THTensor_(free)(input); + + return 1; +} + +static int nn_(TemporalConvolution_updateGradInput)(lua_State *L) +{ + THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id)); + THTensor *gradOutput = luaT_checkudata(L, 3, torch_(Tensor_id)); + int kW = luaT_getfieldcheckint(L, 1, "kW"); + int dW = luaT_getfieldcheckint(L, 1, "dW"); + long nInputFrame = input->size[0]; + long nOutputFrame = gradOutput->size[0]; + + THTensor *weight = luaT_getfieldcheckudata(L, 1, "weight", torch_(Tensor_id)); + THTensor *gradInput = luaT_getfieldcheckudata(L, 1, "gradInput", torch_(Tensor_id)); + + THTensor *gradOutputWindow; + THTensor *gradInputWindow; + long k; + + gradOutputWindow = THTensor_(new)(); + gradInputWindow = THTensor_(new)(); + + 
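/* The "ouch" loops below (and their counterparts in updateOutput and accGradParameters) batch the
   per-frame work: output frames taken every outputFrameStride = (kW-1)/dW+1 steps have
   non-overlapping kW-frame input windows, so setStorage2d can expose each such group as the rows
   of a temporary 2D view and a single addmm processes the whole group; the outer loop over k
   walks the outputFrameStride phase offsets until every output frame has been covered. */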
THTensor_(resizeAs)(gradInput, input); + THTensor_(zero)(gradInput); + + /* ouch */ + for(k = 0; nOutputFrame > 0; k++) + { + long outputFrameStride = (kW-1)/dW+1; + long inputFrameStride = outputFrameStride*dW; + long nFrame = (nInputFrame-k*dW-kW)/inputFrameStride + 1; + nOutputFrame -= nFrame; + + THTensor_(setStorage2d)(gradOutputWindow, gradOutput->storage, + gradOutput->storageOffset + k*gradOutput->size[1], + nFrame, outputFrameStride*gradOutput->size[1], + gradOutput->size[1], 1); + + THTensor_(setStorage2d)(gradInputWindow, gradInput->storage, + gradInput->storageOffset+k*dW*gradInput->size[1], + nFrame, inputFrameStride*gradInput->size[1], + kW*gradInput->size[1], 1); + + THTensor_(addmm)(gradInputWindow, 1, gradInputWindow, 1, gradOutputWindow, weight); + } + + THTensor_(free)(gradOutputWindow); + THTensor_(free)(gradInputWindow); + + return 1; +} + +static int nn_(TemporalConvolution_accGradParameters)(lua_State *L) +{ + THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id)); + THTensor *gradOutput = luaT_checkudata(L, 3, torch_(Tensor_id)); + real scale = luaL_optnumber(L, 4, 1); + int kW = luaT_getfieldcheckint(L, 1, "kW"); + int dW = luaT_getfieldcheckint(L, 1, "dW"); + long nInputFrame = input->size[0]; + long nOutputFrame = gradOutput->size[0]; + + THTensor *gradWeight = luaT_getfieldcheckudata(L, 1, "gradWeight", torch_(Tensor_id)); + THTensor *gradBias = luaT_getfieldcheckudata(L, 1, "gradBias", torch_(Tensor_id)); + + THTensor *gradOutputWindow; + THTensor *inputWindow; + long k; + + input = THTensor_(newContiguous)(input); + gradOutputWindow = THTensor_(new)(); + inputWindow = THTensor_(new)(); + + /* bias first */ + for(k = 0; k < nOutputFrame; k++) + { + THTensor_(select)(gradOutputWindow, gradOutput, 0, k); + THTensor_(cadd)(gradBias, gradBias, scale, gradOutputWindow); + } + + /* ouch */ + for(k = 0; nOutputFrame > 0; k++) + { + long outputFrameStride = (kW-1)/dW+1; + long inputFrameStride = outputFrameStride*dW; + long nFrame = (nInputFrame-k*dW-kW)/inputFrameStride + 1; + nOutputFrame -= nFrame; + + THTensor_(setStorage2d)(inputWindow, input->storage, + input->storageOffset+k*dW*input->size[1], + nFrame, inputFrameStride*input->size[1], + kW*input->size[1], 1); + + THTensor_(setStorage2d)(gradOutputWindow, gradOutput->storage, + gradOutput->storageOffset + k*gradOutput->size[1], + nFrame, outputFrameStride*gradOutput->size[1], + gradOutput->size[1], 1); + + THTensor_(transpose)(gradOutputWindow, NULL, 0, 1); + THTensor_(addmm)(gradWeight, 1, gradWeight, scale, gradOutputWindow, inputWindow); + THTensor_(transpose)(gradOutputWindow, NULL, 0, 1); + } + + THTensor_(free)(gradOutputWindow); + THTensor_(free)(inputWindow); + THTensor_(free)(input); + + return 0; +} + +static const struct luaL_Reg nn_(TemporalConvolution__) [] = { + {"TemporalConvolution_updateOutput", nn_(TemporalConvolution_updateOutput)}, + {"TemporalConvolution_updateGradInput", nn_(TemporalConvolution_updateGradInput)}, + {"TemporalConvolution_accGradParameters", nn_(TemporalConvolution_accGradParameters)}, + {NULL, NULL} +}; + +static void nn_(TemporalConvolution_init)(lua_State *L) +{ + luaT_pushmetaclass(L, torch_(Tensor_id)); + luaT_registeratname(L, nn_(TemporalConvolution__), "nn"); + lua_pop(L,1); +} + +#endif diff --git a/generic/TemporalSubSampling.c b/generic/TemporalSubSampling.c new file mode 100644 index 0000000..39e7f3b --- /dev/null +++ b/generic/TemporalSubSampling.c @@ -0,0 +1,139 @@ +#ifndef TH_GENERIC_FILE +#define TH_GENERIC_FILE "generic/TemporalSubSampling.c" +#else + 
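/* Summary of the function below: for each output frame t, TemporalSubSampling sums a window of
   kW input frames starting at t*dW (per feature), scales the sum elementwise by the learned
   weight vector, and adds the bias, i.e. output[t] = weight .* sum(input[t*dW .. t*dW+kW-1]) + bias. */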
+static int nn_(TemporalSubSampling_updateOutput)(lua_State *L) +{ + THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id)); + int kW = luaT_getfieldcheckint(L, 1, "kW"); + int dW = luaT_getfieldcheckint(L, 1, "dW"); + int inputFrameSize = luaT_getfieldcheckint(L, 1, "inputFrameSize"); + + THTensor *weight = luaT_getfieldcheckudata(L, 1, "weight", torch_(Tensor_id)); + THTensor *bias = luaT_getfieldcheckudata(L, 1, "bias", torch_(Tensor_id)); + THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_(Tensor_id)); + + THTensor *outputFrame, *inputWindow; + int nInputFrame, nOutputFrame; + long k; + + luaL_argcheck(L, input->nDimension == 2, 2, "2D tensor expected"); + luaL_argcheck(L, input->size[1] == inputFrameSize, 2, "invalid input frame size"); + luaL_argcheck(L, input->size[0] >= kW, 2, "input sequence smaller than kernel size"); + + outputFrame = THTensor_(new)(); + inputWindow = THTensor_(new)(); + + nInputFrame = input->size[0]; + nOutputFrame = (nInputFrame - kW) / dW + 1; + + THTensor_(resize2d)(output, + nOutputFrame, + inputFrameSize); + + for(k = 0; k < nOutputFrame; k++) + { + THTensor_(narrow)(inputWindow, input, 0, k*dW, kW); + THTensor_(select)(outputFrame, output, 0, k); + THTensor_(sum)(outputFrame, inputWindow, 0); + THTensor_(cmul)(outputFrame, outputFrame, weight); + THTensor_(cadd)(outputFrame, outputFrame, 1, bias); + } + + THTensor_(free)(outputFrame); + THTensor_(free)(inputWindow); + + return 1; +} + +static int nn_(TemporalSubSampling_updateGradInput)(lua_State *L) +{ + THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id)); + THTensor *gradOutput = luaT_checkudata(L, 3, torch_(Tensor_id)); + int kW = luaT_getfieldcheckint(L, 1, "kW"); + int dW = luaT_getfieldcheckint(L, 1, "dW"); + + THTensor *weight = luaT_getfieldcheckudata(L, 1, "weight", torch_(Tensor_id)); + THTensor *gradInput = luaT_getfieldcheckudata(L, 1, "gradInput", torch_(Tensor_id)); + + THTensor *gradOutputFrame; + THTensor *gradInputWindow, *buffer, *kwunit; + long k; + + gradOutputFrame = THTensor_(new)(); + gradInputWindow = THTensor_(new)(); + buffer = THTensor_(new)(); + kwunit = THTensor_(newWithSize1d)(kW); + + THTensor_(fill)(kwunit, 1); + THTensor_(resizeAs)(gradInput, input); + THTensor_(zero)(gradInput); + + for(k = 0; k < gradOutput->size[0]; k++) + { + THTensor_(narrow)(gradInputWindow, gradInput, 0, k*dW, kW); + THTensor_(select)(gradOutputFrame, gradOutput, 0, k); + THTensor_(cmul)(buffer, weight, gradOutputFrame); + THTensor_(addr)(gradInputWindow, 1, gradInputWindow, 1, kwunit, buffer); + } + + THTensor_(free)(gradOutputFrame); + THTensor_(free)(gradInputWindow); + THTensor_(free)(buffer); + THTensor_(free)(kwunit); + + return 1; +} + +static int nn_(TemporalSubSampling_accGradParameters)(lua_State *L) +{ + THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id)); + THTensor *gradOutput = luaT_checkudata(L, 3, torch_(Tensor_id)); + real scale = luaL_optnumber(L, 4, 1); + + int kW = luaT_getfieldcheckint(L, 1, "kW"); + int dW = luaT_getfieldcheckint(L, 1, "dW"); + + THTensor *gradWeight = luaT_getfieldcheckudata(L, 1, "gradWeight", torch_(Tensor_id)); + THTensor *gradBias = luaT_getfieldcheckudata(L, 1, "gradBias", torch_(Tensor_id)); + + THTensor *gradOutputFrame; + THTensor *inputWindow, *buffer; + long k; + + + gradOutputFrame = THTensor_(new)(); + inputWindow = THTensor_(new)(); + buffer = THTensor_(new)(); + + for(k = 0; k < gradOutput->size[0]; k++) + { + THTensor_(narrow)(inputWindow, input, 0, k*dW, kW); + THTensor_(select)(gradOutputFrame, gradOutput, 0, 
k); + THTensor_(sum)(buffer, inputWindow, 0); + THTensor_(addcmul)(gradWeight, gradWeight, scale, buffer, gradOutputFrame); + THTensor_(cadd)(gradBias, gradBias, scale, gradOutputFrame); + } + + THTensor_(free)(gradOutputFrame); + THTensor_(free)(inputWindow); + THTensor_(free)(buffer); + + return 0; +} + +static const struct luaL_Reg nn_(TemporalSubSampling__) [] = { + {"TemporalSubSampling_updateOutput", nn_(TemporalSubSampling_updateOutput)}, + {"TemporalSubSampling_updateGradInput", nn_(TemporalSubSampling_updateGradInput)}, + {"TemporalSubSampling_accGradParameters", nn_(TemporalSubSampling_accGradParameters)}, + {NULL, NULL} +}; + +static void nn_(TemporalSubSampling_init)(lua_State *L) +{ + luaT_pushmetaclass(L, torch_(Tensor_id)); + luaT_registeratname(L, nn_(TemporalSubSampling__), "nn"); + lua_pop(L,1); +} + +#endif diff --git a/generic/Threshold.c b/generic/Threshold.c new file mode 100644 index 0000000..760e842 --- /dev/null +++ b/generic/Threshold.c @@ -0,0 +1,47 @@ +#ifndef TH_GENERIC_FILE +#define TH_GENERIC_FILE "generic/Threshold.c" +#else + +static int nn_(Threshold_updateOutput)(lua_State *L) +{ + THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id)); + real val = luaT_getfieldchecknumber(L, 1, "val"); + real threshold = luaT_getfieldchecknumber(L, 1, "threshold"); + THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_(Tensor_id)); + + THTensor_(resizeAs)(output, input); + TH_TENSOR_APPLY2(real, output, real, input, \ + *output_data = (*input_data > threshold) ? *input_data : val;); + + return 1; +} + +static int nn_(Threshold_updateGradInput)(lua_State *L) +{ + THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id)); + THTensor *gradOutput = luaT_checkudata(L, 3, torch_(Tensor_id)); + real threshold = luaT_getfieldchecknumber(L, 1, "threshold"); + THTensor *gradInput = luaT_getfieldcheckudata(L, 1, "gradInput", torch_(Tensor_id)); + + THTensor_(resizeAs)(gradInput, input); + TH_TENSOR_APPLY3(real, gradInput, real, gradOutput, real, input, \ + if ((*input_data) > threshold) *gradInput_data = 1; \ + else *gradInput_data = 0; \ + *gradInput_data = (*gradOutput_data) * (*gradInput_data);); + return 1; +} + +static const struct luaL_Reg nn_(Threshold__) [] = { + {"Threshold_updateOutput", nn_(Threshold_updateOutput)}, + {"Threshold_updateGradInput", nn_(Threshold_updateGradInput)}, + {NULL, NULL} +}; + +static void nn_(Threshold_init)(lua_State *L) +{ + luaT_pushmetaclass(L, torch_(Tensor_id)); + luaT_registeratname(L, nn_(Threshold__), "nn"); + lua_pop(L,1); +} + +#endif diff --git a/generic/VolumetricConvolution.c b/generic/VolumetricConvolution.c new file mode 100644 index 0000000..0ec2247 --- /dev/null +++ b/generic/VolumetricConvolution.c @@ -0,0 +1,118 @@ +#ifndef TH_GENERIC_FILE +#define TH_GENERIC_FILE "generic/VolumetricConvolution.c" +#else + +static int nn_(VolumetricConvolution_updateOutput)(lua_State *L) +{ + THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id)); + int dT = luaT_getfieldcheckint(L, 1, "dT"); + int dW = luaT_getfieldcheckint(L, 1, "dW"); + int dH = luaT_getfieldcheckint(L, 1, "dH"); + + THTensor *weight = luaT_getfieldcheckudata(L, 1, "weight", torch_(Tensor_id)); + THTensor *bias = luaT_getfieldcheckudata(L, 1, "bias", torch_(Tensor_id)); + THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_(Tensor_id)); + + luaL_argcheck(L, input->nDimension == 4, 2, "4D tensor expected"); + + long nOutputPlane = weight->size[0]; + long nInputPlane = weight->size[1]; + long kT = weight->size[2]; + long kH = 
weight->size[3]; + long kW = weight->size[4]; + long inputDepth = input->size[1]; + long inputHeight = input->size[2]; + long inputWidth = input->size[3]; + long outputDepth = (inputDepth - kT) / dT + 1; + long outputWidth = (inputWidth - kW) / dW + 1; + long outputHeight = (inputHeight - kH) / dH + 1; + + THTensor_(resize4d)(output, nOutputPlane, outputDepth, outputHeight, outputWidth); + + /* add bias */ + long i; + THTensor *outn = THTensor_(new)(); + for (i=0; i<bias->size[0]; i++) { + THTensor_(select)(outn,output,0,i); + THTensor_(fill)(outn, THTensor_(get1d)(bias, i)); + } + THTensor_(free)(outn); + + /* do convolutions */ + THTensor_(conv3Dmv)(output, 1.0, 1.0, input, weight, dT, dH, dW, "V", "X"); + + return 1; +} + + +static int nn_(VolumetricConvolution_updateGradInput)(lua_State *L) +{ + THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id)); + THTensor *gradOutput = luaT_checkudata(L, 3, torch_(Tensor_id)); + int dT = luaT_getfieldcheckint(L, 1, "dT"); + int dW = luaT_getfieldcheckint(L, 1, "dW"); + int dH = luaT_getfieldcheckint(L, 1, "dH"); + int nOutputPlane = luaT_getfieldcheckint(L, 1, "nOutputPlane"); + + THTensor *weight = luaT_getfieldcheckudata(L, 1, "weight", torch_(Tensor_id)); + THTensor *gradInput = luaT_getfieldcheckudata(L, 1, "gradInput", torch_(Tensor_id)); + + THArgCheck( nOutputPlane == gradOutput->size[0], 1, "Number of output features is not equal to nOutputPlane" ); + + /* gradient to input */ + THTensor *tweight = THTensor_(newTranspose)(weight,0,1); + THTensor_(conv3Dmv)(gradInput, 0.0, 1.0, gradOutput, tweight, dT, dH, dW, "F", "C"); + THTensor_(free)(tweight); + + return 1; +} + +static int nn_(VolumetricConvolution_accGradParameters)(lua_State *L) +{ + THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id)); + THTensor *gradOutput = luaT_checkudata(L, 3, torch_(Tensor_id)); + real scale = luaL_optnumber(L, 4, 1); + int dT = luaT_getfieldcheckint(L, 1, "dT"); + int dW = luaT_getfieldcheckint(L, 1, "dW"); + int dH = luaT_getfieldcheckint(L, 1, "dH"); + int nOutputPlane = luaT_getfieldcheckint(L, 1, "nOutputPlane"); + + THTensor *weight = luaT_getfieldcheckudata(L, 1, "weight", torch_(Tensor_id)); + THTensor *gradWeight = luaT_getfieldcheckudata(L, 1, "gradWeight", torch_(Tensor_id)); + THTensor *gradBias = luaT_getfieldcheckudata(L, 1, "gradBias", torch_(Tensor_id)); + + THArgCheck( nOutputPlane == gradOutput->size[0], 1, "Number of output features is not equal to nOutputPlane" ); + + long k; + + /* gradient to bias */ + real *gradBias_data = THTensor_(data)(gradBias); + THTensor* gradOutSlice = THTensor_(new)(); + for(k = 0; k < nOutputPlane; k++) + { + THTensor_(select)(gradOutSlice, gradOutput, 0, k); + gradBias_data[k] += scale*THTensor_(sumall)(gradOutSlice); + } + THTensor_(free)(gradOutSlice); + + /* gradient to kernels */ + THTensor_(conv3DRevger)(gradWeight, 1.0, scale, input, gradOutput, dT, dH, dW); + + return 0; +} + +static const struct luaL_Reg nn_(VolumetricConvolution__) [] = { + {"VolumetricConvolution_updateOutput", nn_(VolumetricConvolution_updateOutput)}, + {"VolumetricConvolution_updateGradInput", nn_(VolumetricConvolution_updateGradInput)}, + {"VolumetricConvolution_accGradParameters", nn_(VolumetricConvolution_accGradParameters)}, + {NULL, NULL} +}; + +static void nn_(VolumetricConvolution_init)(lua_State *L) +{ + luaT_pushmetaclass(L, torch_(Tensor_id)); + luaT_registeratname(L, nn_(VolumetricConvolution__), "nn"); + lua_pop(L,1); +} + +#endif diff --git a/init.c b/init.c new file mode 100644 index 0000000..b2a528e --- /dev/null
+++ b/init.c @@ -0,0 +1,163 @@ +#include "TH.h" +#include "luaT.h" + +#define torch_(NAME) TH_CONCAT_3(torch_, Real, NAME) +#define torch_string_(NAME) TH_CONCAT_STRING_3(torch., Real, NAME) +#define nn_(NAME) TH_CONCAT_3(nn_, Real, NAME) + +static const void* torch_FloatTensor_id = NULL; +static const void* torch_DoubleTensor_id = NULL; + +#include "generic/Square.c" +#include "THGenerateFloatTypes.h" + +#include "generic/Sqrt.c" +#include "THGenerateFloatTypes.h" + +#include "generic/HardTanh.c" +#include "THGenerateFloatTypes.h" + +#include "generic/Exp.c" +#include "THGenerateFloatTypes.h" + +#include "generic/LogSigmoid.c" +#include "THGenerateFloatTypes.h" + +#include "generic/LogSoftMax.c" +#include "THGenerateFloatTypes.h" + +#include "generic/Sigmoid.c" +#include "THGenerateFloatTypes.h" + +#include "generic/SoftPlus.c" +#include "THGenerateFloatTypes.h" + +#include "generic/Tanh.c" +#include "THGenerateFloatTypes.h" + +#include "generic/Abs.c" +#include "THGenerateFloatTypes.h" + +#include "generic/HardShrink.c" +#include "THGenerateFloatTypes.h" + +#include "generic/SoftShrink.c" +#include "THGenerateFloatTypes.h" + +#include "generic/Threshold.c" +#include "THGenerateFloatTypes.h" + +#include "generic/SoftMax.c" +#include "THGenerateFloatTypes.h" + +#include "generic/Max.c" +#include "THGenerateFloatTypes.h" + +#include "generic/Min.c" +#include "THGenerateFloatTypes.h" + +#include "generic/MSECriterion.c" +#include "THGenerateFloatTypes.h" + +#include "generic/AbsCriterion.c" +#include "THGenerateFloatTypes.h" + +#include "generic/SparseLinear.c" +#include "THGenerateFloatTypes.h" + +#include "generic/TemporalConvolution.c" +#include "THGenerateFloatTypes.h" + +#include "generic/TemporalSubSampling.c" +#include "THGenerateFloatTypes.h" + +#include "generic/SpatialConvolution.c" +#include "THGenerateFloatTypes.h" + +#include "generic/SpatialConvolutionMap.c" +#include "THGenerateFloatTypes.h" + +#include "generic/SpatialSubSampling.c" +#include "THGenerateFloatTypes.h" + +#include "generic/SpatialMaxPooling.c" +#include "THGenerateFloatTypes.h" + +#include "generic/VolumetricConvolution.c" +#include "THGenerateFloatTypes.h" + +#include "generic/MultiMarginCriterion.c" +#include "THGenerateFloatTypes.h" + +#include "generic/MultiLabelMarginCriterion.c" +#include "THGenerateFloatTypes.h" + +DLL_EXPORT int luaopen_libnn(lua_State *L) +{ + torch_FloatTensor_id = luaT_checktypename2id(L, "torch.FloatTensor"); + torch_DoubleTensor_id = luaT_checktypename2id(L, "torch.DoubleTensor"); + + lua_newtable(L); + lua_pushvalue(L, -1); + lua_setfield(L, LUA_GLOBALSINDEX, "nn"); + + nn_FloatMin_init(L); + nn_FloatMax_init(L); + nn_FloatExp_init(L); + nn_FloatSqrt_init(L); + nn_FloatSquare_init(L); + nn_FloatHardTanh_init(L); + nn_FloatLogSoftMax_init(L); + nn_FloatMSECriterion_init(L); + nn_FloatAbsCriterion_init(L); + nn_FloatLogSigmoid_init(L); + nn_FloatSigmoid_init(L); + nn_FloatSoftMax_init(L); + nn_FloatSoftPlus_init(L); + nn_FloatTanh_init(L); + nn_FloatAbs_init(L); + nn_FloatHardShrink_init(L); + nn_FloatSoftShrink_init(L); + nn_FloatThreshold_init(L); + nn_FloatSparseLinear_init(L); + nn_FloatTemporalConvolution_init(L); + nn_FloatTemporalSubSampling_init(L); + nn_FloatSpatialConvolution_init(L); + nn_FloatSpatialConvolutionMap_init(L); + nn_FloatSpatialSubSampling_init(L); + nn_FloatSpatialMaxPooling_init(L); + nn_FloatVolumetricConvolution_init(L); + nn_FloatMultiMarginCriterion_init(L); + nn_FloatMultiLabelMarginCriterion_init(L); + + nn_DoubleMin_init(L); + nn_DoubleMax_init(L); + 
nn_DoubleExp_init(L); + nn_DoubleSqrt_init(L); + nn_DoubleSquare_init(L); + nn_DoubleHardTanh_init(L); + nn_DoubleLogSoftMax_init(L); + nn_DoubleMSECriterion_init(L); + nn_DoubleAbsCriterion_init(L); + nn_DoubleLogSigmoid_init(L); + nn_DoubleSigmoid_init(L); + nn_DoubleSoftMax_init(L); + nn_DoubleSoftPlus_init(L); + nn_DoubleTanh_init(L); + nn_DoubleAbs_init(L); + nn_DoubleHardShrink_init(L); + nn_DoubleSoftShrink_init(L); + nn_DoubleThreshold_init(L); + nn_DoubleSparseLinear_init(L); + nn_DoubleTemporalConvolution_init(L); + nn_DoubleTemporalSubSampling_init(L); + nn_DoubleSpatialConvolution_init(L); + nn_DoubleSpatialConvolutionMap_init(L); + nn_DoubleSpatialSubSampling_init(L); + nn_DoubleSpatialMaxPooling_init(L); + nn_DoubleVolumetricConvolution_init(L); + nn_DoubleMultiMarginCriterion_init(L); + nn_DoubleMultiLabelMarginCriterion_init(L); + + return 1; +} diff --git a/init.lua b/init.lua new file mode 100644 index 0000000..c6e7df0 --- /dev/null +++ b/init.lua @@ -0,0 +1,91 @@ +require('torch') +require('libnn') + +torch.include('nn', 'Module.lua') + +torch.include('nn', 'Concat.lua') +torch.include('nn', 'Parallel.lua') +torch.include('nn', 'Sequential.lua') + +torch.include('nn', 'Linear.lua') +torch.include('nn', 'SparseLinear.lua') +torch.include('nn', 'Reshape.lua') +torch.include('nn', 'Select.lua') +torch.include('nn', 'Narrow.lua') +torch.include('nn', 'Replicate.lua') + +torch.include('nn', 'Copy.lua') +torch.include('nn', 'Min.lua') +torch.include('nn', 'Max.lua') +torch.include('nn', 'Mean.lua') +torch.include('nn', 'Sum.lua') +torch.include('nn', 'CMul.lua') +torch.include('nn', 'Mul.lua') +torch.include('nn', 'Add.lua') + +torch.include('nn', 'CAddTable.lua') +torch.include('nn', 'CDivTable.lua') +torch.include('nn', 'CMulTable.lua') +torch.include('nn', 'CSubTable.lua') + +torch.include('nn', 'Euclidean.lua') +torch.include('nn', 'WeightedEuclidean.lua') +torch.include('nn', 'PairwiseDistance.lua') +torch.include('nn', 'CosineDistance.lua') +torch.include('nn', 'DotProduct.lua') + +torch.include('nn', 'Exp.lua') +torch.include('nn', 'HardTanh.lua') +torch.include('nn', 'LogSigmoid.lua') +torch.include('nn', 'LogSoftMax.lua') +torch.include('nn', 'Sigmoid.lua') +torch.include('nn', 'SoftMax.lua') +torch.include('nn', 'SoftMin.lua') +torch.include('nn', 'SoftPlus.lua') +torch.include('nn', 'SoftSign.lua') +torch.include('nn', 'Tanh.lua') +torch.include('nn', 'Abs.lua') +torch.include('nn', 'Power.lua') +torch.include('nn', 'Square.lua') +torch.include('nn', 'Sqrt.lua') +torch.include('nn', 'HardShrink.lua') +torch.include('nn', 'SoftShrink.lua') +torch.include('nn', 'Threshold.lua') + +torch.include('nn', 'LookupTable.lua') +torch.include('nn', 'SpatialConvolution.lua') +torch.include('nn', 'SpatialConvolutionMap.lua') +torch.include('nn', 'SpatialSubSampling.lua') +torch.include('nn', 'SpatialMaxPooling.lua') +torch.include('nn', 'SpatialLPPooling.lua') +torch.include('nn', 'TemporalConvolution.lua') +torch.include('nn', 'TemporalSubSampling.lua') +torch.include('nn', 'SpatialSubtractiveNormalization.lua') +torch.include('nn', 'SpatialZeroPadding.lua') + +torch.include('nn', 'VolumetricConvolution.lua') + +torch.include('nn', 'ParallelTable.lua') +torch.include('nn', 'ConcatTable.lua') +torch.include('nn', 'SplitTable.lua') +torch.include('nn', 'JoinTable.lua') +torch.include('nn', 'CriterionTable.lua') +torch.include('nn', 'Identity.lua') + +torch.include('nn', 'Criterion.lua') +torch.include('nn', 'MSECriterion.lua') +torch.include('nn', 'MarginCriterion.lua') 
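-- A minimal usage sketch (illustrative only, not part of init.lua; the layer sizes and learning
-- rate are arbitrary assumptions) showing how the modules and criteria included above compose:
--
--   require 'nn'
--   local mlp = nn.Sequential()
--   mlp:add(nn.Linear(10, 20))
--   mlp:add(nn.Tanh())
--   mlp:add(nn.Linear(20, 1))
--   local criterion = nn.MSECriterion()
--   local x, y = torch.rand(10), torch.rand(1)
--   local err = criterion:forward(mlp:forward(x), y)
--   mlp:zeroGradParameters()
--   mlp:backward(x, criterion:backward(mlp.output, y))
--   mlp:updateParameters(0.01)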
+torch.include('nn', 'AbsCriterion.lua') +torch.include('nn', 'ClassNLLCriterion.lua') +torch.include('nn', 'MultiCriterion.lua') +torch.include('nn', 'L1HingeEmbeddingCriterion.lua') +torch.include('nn', 'HingeEmbeddingCriterion.lua') +torch.include('nn', 'CosineEmbeddingCriterion.lua') +torch.include('nn', 'MarginRankingCriterion.lua') +torch.include('nn', 'MultiMarginCriterion.lua') +torch.include('nn', 'MultiLabelMarginCriterion.lua') + +torch.include('nn', 'StochasticGradient.lua') + +torch.include('nn', 'Jacobian.lua') +torch.include('nn', 'test.lua') diff --git a/test/test.lua b/test/test.lua new file mode 100644 index 0000000..c18d3a2 --- /dev/null +++ b/test/test.lua @@ -0,0 +1,1029 @@ +require 'torch' +require 'random' + +local mytester = torch.Tester() +local jac + +local precision = 1e-5 + +local nntest = {} +local nntestx = {} + +function nntest.Add() + local ini = math.random(10,20) + local inj = math.random(10,20) + local ink = math.random(10,20) + local input = torch.Tensor(ini,inj,ink):zero() + local module = nn.Add(ini*inj*ink) + + local err = jac.testJacobian(module,input) + mytester:assertlt(err,precision, 'error on state ') + local err = jac.testJacobianParameters(module, input, module.bias, module.gradBias) + mytester:assertlt(err,precision, 'error on bias ') + + local err = jac.testJacobianUpdateParameters(module, input, module.bias) + mytester:assertlt(err,precision, 'error on bias [direct update]') + + for t,err in pairs(jac.testAllUpdate(module, input, 'bias', 'gradBias')) do + mytester:assertlt(err, precision, string.format( + 'error on bias [%s]', t)) + end + + local ferr,berr = jac.testIO(module,input) + mytester:asserteq(ferr, 0, torch.typename(module) .. ' - i/o forward err ') + mytester:asserteq(berr, 0, torch.typename(module) .. ' - i/o backward err ') +end + +function nntest.CMul() + local ini = math.random(5,15) + local inj = math.random(5,15) + local ink = math.random(5,15) + local input = torch.Tensor(ini,inj,ink):zero() + local module = nn.CMul(ini*inj*ink) + + local err = jac.testJacobian(module,input) + mytester:assertlt(err,precision, 'error on state ') + + local err = jac.testJacobianParameters(module, input, module.weight, module.gradWeight) + mytester:assertlt(err,precision, 'error on weight ') + + local err = jac.testJacobianUpdateParameters(module, input, module.weight) + mytester:assertlt(err,precision, 'error on weight [direct update] ') + + for t,err in pairs(jac.testAllUpdate(module, input, 'weight', 'gradWeight')) do + mytester:assertlt(err, precision, string.format( + 'error on weight [%s]', t)) + end + + local ferr,berr = jac.testIO(module,input) + mytester:asserteq(ferr, 0, torch.typename(module) .. ' - i/o forward err ') + mytester:asserteq(berr, 0, torch.typename(module) .. ' - i/o backward err ') +end + +function nntest.Exp() + local ini = math.random(10,20) + local inj = math.random(10,20) + local ink = math.random(10,20) + local input = torch.Tensor(ini,inj,ink):zero() + local module = nn.Exp() + + local err = jac.testJacobian(module,input) + mytester:assertlt(err,precision, 'error on state ') + + local ferr,berr = jac.testIO(module,input) + mytester:asserteq(ferr, 0, torch.typename(module) .. ' - i/o forward err ') + mytester:asserteq(berr, 0, torch.typename(module) .. 
' - i/o backward err ') +end + +function nntest.HardTanh() + local ini = math.random(5,10) + local inj = math.random(5,10) + local ink = math.random(5,10) + local input = torch.Tensor(ink, inj, ini):zero() + + local module = nn.HardTanh() + + local err = jac.testJacobian(module, input) + mytester:assertlt(err, precision , 'error on state ') + + local ferr, berr = jac.testIO(module, input) + mytester:asserteq(ferr, 0, torch.typename(module) .. ' - i/o forward err ') + mytester:asserteq(berr, 0, torch.typename(module) .. ' - i/o backward err ') +end + +function nntest.Abs() + local ini = math.random(5,10) + local inj = math.random(5,10) + local ink = math.random(5,10) + local input = torch.Tensor(ink, inj, ini):zero() + + local module = nn.Abs() + + local err = jac.testJacobian(module, input) + mytester:assertlt(err, precision , 'error on state ') + + local ferr, berr = jac.testIO(module, input) + mytester:asserteq(ferr, 0, torch.typename(module) .. ' - i/o forward err ') + mytester:asserteq(berr, 0, torch.typename(module) .. ' - i/o backward err ') +end + +function nntest.Threshold() + local ini = math.random(5,10) + local inj = math.random(5,10) + local ink = math.random(5,10) + local input = torch.Tensor(ink, inj, ini):zero() + + local module = nn.Threshold(random.uniform(-2,2),random.uniform(-2,2)) + + local err = nn.Jacobian.testJacobian(module, input) + mytester:assertlt(err, precision, 'error on state ') + + local ferr, berr = nn.Jacobian.testIO(module, input) + mytester:asserteq(ferr, 0, torch.typename(module) .. ' - i/o forward err ') + mytester:asserteq(berr, 0, torch.typename(module) .. ' - i/o backward err ') +end + +function nntest.HardShrink() + local ini = math.random(5,10) + local inj = math.random(5,10) + local ink = math.random(5,10) + local input = torch.Tensor(ink, inj, ini):zero() + + local module = nn.HardShrink(math.random()/2) + + local err = nn.Jacobian.testJacobian(module, input) + mytester:assertlt(err, precision, 'error on state ') + + local ferr, berr = nn.Jacobian.testIO(module, input) + mytester:asserteq(ferr, 0, torch.typename(module) .. ' - i/o forward err ') + mytester:asserteq(berr, 0, torch.typename(module) .. ' - i/o backward err ') +end + +function nntest.SoftShrink() + local ini = math.random(5,10) + local inj = math.random(5,10) + local ink = math.random(5,10) + local input = torch.Tensor(ink, inj, ini):zero() + + local module = nn.SoftShrink(math.random()/2) + + local err = nn.Jacobian.testJacobian(module, input) + mytester:assertlt(err, precision, 'error on state ') + + local ferr, berr = nn.Jacobian.testIO(module, input) + mytester:asserteq(ferr, 0, torch.typename(module) .. ' - i/o forward err ') + mytester:asserteq(berr, 0, torch.typename(module) .. ' - i/o backward err ') +end + +function nntest.Power() + local in1 = torch.rand(10,20) + local module = nn.Power(2) + local out = module:forward(in1) + local err = out:dist(in1:cmul(in1)) + mytester:asserteq(err, 0, torch.typename(module) .. ' - forward err ') + + local ini = math.random(5,10) + local inj = math.random(5,10) + local ink = math.random(5,10) + local pw = random.uniform()*math.random(1,10) + local input = torch.Tensor(ink, inj, ini):zero() + + local module = nn.Power(pw) + + local err = nn.Jacobian.testJacobian(module, input, 0.1, 2) + mytester:assertlt(err, precision, 'error on state ') + + local ferr, berr = nn.Jacobian.testIO(module,input, 0.1, 2) + mytester:asserteq(ferr, 0, torch.typename(module) .. ' - i/o forward err ') + mytester:asserteq(berr, 0, torch.typename(module) .. 
' - i/o backward err ') +end + +function nntest.Square() + local in1 = torch.rand(10,20) + local module = nn.Square() + local out = module:forward(in1) + local err = out:dist(in1:cmul(in1)) + mytester:asserteq(err, 0, torch.typename(module) .. ' - forward err ') + + local ini = math.random(5,10) + local inj = math.random(5,10) + local ink = math.random(5,10) + local input = torch.Tensor(ink, inj, ini):zero() + + local module = nn.Square() + + local err = nn.Jacobian.testJacobian(module, input) + mytester:assertlt(err, precision, 'error on state ') + + local ferr, berr = nn.Jacobian.testIO(module, input) + mytester:asserteq(ferr, 0, torch.typename(module) .. ' - i/o forward err ') + mytester:asserteq(berr, 0, torch.typename(module) .. ' - i/o backward err ') +end + +function nntest.Sqrt() + local in1 = torch.rand(10,20) + local module = nn.Sqrt() + local out = module:forward(in1) + local err = out:dist(in1:sqrt()) + mytester:asserteq(err, 0, torch.typename(module) .. ' - forward err ') + + local ini = math.random(5,10) + local inj = math.random(5,10) + local ink = math.random(5,10) + local input = torch.Tensor(ink, inj, ini):zero() + + local module = nn.Sqrt() + + local err = nn.Jacobian.testJacobian(module, input, 0.1, 2) + mytester:assertlt(err, precision, 'error on state ') + + local ferr, berr = nn.Jacobian.testIO(module, input, 0, 2) + mytester:asserteq(ferr, 0, torch.typename(module) .. ' - i/o forward err ') + mytester:asserteq(berr, 0, torch.typename(module) .. ' - i/o backward err ') +end + +function nntest.Linear() + local ini = math.random(50,70) + local inj = math.random(50,70) + local input = torch.Tensor(ini):zero() + local module = nn.Linear(ini,inj) + + -- 1D + local err = jac.testJacobian(module,input) + mytester:assertlt(err,precision, 'error on state ') + + local err = jac.testJacobianParameters(module, input, module.weight, module.gradWeight) + mytester:assertlt(err,precision, 'error on weight ') + + local err = jac.testJacobianParameters(module, input, module.bias, module.gradBias) + mytester:assertlt(err,precision, 'error on bias ') + + local err = jac.testJacobianUpdateParameters(module, input, module.weight) + mytester:assertlt(err,precision, 'error on weight [direct update] ') + + local err = jac.testJacobianUpdateParameters(module, input, module.bias) + mytester:assertlt(err,precision, 'error on bias [direct update] ') + + for t,err in pairs(jac.testAllUpdate(module, input, 'weight', 'gradWeight')) do + mytester:assertlt(err, precision, string.format( + 'error on weight [%s]', t)) + end + + for t,err in pairs(jac.testAllUpdate(module, input, 'bias', 'gradBias')) do + mytester:assertlt(err, precision, string.format( + 'error on bias [%s]', t)) + end + + -- 2D + local nframe = math.random(50,70) + local input = torch.Tensor(nframe, ini):zero() + + local err = jac.testJacobian(module,input) + mytester:assertlt(err,precision, 'error on state ') + + local err = jac.testJacobianParameters(module, input, module.weight, module.gradWeight) + mytester:assertlt(err,precision, 'error on weight ') + + local err = jac.testJacobianParameters(module, input, module.bias, module.gradBias) + mytester:assertlt(err,precision, 'error on bias ') + + local err = jac.testJacobianUpdateParameters(module, input, module.weight) + mytester:assertlt(err,precision, 'error on weight [direct update] ') + + local err = jac.testJacobianUpdateParameters(module, input, module.bias) + mytester:assertlt(err,precision, 'error on bias [direct update] ') + + for t,err in pairs(jac.testAllUpdate(module, 
input, 'weight', 'gradWeight')) do + mytester:assertlt(err, precision, string.format( + 'error on weight [%s]', t)) + end + + for t,err in pairs(jac.testAllUpdate(module, input, 'bias', 'gradBias')) do + mytester:assertlt(err, precision, string.format( + 'error on bias [%s]', t)) + end + + -- IO + local ferr,berr = jac.testIO(module,input) + mytester:asserteq(ferr, 0, torch.typename(module) .. ' - i/o forward err ') + mytester:asserteq(berr, 0, torch.typename(module) .. ' - i/o backward err ') +end + +function nntest.Euclidean() + local ini = math.random(50,70) + local inj = math.random(50,70) + local input = torch.Tensor(ini):zero() + local module = nn.Euclidean(ini,inj) + + local err = jac.testJacobian(module,input) + mytester:assertlt(err,precision, 'error on state ') + + local err = jac.testJacobianParameters(module, input, module.weight, module.gradWeight) + mytester:assertlt(err,precision, 'error on weight ') + + local ferr,berr = jac.testIO(module,input) + mytester:asserteq(ferr, 0, torch.typename(module) .. ' - i/o forward err ') + mytester:asserteq(berr, 0, torch.typename(module) .. ' - i/o backward err ') +end + +function nntest.WeightedEuclidean() + local ini = math.random(10,20) + local inj = math.random(10,20) + local input = torch.Tensor(ini):zero() + local module = nn.WeightedEuclidean(ini,inj) + + local err = jac.testJacobian(module,input) + mytester:assertlt(err,precision, 'error on state ') + + local err = jac.testJacobianParameters(module, input, module.weight, module.gradWeight) + mytester:assertlt(err,precision, 'error on weight ') + + local err = jac.testJacobianParameters(module, input, module.bias, module.gradBias) + mytester:assertlt(err,precision, 'error on bias ') + + local ferr,berr = jac.testIO(module,input) + mytester:asserteq(ferr, 0, torch.typename(module) .. ' - i/o forward err ') + mytester:asserteq(berr, 0, torch.typename(module) .. ' - i/o backward err ') +end + +function nntest.LogSigmoid() + local ini = math.random(10,20) + local inj = math.random(10,20) + local ink = math.random(10,20) + local input = torch.Tensor(ini,inj,ink):zero() + local module = nn.LogSigmoid() + + local err = jac.testJacobian(module,input) + mytester:assertlt(err,precision, 'error on state ') + + local ferr,berr = jac.testIO(module,input) + mytester:asserteq(ferr, 0, torch.typename(module) .. ' - i/o forward err ') + mytester:asserteq(berr, 0, torch.typename(module) .. ' - i/o backward err ') +end + +function nntest.LogSoftmax() + local ini = math.random(10,20) + local inj = math.random(10,20) + local input = torch.Tensor(ini,inj):zero() + local module = nn.LogSoftMax() + + local err = jac.testJacobian(module,input) + mytester:assertlt(err,precision, 'error on state ') + + local ferr,berr = jac.testIO(module,input) + mytester:asserteq(ferr, 0, torch.typename(module) .. ' - i/o forward err ') + mytester:asserteq(berr, 0, torch.typename(module) .. ' - i/o backward err ') +end + +-- function nntest.TemporalLogSoftmax() +-- local ini = math.random(10,20) +-- local inj = math.random(10,20) +-- local input = torch.Tensor(ini,inj):zero() +-- local module = nn.TemporalLogSoftMax() + +-- local err = jac.testJacobian(module,input) +-- mytester:assertlt(err,precision, 'error on state ') + +-- local ferr,berr = jac.testIO(module,input) +-- mytester:asserteq(ferr, 0, torch.typename(module) .. ' - i/o forward err ') +-- mytester:asserteq(berr, 0, torch.typename(module) .. 
' - i/o backward err ') +-- end + +function nntest.Max() + local ini = math.random(10,20) + local inj = math.random(10,20) + local ink = math.random(10,20) + local input = torch.Tensor(ini,inj*ink):zero() + local module = nn.Max(1) + + local err = jac.testJacobian(module,input) + mytester:assertlt(err,precision, 'error on state ') + + local ferr,berr = jac.testIO(module,input) + mytester:asserteq(ferr, 0, torch.typename(module) .. ' - i/o forward err ') + mytester:asserteq(berr, 0, torch.typename(module) .. ' - i/o backward err ') +end + +function nntest.Min() + local ini = math.random(10,20) + local inj = math.random(10,20) + local ink = math.random(10,20) + local input = torch.Tensor(ini,inj*ink):zero() + local module = nn.Min(1) + + local err = jac.testJacobian(module,input) + mytester:assertlt(err,precision, 'error on state ') + + local ferr,berr = jac.testIO(module,input) + mytester:asserteq(ferr, 0, torch.typename(module) .. ' - i/o forward err ') + mytester:asserteq(berr, 0, torch.typename(module) .. ' - i/o backward err ') +end + +function nntest.Mean() + local ini = math.random(10,20) + local inj = math.random(10,20) + local ink = math.random(10,20) + local input = torch.Tensor(ini,inj,ink):zero() + local module = nn.Mean(random.random(1,3)) + + local err = jac.testJacobian(module,input) + mytester:assertlt(err,precision, 'error on state ') + + local ferr,berr = jac.testIO(module,input) + mytester:asserteq(ferr, 0, torch.typename(module) .. ' - i/o forward err ') + mytester:asserteq(berr, 0, torch.typename(module) .. ' - i/o backward err ') +end + +function nntest.Mul() + local ini = math.random(10,20) + local inj = math.random(10,20) + local ink = math.random(10,20) + local input = torch.Tensor(ini,inj,ink):zero() + local module = nn.Mul(ini*inj*ink) + + local err = jac.testJacobian(module,input) + mytester:assertlt(err,precision, 'error on state ') + local err = jac.testJacobianParameters(module, input, module.weight, module.gradWeight) + mytester:assertlt(err,precision, 'error on weight ') + local err = jac.testJacobianUpdateParameters(module, input, module.weight) + mytester:assertlt(err,precision, 'error on weight [direct update] ') + + for t,err in pairs(jac.testAllUpdate(module, input, 'weight', 'gradWeight')) do + mytester:assertlt(err, precision, string.format( + 'error on weight [%s]', t)) + end + + local ferr,berr = jac.testIO(module,input) + mytester:asserteq(ferr, 0, torch.typename(module) .. ' - i/o forward err ') + mytester:asserteq(berr, 0, torch.typename(module) .. ' - i/o backward err ') +end + +function nntest.Sigmoid() + local ini = math.random(10,20) + local inj = math.random(10,20) + local ink = math.random(10,20) + local input = torch.Tensor(ini,inj,ink):zero() + local module = nn.Sigmoid() + + local err = jac.testJacobian(module,input) + mytester:assertlt(err,precision, 'error on state ') + + local ferr,berr = jac.testIO(module,input) + mytester:asserteq(ferr, 0, torch.typename(module) .. ' - i/o forward err ') + mytester:asserteq(berr, 0, torch.typename(module) .. ' - i/o backward err ') +end + +function nntest.Softmax() + local ini = math.random(10,20) + local ink = math.random(10,20) + local input = torch.Tensor(ink, ini):zero() + local module = nn.SoftMax() + + local err = jac.testJacobian(module,input) + mytester:assertlt(err,precision, 'error on state ') + + local ferr,berr = jac.testIO(module,input) + mytester:asserteq(ferr, 0, torch.typename(module) .. ' - i/o forward err ') + mytester:asserteq(berr, 0, torch.typename(module) .. 
' - i/o backward err ') +end + +function nntest.Softmin() + local ini = math.random(10,20) + local ink = math.random(10,20) + local input = torch.Tensor(ink, ini):zero() + local module = nn.SoftMin() + + local err = jac.testJacobian(module,input) + mytester:assertlt(err,precision, 'error on state ') + + local ferr,berr = jac.testIO(module,input) + mytester:asserteq(ferr, 0, torch.typename(module) .. ' - i/o forward err ') + mytester:asserteq(berr, 0, torch.typename(module) .. ' - i/o backward err ') +end + +function nntest.Softsign() + local ini = math.random(10,20) + local ink = math.random(10,20) + local input = torch.Tensor(ink, ini):zero() + local module = nn.SoftSign() + + local err = jac.testJacobian(module,input) + mytester:assertlt(err,precision, 'error on state ') + + local ferr,berr = jac.testIO(module,input) + mytester:asserteq(ferr, 0, torch.typename(module) .. ' - i/o forward err ') + mytester:asserteq(berr, 0, torch.typename(module) .. ' - i/o backward err ') +end + +function nntest.SoftPlus() + local ini = math.random(10,20) + local inj = math.random(10,20) + local ink = math.random(10,20) + local input = torch.Tensor(ini,inj,ink):zero() + local module = nn.SoftPlus() + + local err = jac.testJacobian(module,input) + mytester:assertlt(err,precision, 'error on state ') + + local ferr,berr = jac.testIO(module,input) + mytester:asserteq(ferr, 0, torch.typename(module) .. ' - i/o forward err ') + mytester:asserteq(berr, 0, torch.typename(module) .. ' - i/o backward err ') +end + +function nntest.SpatialSubtractiveNormalization_2dkernel() + local inputSize = math.random(11,20) + local kersize = 9 + local nbfeatures = math.random(5,10) + local kernel = torch.Tensor(kersize,kersize):fill(1) + local module = nn.SpatialSubtractiveNormalization(nbfeatures,kernel) + local input = torch.rand(nbfeatures,inputSize,inputSize) + + local err = jac.testJacobian(module,input) + mytester:assertlt(err,precision, 'error on state ') + + local ferr,berr = jac.testIO(module,input) + mytester:asserteq(ferr, 0, torch.typename(module) .. ' - i/o forward err ') + mytester:asserteq(berr, 0, torch.typename(module) .. ' - i/o backward err ') +end + +function nntest.SpatialSubtractiveNormalization_1dkernel() + local inputSize = math.random(11,20) + local kersize = 9 + local nbfeatures = math.random(5,10) + local kernel = torch.Tensor(kersize):fill(1) + local module = nn.SpatialSubtractiveNormalization(nbfeatures,kernel) + local input = torch.rand(nbfeatures,inputSize,inputSize) + + local err = jac.testJacobian(module,input) + mytester:assertlt(err,precision, 'error on state ') + + local ferr,berr = jac.testIO(module,input) + mytester:asserteq(ferr, 0, torch.typename(module) .. ' - i/o forward err ') + mytester:asserteq(berr, 0, torch.typename(module) .. 
' - i/o backward err ') +end + +function nntest.SpatialConvolution() + local from = math.random(1,10) + local to = math.random(1,10) + local ki = math.random(1,10) + local kj = math.random(1,10) + local si = math.random(1,4) + local sj = math.random(1,4) + local outi = math.random(10,20) + local outj = math.random(10,20) + local ini = (outi-1)*si+ki + local inj = (outj-1)*sj+kj + local module = nn.SpatialConvolution(from, to, ki, kj, si, sj) + local input = torch.Tensor(from, inj, ini):zero() + + -- stochastic + + local err = jac.testJacobian(module, input) + mytester:assertlt(err, precision, 'error on state ') + + local err = jac.testJacobianParameters(module, input, module.weight, module.gradWeight) + mytester:assertlt(err , precision, 'error on weight ') + + local err = jac.testJacobianParameters(module, input, module.bias, module.gradBias) + mytester:assertlt(err , precision, 'error on bias ') + + local err = jac.testJacobianUpdateParameters(module, input, module.weight) + mytester:assertlt(err , precision, 'error on weight [direct update] ') + + local err = jac.testJacobianUpdateParameters(module, input, module.bias) + mytester:assertlt(err , precision, 'error on bias [direct update] ') + + for t,err in pairs(jac.testAllUpdate(module, input, 'weight', 'gradWeight')) do + mytester:assertlt(err, precision, string.format( + 'error on weight [%s]', t)) + end + + for t,err in pairs(jac.testAllUpdate(module, input, 'bias', 'gradBias')) do + mytester:assertlt(err, precision, string.format( + 'error on bias [%s]', t)) + end + + -- batch + + --verbose = true + local batch = math.random(2,5) + outi = math.random(4,8) + outj = math.random(4,8) + ini = (outi-1)*si+ki + inj = (outj-1)*sj+kj + module = nn.SpatialConvolution(from, to, ki, kj, si, sj) + input = torch.Tensor(batch,from,inj,ini):zero() + +-- print(from, to, ki, kj, si, sj, batch, ini, inj) +-- print(module.weight:size()) +-- print(module.gradWeight:size()) + + local err = jac.testJacobian(module, input) + mytester:assertlt(err, precision, 'batch error on state ') + + local err = jac.testJacobianParameters(module, input, module.weight, module.gradWeight) + mytester:assertlt(err , precision, 'batch error on weight ') + + local err = jac.testJacobianParameters(module, input, module.bias, module.gradBias) + mytester:assertlt(err , precision, 'batch error on bias ') + + local err = jac.testJacobianUpdateParameters(module, input, module.weight) + mytester:assertlt(err , precision, 'batch error on weight [direct update] ') + + local err = jac.testJacobianUpdateParameters(module, input, module.bias) + mytester:assertlt(err , precision, 'batch error on bias [direct update] ') + + for t,err in pairs(jac.testAllUpdate(module, input, 'weight', 'gradWeight')) do + mytester:assertlt(err, precision, string.format( + 'error on weight [%s]', t)) + end + + for t,err in pairs(jac.testAllUpdate(module, input, 'bias', 'gradBias')) do + mytester:assertlt(err, precision, string.format( + 'batch error on bias [%s]', t)) + end + + local ferr, berr = jac.testIO(module, input) + mytester:asserteq(0, ferr, torch.typename(module) .. ' - i/o forward err ') + mytester:asserteq(0, berr, torch.typename(module) .. 
' - i/o backward err ') +end + +function nntest.SpatialConvolutionMap() + local from = math.random(1,10) + local fanin = math.random(1, from) + local to = math.random(1,10) + local ki = math.random(1,10) + local kj = math.random(1,10) + local si = math.random(1,4) + local sj = math.random(1,4) + local outi = math.random(10,20) + local outj = math.random(10,20) + local ini = (outi-1)*si+ki + local inj = (outj-1)*sj+kj + + local module = nn.SpatialConvolutionMap(nn.tables.random(from, to, fanin), ki, kj, si, sj) + local input = torch.Tensor(from, inj, ini):zero() + + local err = jac.testJacobian(module, input) + mytester:assertlt(err, precision, 'error on state ') + + local err = jac.testJacobianParameters(module, input, module.weight, module.gradWeight) + mytester:assertlt(err , precision, 'error on weight ') + + local err = jac.testJacobianParameters(module, input, module.bias, module.gradBias) + mytester:assertlt(err , precision, 'error on bias ') + + for t,err in pairs(jac.testAllUpdate(module, input, 'weight', 'gradWeight')) do + mytester:assertlt(err, precision, string.format( + 'error on weight [%s]', t)) + end + + for t,err in pairs(jac.testAllUpdate(module, input, 'bias', 'gradBias')) do + mytester:assertlt(err, precision, string.format( + 'error on bias [%s]', t)) + end + + local ferr, berr = jac.testIO(module, input) + mytester:asserteq(0, ferr, torch.typename(module) .. ' - i/o forward err ') + mytester:asserteq(0, berr, torch.typename(module) .. ' - i/o backward err ') +end + +function batchcompare(smod, sin, plist) + local bs = torch.LongStorage(sin:size():size()+1) + bs[1] = 1 + for i=1,sin:size():size() do bs[i+1] = sin:size()[i] end + local bin = torch.Tensor(bs):copy(sin) + local bmod = smod:clone() + + local sout = smod:forward(sin):clone() + local bout = bmod:forward(bin):clone() + + local sgout = torch.randn(sout:size()) + local bgout = torch.Tensor(bout:size()) + bgout:copy(sgout) + + local sgin = smod:backward(sin, sgout) + local bgin = bmod:backward(bin, bgout) + + smod:accGradParameters(sin, sgout, 1) + bmod:accGradParameters(bin, bgout, 1) + + mytester:assertTensorEq(sout,bout:select(1,1), 1e-8, 'batchcompare error on output') + mytester:assertTensorEq(sgin,bgin:select(1,1), 1e-8, 'batchcompare error on gradInput') + + for i,v in pairs(plist) do + mytester:assertTensorEq(smod[v],bmod[v], 1e-8, 'batchcompare error on ' .. 
v) + end +end + +function nntest.SpatialConvolutionBatchCompare() + local from = math.random(1,10) + local to = math.random(1,10) + local ki = math.random(1,10) + local kj = math.random(1,10) + local si = math.random(1,4) + local sj = math.random(1,4) + local outi = math.random(10,20) + local outj = math.random(10,20) + local ini = (outi-1)*si+ki + local inj = (outj-1)*sj+kj + + local module = nn.SpatialConvolution(from, to, ki, kj, si, sj) + local input = torch.randn(from,inj,ini) + + batchcompare(module,input, {'weight','bias','gradWeight','gradBias'}) +end + +function nntest.SpatialSubSamplingBatchCompare() + local from = math.random(1,10) + local ki = math.random(1,10) + local kj = math.random(1,10) + local si = math.random(1,4) + local sj = math.random(1,4) + local outi = math.random(10,20) + local outj = math.random(10,20) + local ini = (outi-1)*si+ki + local inj = (outj-1)*sj+kj + local module = nn.SpatialSubSampling(from, ki, kj, si, sj) + local input = torch.randn(from,inj,ini)--torch.Tensor(from, inj, ini):zero() + + batchcompare(module,input, {'weight','bias','gradWeight','gradBias'}) +end + +function nntest.SpatialSubSampling() + local from = math.random(1,10) + local ki = math.random(1,10) + local kj = math.random(1,10) + local si = math.random(1,4) + local sj = math.random(1,4) + local outi = math.random(10,20) + local outj = math.random(10,20) + local ini = (outi-1)*si+ki + local inj = (outj-1)*sj+kj + local module = nn.SpatialSubSampling(from, ki, kj, si, sj) + local input = torch.Tensor(from, inj, ini):zero() + + local err = jac.testJacobian(module, input) + mytester:assertlt(err, precision, 'error on state ') + + local err = jac.testJacobianParameters(module, input, module.weight, module.gradWeight) + mytester:assertlt(err , precision, 'error on weight ') + + local err = jac.testJacobianParameters(module, input, module.bias, module.gradBias) + mytester:assertlt(err , precision, 'error on bias ') + + local err = jac.testJacobianUpdateParameters(module, input, module.weight) + mytester:assertlt(err , precision, 'error on weight [direct update] ') + + local err = jac.testJacobianUpdateParameters(module, input, module.bias) + mytester:assertlt(err , precision, 'error on bias [direct update] ') + + for t,err in pairs(jac.testAllUpdate(module, input, 'weight', 'gradWeight')) do + mytester:assertlt(err, precision, string.format( + 'error on weight [%s]', t)) + end + + for t,err in pairs(jac.testAllUpdate(module, input, 'bias', 'gradBias')) do + mytester:assertlt(err, precision, string.format( + 'error on bias [%s]', t)) + end + + --verbose = true + local batch = math.random(2,5) + outi = math.random(4,8) + outj = math.random(4,8) + ini = (outi-1)*si+ki + inj = (outj-1)*sj+kj + module = nn.SpatialSubSampling(from, ki, kj, si, sj) + input = torch.Tensor(batch,from,inj,ini):zero() + +-- print(from, to, ki, kj, si, sj, batch, ini, inj) +-- print(module.weight:size()) +-- print(module.gradWeight:size()) + + local err = jac.testJacobian(module, input) + mytester:assertlt(err, precision, 'batch error on state ') + + local err = jac.testJacobianParameters(module, input, module.weight, module.gradWeight) + mytester:assertlt(err , precision, 'batch error on weight ') + + local err = jac.testJacobianParameters(module, input, module.bias, module.gradBias) + mytester:assertlt(err , precision, 'batch error on bias ') + + local err = jac.testJacobianUpdateParameters(module, input, module.weight) + mytester:assertlt(err , precision, 'batch error on weight [direct update] ') + + local err = 
jac.testJacobianUpdateParameters(module, input, module.bias) + mytester:assertlt(err , precision, 'batch error on bias [direct update] ') + + for t,err in pairs(jac.testAllUpdate(module, input, 'weight', 'gradWeight')) do + mytester:assertlt(err, precision, string.format( + 'batch error on weight [%s]', t)) + end + + for t,err in pairs(jac.testAllUpdate(module, input, 'bias', 'gradBias')) do + mytester:assertlt(err, precision, string.format( + 'batch error on bias [%s]', t)) + end + + local ferr, berr = jac.testIO(module, input) + mytester:asserteq(0, ferr, torch.typename(module) .. ' - i/o forward err ') + mytester:asserteq(0, berr, torch.typename(module) .. ' - i/o backward err ') +end + +function nntest.SpatialMaxPooling() + local fanin = math.random(1,4) + local osizex = math.random(1,20) + local osizey = math.random(1,20) + local mx = math.random(2,4) + local my = math.random(2,4) + local sizex = osizex*mx + local sizey = osizey*my + local module = nn.SpatialMaxPooling(mx,my,mx,my) + local input = torch.rand(fanin,sizey,sizex) + + local err = jac.testJacobian(module, input) + mytester:assertlt(err, precision, 'error on state ') + + local ferr, berr = jac.testIO(module, input) + mytester:asserteq(ferr, 0, torch.typename(module) .. ' - i/o forward err ') + mytester:asserteq(berr, 0, torch.typename(module) .. ' - i/o backward err ') +end + +function nntest.SpatialLPPooling() + local fanin = math.random(1,4) + local osizex = math.random(1,4) + local osizey = math.random(1,4) + local p = math.random(1,4) + local mx = math.random(2,8) + local my = math.random(2,8) + local dx = math.random(2,mx) + local dy = math.random(2,my) + local sizex = osizex*mx + local sizey = osizey*my + local module = nn.SpatialLPPooling(fanin,p,mx,my,dx,dy) + local input = torch.rand(fanin,sizey,sizex) + + local err = jac.testJacobian(module, input) + mytester:assertlt(err, precision, 'error on state ') + + local ferr, berr = jac.testIO(module, input) + mytester:asserteq(ferr, 0, torch.typename(module) .. ' - i/o forward err ') + mytester:asserteq(berr, 0, torch.typename(module) .. ' - i/o backward err ') +end + +function nntest.Sum() + local ini = math.random(10,20) + local inj = math.random(10,20) + local ink = math.random(10,20) + local input = torch.Tensor(ini,inj,ink):zero() + local module = nn.Sum(random.random(1,3)) + + local err = jac.testJacobian(module,input) + mytester:assertlt(err,precision, 'error on state ') + + local ferr,berr = jac.testIO(module,input) + mytester:asserteq(ferr, 0, torch.typename(module) .. ' - i/o forward err ') + mytester:asserteq(berr, 0, torch.typename(module) .. ' - i/o backward err ') +end + +function nntest.Tanh() + local ini = math.random(5,10) + local inj = math.random(5,10) + local ink = math.random(5,10) + local input = torch.Tensor(ink, inj, ini):zero() + + local module = nn.Tanh() + + local err = jac.testJacobian(module, input) + mytester:assertlt(err, precision , 'error on state ') + + local ferr, berr = jac.testIO(module, input) + mytester:asserteq(ferr, 0, torch.typename(module) .. ' - i/o forward err ') + mytester:asserteq(berr, 0, torch.typename(module) .. 
' - i/o backward err ') +end + +function nntest.TemporalConvolution() + local from = math.random(1,10) + local to = math.random(1,10) + local ki = math.random(1,10) + local si = math.random(1,4) + local outi = math.random(10,20) + local ini = (outi-1)*si+ki + local module = nn.TemporalConvolution(from, to, ki,si) + local input = torch.Tensor(ini, from):zero() + + local err = jac.testJacobian(module, input) + mytester:assertlt(err, precision, 'error on state ') + + local err = jac.testJacobianParameters(module, input, module.weight, module.gradWeight) + mytester:assertlt(err , precision, 'error on weight ') + + local err = jac.testJacobianParameters(module, input, module.bias, module.gradBias) + mytester:assertlt(err , precision, 'error on bias ') + + local err = jac.testJacobianUpdateParameters(module, input, module.weight) + mytester:assertlt(err , precision, 'error on weight [direct update]') + + local err = jac.testJacobianUpdateParameters(module, input, module.bias) + mytester:assertlt(err , precision, 'error on bias [direct update]') + + for t,err in pairs(jac.testAllUpdate(module, input, 'weight', 'gradWeight')) do + mytester:assertlt(err, precision, string.format( + 'error on weight [%s]', t)) + end + + for t,err in pairs(jac.testAllUpdate(module, input, 'bias', 'gradBias')) do + mytester:assertlt(err, precision, string.format( + 'error on bias [%s]', t)) + end + + local ferr, berr = jac.testIO(module, input) + mytester:asserteq(0, ferr, torch.typename(module) .. ' - i/o forward err ') + mytester:asserteq(0, berr, torch.typename(module) .. ' - i/o backward err ') +end + +function nntest.TemporalSubSampling() + local from = math.random(1,10) + local ki = math.random(1,10) + local si = math.random(1,4) + local outi = math.random(10,20) + local ini = (outi-1)*si+ki + local module = nn.TemporalSubSampling(from, ki, si) + local input = torch.Tensor(ini, from):zero() + + local err = jac.testJacobian(module, input) + mytester:assertlt(err, precision, 'error on state ') + + local err = jac.testJacobianParameters(module, input, module.weight, module.gradWeight) + mytester:assertlt(err , precision, 'error on weight ') + + local err = jac.testJacobianParameters(module, input, module.bias, module.gradBias) + mytester:assertlt(err , precision, 'error on bias ') + + local err = jac.testJacobianUpdateParameters(module, input, module.weight) + mytester:assertlt(err , precision, 'error on weight [direct update] ') + + local err = jac.testJacobianUpdateParameters(module, input, module.bias) + mytester:assertlt(err , precision, 'error on bias [direct update] ') + + for t,err in pairs(jac.testAllUpdate(module, input, 'weight', 'gradWeight')) do + mytester:assertlt(err, precision, string.format( + 'error on weight [%s]', t)) + end + + for t,err in pairs(jac.testAllUpdate(module, input, 'bias', 'gradBias')) do + mytester:assertlt(err, precision, string.format( + 'error on bias [%s]', t)) + end + + local ferr, berr = jac.testIO(module, input) + mytester:asserteq(0, ferr, torch.typename(module) .. ' - i/o forward err ') + mytester:asserteq(0, berr, torch.typename(module) .. 
' - i/o backward err ') +end + +function nntest.VolumetricConvolution() + local from = math.random(2,5) + local to = math.random(2,5) + local kt = math.random(3,7) + local ki = math.random(3,7) + local kj = math.random(3,7) + local st = math.random(2,4) + local si = math.random(2,4) + local sj = math.random(2,4) + local outt = math.random(3,7) + local outi = math.random(3,7) + local outj = math.random(3,7) + local int = (outt-1)*st+kt + local ini = (outi-1)*si+ki + local inj = (outj-1)*sj+kj + local module = nn.VolumetricConvolution(from, to, kt, ki, kj, st, si, sj) + local input = torch.Tensor(from, int, inj, ini):zero() + + local err = jac.testJacobian(module, input) + mytester:assertlt(err, precision, 'error on state ') + + local err = jac.testJacobianParameters(module, input, module.weight, module.gradWeight) + mytester:assertlt(err , precision, 'error on weight ') + + local err = jac.testJacobianParameters(module, input, module.bias, module.gradBias) + mytester:assertlt(err , precision, 'error on bias ') + + local err = jac.testJacobianUpdateParameters(module, input, module.weight) + mytester:assertlt(err , precision, 'error on weight [direct update] ') + + local err = jac.testJacobianUpdateParameters(module, input, module.bias) + mytester:assertlt(err , precision, 'error on bias [direct update] ') + + for t,err in pairs(jac.testAllUpdate(module, input, 'weight', 'gradWeight')) do + mytester:assertlt(err, precision, string.format( + 'error on weight [%s]', t)) + end + + for t,err in pairs(jac.testAllUpdate(module, input, 'bias', 'gradBias')) do + mytester:assertlt(err, precision, string.format( + 'error on bias [%s]', t)) + end + + local ferr, berr = jac.testIO(module, input) + mytester:asserteq(0, ferr, torch.typename(module) .. ' - i/o forward err ') + mytester:asserteq(0, berr, torch.typename(module) .. ' - i/o backward err ') +end + + +mytester:add(nntest) +--mytester:add(test_SpatialConvolution) +--mytester:add(test_AbsCriterion) + +if not nn then + require 'nn' + jac = nn.Jacobian + mytester:run() +else + jac = nn.Jacobian + function nn.test() + -- randomize stuff + math.randomseed(os.time()) + mytester:run() + end +end -- cgit v1.2.3
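For completeness, a minimal way to exercise this new suite: test/test.lua exposes nn.test(), which seeds math.random from os.time() and runs every registered nntest case through torch.Tester, and init.lua pulls the file in via torch.include('nn', 'test.lua'). Assuming the package has been built and installed so that require 'nn' succeeds, a tiny driver script (hypothetical, not part of this commit) could look roughly like:

    -- run_nn_tests.lua (hypothetical helper, not in the patch)
    require 'torch'
    require 'nn'   -- loading nn also includes Jacobian.lua and test.lua
    nn.test()      -- calls math.randomseed(os.time()) and runs all nntest.* cases

Each Jacobian check above compares analytic gradients against finite differences and passes only when the discrepancy stays below precision = 1e-5, while the testIO checks assert exact agreement (an error of 0) for the forward and backward results.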