
github.com/torch/nn.git
author     Ronan Collobert <ronan@collobert.com>  2012-01-25 17:55:20 +0400
committer  Ronan Collobert <ronan@collobert.com>  2012-01-25 17:55:20 +0400
commit     4df3893abd1b9f840f1d9a8c1859799ccbf941de (patch)
tree       e8a1e1cc1b6ea6e47855347b157eaf419fdb357b
initial revamp of torch7 tree
-rw-r--r--  Abs.lua  15
-rw-r--r--  AbsCriterion.lua  14
-rw-r--r--  Add.lua  54
-rw-r--r--  CAddTable.lua  24
-rw-r--r--  CDivTable.lua  21
-rw-r--r--  CMakeLists.txt  9
-rw-r--r--  CMul.lua  36
-rw-r--r--  CMulTable.lua  26
-rw-r--r--  CSubTable.lua  21
-rw-r--r--  ClassNLLCriterion.lua  44
-rw-r--r--  Concat.lua  119
-rw-r--r--  ConcatTable.lua  72
-rw-r--r--  Copy.lua  33
-rw-r--r--  CosineDistance.lua  40
-rw-r--r--  CosineEmbeddingCriterion.lua  54
-rw-r--r--  Criterion.lua  51
-rw-r--r--  CriterionTable.lua  16
-rw-r--r--  DotProduct.lua  29
-rw-r--r--  Euclidean.lua  64
-rw-r--r--  Exp.lua  9
-rw-r--r--  HardShrink.lua  16
-rw-r--r--  HardTanh.lua  9
-rw-r--r--  HingeEmbeddingCriterion.lua  26
-rw-r--r--  Identity.lua  12
-rw-r--r--  Jacobian.lua  239
-rw-r--r--  JoinTable.lua  50
-rw-r--r--  L1HingeEmbeddingCriterion.lua  41
-rw-r--r--  Linear.lua  82
-rw-r--r--  LogSigmoid.lua  14
-rw-r--r--  LogSoftMax.lua  9
-rw-r--r--  LookupTable.lua  76
-rw-r--r--  MSECriterion.lua  14
-rw-r--r--  MarginCriterion.lua  23
-rw-r--r--  MarginRankingCriterion.lua  25
-rw-r--r--  Max.lua  16
-rw-r--r--  Mean.lua  26
-rw-r--r--  Min.lua  16
-rw-r--r--  Module.lua  211
-rw-r--r--  Mul.lua  42
-rw-r--r--  MultiCriterion.lua  32
-rw-r--r--  MultiLabelMarginCriterion.lua  14
-rw-r--r--  MultiMarginCriterion.lua  14
-rw-r--r--  Narrow.lua  24
-rw-r--r--  PairwiseDistance.lua  33
-rw-r--r--  Parallel.lua  137
-rw-r--r--  ParallelTable.lua  71
-rw-r--r--  Power.lua  21
-rw-r--r--  Replicate.lua  29
-rw-r--r--  Reshape.lua  38
-rw-r--r--  Select.lua  20
-rw-r--r--  Sequential.lua  129
-rw-r--r--  Sigmoid.lua  9
-rw-r--r--  SoftMax.lua  9
-rw-r--r--  SoftMin.lua  15
-rw-r--r--  SoftPlus.lua  9
-rw-r--r--  SoftShrink.lua  16
-rw-r--r--  SoftSign.lua  15
-rw-r--r--  SparseLinear.lua  42
-rw-r--r--  SpatialConvolution.lua  50
-rw-r--r--  SpatialConvolutionMap.lua  119
-rw-r--r--  SpatialLPPooling.lua  32
-rw-r--r--  SpatialMaxPooling.lua  34
-rw-r--r--  SpatialSubSampling.lua  49
-rw-r--r--  SpatialSubtractiveNormalization.lua  104
-rw-r--r--  SpatialZeroPadding.lua  53
-rw-r--r--  SplitTable.lua  30
-rw-r--r--  Sqrt.lua  13
-rw-r--r--  Square.lua  13
-rw-r--r--  StochasticGradient.lua  57
-rw-r--r--  Sum.lua  27
-rw-r--r--  Tanh.lua  9
-rw-r--r--  TemporalConvolution.lua  51
-rw-r--r--  TemporalSubSampling.lua  48
-rw-r--r--  Threshold.lua  20
-rw-r--r--  VolumetricConvolution.lua  51
-rw-r--r--  WeightedEuclidean.lua  85
-rw-r--r--  dok/abs.png  bin 0 -> 5918 bytes
-rw-r--r--  dok/exp.png  bin 0 -> 6104 bytes
-rw-r--r--  dok/hshrink.png  bin 0 -> 5576 bytes
-rw-r--r--  dok/htanh.png  bin 0 -> 5948 bytes
-rw-r--r--  dok/index.dok  3053
-rw-r--r--  dok/lena.jpg  bin 0 -> 39706 bytes
-rw-r--r--  dok/lenap.jpg  bin 0 -> 34838 bytes
-rw-r--r--  dok/logsigmoid.png  bin 0 -> 9116 bytes
-rw-r--r--  dok/logsoftmax.png  bin 0 -> 8712 bytes
-rw-r--r--  dok/power.png  bin 0 -> 6515 bytes
-rw-r--r--  dok/sigmmoid.png  bin 0 -> 6533 bytes
-rw-r--r--  dok/sigmoid.png  bin 0 -> 6533 bytes
-rw-r--r--  dok/softmax.png  bin 0 -> 6252 bytes
-rw-r--r--  dok/softmin.png  bin 0 -> 6446 bytes
-rw-r--r--  dok/softplus.png  bin 0 -> 9375 bytes
-rw-r--r--  dok/softsign.png  bin 0 -> 6877 bytes
-rw-r--r--  dok/sqrt.png  bin 0 -> 6008 bytes
-rw-r--r--  dok/square.png  bin 0 -> 6984 bytes
-rw-r--r--  dok/sshrink.png  bin 0 -> 5576 bytes
-rw-r--r--  dok/tanh.png  bin 0 -> 7323 bytes
-rw-r--r--  generic/Abs.c  43
-rw-r--r--  generic/AbsCriterion.c  54
-rw-r--r--  generic/Exp.c  43
-rw-r--r--  generic/HardShrink.c  50
-rw-r--r--  generic/HardTanh.c  50
-rw-r--r--  generic/LogSigmoid.c  49
-rw-r--r--  generic/LogSoftMax.c  111
-rw-r--r--  generic/MSECriterion.c  54
-rw-r--r--  generic/Max.c  100
-rw-r--r--  generic/Min.c  100
-rw-r--r--  generic/MultiLabelMarginCriterion.c  185
-rw-r--r--  generic/MultiMarginCriterion.c  162
-rw-r--r--  generic/Sigmoid.c  44
-rw-r--r--  generic/SoftMax.c  114
-rw-r--r--  generic/SoftPlus.c  44
-rw-r--r--  generic/SoftShrink.c  50
-rw-r--r--  generic/SparseLinear.c  130
-rw-r--r--  generic/SpatialConvolution.c  201
-rw-r--r--  generic/SpatialConvolutionMap.c  229
-rw-r--r--  generic/SpatialMaxPooling.c  163
-rw-r--r--  generic/SpatialSubSampling.c  278
-rw-r--r--  generic/Sqrt.c  46
-rw-r--r--  generic/Square.c  45
-rw-r--r--  generic/Tanh.c  45
-rw-r--r--  generic/TemporalConvolution.c  194
-rw-r--r--  generic/TemporalSubSampling.c  139
-rw-r--r--  generic/Threshold.c  47
-rw-r--r--  generic/VolumetricConvolution.c  118
-rw-r--r--  init.c  163
-rw-r--r--  init.lua  91
-rw-r--r--  test/test.lua  1029
127 files changed, 10444 insertions, 0 deletions
diff --git a/Abs.lua b/Abs.lua
new file mode 100644
index 0000000..3ccc6af
--- /dev/null
+++ b/Abs.lua
@@ -0,0 +1,15 @@
+local Abs, parent = torch.class('nn.Abs', 'nn.Module')
+
+function Abs:__init()
+ parent.__init(self)
+end
+
+function Abs:updateOutput(input)
+ input.nn.Abs_updateOutput(self, input)
+ return self.output
+end
+
+function Abs:updateGradInput(input, gradOutput)
+ input.nn.Abs_updateGradInput(self, input, gradOutput)
+ return self.gradInput
+end
diff --git a/AbsCriterion.lua b/AbsCriterion.lua
new file mode 100644
index 0000000..be7f6cb
--- /dev/null
+++ b/AbsCriterion.lua
@@ -0,0 +1,14 @@
+local AbsCriterion, parent = torch.class('nn.AbsCriterion', 'nn.Criterion')
+
+function AbsCriterion:__init()
+ parent.__init(self)
+ self.sizeAverage = true
+end
+
+function AbsCriterion:updateOutput(input, target)
+ return input.nn.AbsCriterion_updateOutput(self, input, target)
+end
+
+function AbsCriterion:updateGradInput(input, target)
+ return input.nn.AbsCriterion_updateGradInput(self, input, target)
+end
diff --git a/Add.lua b/Add.lua
new file mode 100644
index 0000000..40da79b
--- /dev/null
+++ b/Add.lua
@@ -0,0 +1,54 @@
+local Add, parent = torch.class('nn.Add', 'nn.Module')
+
+function Add:__init(inputSize,scalar)
+ parent.__init(self)
+
+ local size = inputSize
+ if scalar then size=1 end
+ self.bias = torch.Tensor(size)
+ self.gradBias = torch.Tensor(size)
+
+ -- state
+ self.gradInput:resize(inputSize)
+ self.output:resize(inputSize)
+
+ self:reset()
+end
+
+function Add:reset(stdv)
+ if stdv then
+ stdv = stdv * math.sqrt(3)
+ else
+ stdv = 1./math.sqrt(self.bias:size(1))
+ end
+
+ for i=1,self.bias:size(1) do
+ self.bias[i] = torch.uniform(-stdv, stdv)
+ end
+end
+
+function Add:updateOutput(input)
+ self.output:copy(input);
+ if self.gradBias:size(1)==1 then
+ self.output:add(self.bias[1]);
+ else
+ self.output:add(self.bias);
+ end
+ return self.output
+end
+
+function Add:updateGradInput(input, gradOutput)
+ if self.gradInput then
+ self.gradInput:copy(gradOutput)
+ return self.gradInput
+ end
+end
+
+function Add:accGradParameters(input, gradOutput, scale)
+ scale = scale or 1
+ if self.gradBias:size(1) == 1 then
+ self.gradBias[1] = self.gradBias[1] + scale*gradOutput:sumall();
+ else
+ self.gradBias:add(scale, gradOutput)
+ end
+end
diff --git a/CAddTable.lua b/CAddTable.lua
new file mode 100644
index 0000000..afe3568
--- /dev/null
+++ b/CAddTable.lua
@@ -0,0 +1,24 @@
+
+local CAddTable, parent = torch.class('nn.CAddTable', 'nn.Module')
+
+function CAddTable:__init()
+ parent.__init(self)
+ self.gradInput = {}
+end
+
+function CAddTable:updateOutput(input)
+ self.output:resizeAs(input[1]):copy(input[1])
+ for i=2,#input do
+ self.output:add(input[i])
+ end
+ return self.output
+end
+
+function CAddTable:updateGradInput(input, gradOutput)
+ for i=1,#input do
+ self.gradInput[i] = self.gradInput[i] or torch.Tensor()
+ self.gradInput[i]:resizeAs(input[i])
+ self.gradInput[i]:copy(gradOutput)
+ end
+ return self.gradInput
+end
diff --git a/CDivTable.lua b/CDivTable.lua
new file mode 100644
index 0000000..f91d024
--- /dev/null
+++ b/CDivTable.lua
@@ -0,0 +1,21 @@
+
+local CDivTable, parent = torch.class('nn.CDivTable', 'nn.Module')
+
+function CDivTable:__init()
+ parent.__init(self)
+ self.gradInput = {}
+end
+
+function CDivTable:updateOutput(input)
+ self.output:resizeAs(input[1]):copy(input[1])
+ self.output:cdiv(input[2])
+ return self.output
+end
+
+function CDivTable:updateGradInput(input, gradOutput)
+ self.gradInput[1] = self.gradInput[1] or torch.Tensor()
+ self.gradInput[2] = self.gradInput[2] or torch.Tensor()
+ self.gradInput[1]:resizeAs(input[1]):copy(gradOutput):cdiv(input[2])
+ self.gradInput[2]:resizeAs(input[2]):zero():addcdiv(-1,self.gradInput[1],input[2]):cmul(input[1])
+ return self.gradInput
+end
diff --git a/CMakeLists.txt b/CMakeLists.txt
new file mode 100644
index 0000000..75239ad
--- /dev/null
+++ b/CMakeLists.txt
@@ -0,0 +1,9 @@
+SET(src init.c)
+
+FILE(GLOB luasrc *.lua)
+SET(luasrc ${luasrc} test/test.lua)
+
+ADD_TORCH_PACKAGE(nn "${src}" "${luasrc}" "Machine Learning")
+ADD_TORCH_DOK(dok nn "Machine Learning" "Neural Networks" 3.1)
+
+TARGET_LINK_LIBRARIES(nn luaT TH)
diff --git a/CMul.lua b/CMul.lua
new file mode 100644
index 0000000..9b59944
--- /dev/null
+++ b/CMul.lua
@@ -0,0 +1,36 @@
+local CMul, parent = torch.class('nn.CMul', 'nn.Module')
+
+function CMul:__init(inputSize)
+ parent.__init(self)
+
+ self.weight = torch.Tensor(inputSize)
+ self.gradWeight = torch.Tensor(inputSize)
+
+ -- state
+ self.gradInput:resize(inputSize)
+ self.output:resize(inputSize)
+
+ self:reset()
+end
+
+function CMul:reset()
+ self.weight:fill(1)
+end
+
+function CMul:updateOutput(input)
+ self.output:copy(input);
+ self.output:cmul(self.weight);
+ return self.output
+end
+
+function CMul:updateGradInput(input, gradOutput)
+ if self.gradInput then
+ self.gradInput:zero()
+ self.gradInput:addcmul(1, self.weight, gradOutput)
+ return self.gradInput
+ end
+end
+
+function CMul:accGradParameters(input, gradOutput, scale)
+ self.gradWeight:addcmul(scale or 1, input, gradOutput)
+end
diff --git a/CMulTable.lua b/CMulTable.lua
new file mode 100644
index 0000000..4c058b6
--- /dev/null
+++ b/CMulTable.lua
@@ -0,0 +1,26 @@
+
+local CMulTable, parent = torch.class('nn.CMulTable', 'nn.Module')
+
+function CMulTable:__init()
+ parent.__init(self)
+ self.gradInput = {}
+end
+
+function CMulTable:updateOutput(input)
+ self.output:resizeAs(input[1]):copy(input[1])
+ for i=2,#input do
+ self.output:cmul(input[i])
+ end
+ return self.output
+end
+
+function CMulTable:updateGradInput(input, gradOutput)
+ local tout = torch.Tensor():resizeAs(self.output)
+ for i=1,#input do
+ self.gradInput[i] = self.gradInput[i] or torch.Tensor()
+ self.gradInput[i]:resizeAs(input[i]):copy(gradOutput)
+ tout:copy(self.output):cdiv(input[i])
+ self.gradInput[i]:cmul(tout)
+ end
+ return self.gradInput
+end
diff --git a/CSubTable.lua b/CSubTable.lua
new file mode 100644
index 0000000..ffc495b
--- /dev/null
+++ b/CSubTable.lua
@@ -0,0 +1,21 @@
+
+local CSubTable, parent = torch.class('nn.CSubTable', 'nn.Module')
+
+function CSubTable:__init()
+ parent.__init(self)
+ self.gradInput = {}
+end
+
+function CSubTable:updateOutput(input)
+ self.output:resizeAs(input[1]):copy(input[1])
+ self.output:add(-1,input[2])
+ return self.output
+end
+
+function CSubTable:updateGradInput(input, gradOutput)
+ self.gradInput[1] = self.gradInput[1] or torch.Tensor()
+ self.gradInput[2] = self.gradInput[2] or torch.Tensor()
+ self.gradInput[1]:resizeAs(input[1]):copy(gradOutput)
+ self.gradInput[2]:resizeAs(input[1]):copy(gradOutput):mul(-1)
+ return self.gradInput
+end
diff --git a/ClassNLLCriterion.lua b/ClassNLLCriterion.lua
new file mode 100644
index 0000000..7ac48f4
--- /dev/null
+++ b/ClassNLLCriterion.lua
@@ -0,0 +1,44 @@
+local ClassNLLCriterion, parent = torch.class('nn.ClassNLLCriterion', 'nn.Criterion')
+
+function ClassNLLCriterion:__init()
+ parent.__init(self)
+ self.sizeAverage = true
+end
+
+function ClassNLLCriterion:updateOutput(input, target)
+ if input:dim() == 1 then
+ self.output = -input[target]
+ elseif input:dim() == 2 then
+ local output = 0
+ for i=1,target:size(1) do
+ output = output - input[i][target[i]]
+ end
+ if self.sizeAverage then
+ output = output / target:size(1)
+ end
+ self.output = output
+ else
+ error('matrix or vector expected')
+ end
+ return self.output
+end
+
+function ClassNLLCriterion:updateGradInput(input, target)
+ self.gradInput:resizeAs(input)
+ self.gradInput:zero()
+
+ if input:dim() == 1 then
+ self.gradInput[target] = -1
+ else
+ local z = -1
+ if self.sizeAverage then
+ z = z / target:size(1)
+ end
+ local gradInput = self.gradInput
+ for i=1,target:size(1) do
+ gradInput[i][target[i]] = z
+ end
+ end
+
+ return self.gradInput
+end
diff --git a/Concat.lua b/Concat.lua
new file mode 100644
index 0000000..616c394
--- /dev/null
+++ b/Concat.lua
@@ -0,0 +1,119 @@
+local Concat, parent = torch.class('nn.Concat', 'nn.Module')
+
+function Concat:__init(dimension)
+ parent.__init(self)
+ self.modules = {}
+ self.size = torch.LongStorage()
+ self.dimension = dimension
+end
+
+function Concat:add(module)
+ table.insert(self.modules, module)
+ return self
+end
+
+function Concat:get(index)
+ return self.modules[index]
+end
+
+function Concat:updateOutput(input)
+ for i=1,#self.modules do
+ local currentOutput = self.modules[i]:updateOutput(input)
+
+ if i == 1 then
+ self.size:resize(currentOutput:dim()):copy(currentOutput:size())
+ else
+ self.size[self.dimension] = self.size[self.dimension] + currentOutput:size(self.dimension)
+ end
+ end
+ self.output:resize(self.size)
+
+ local offset = 1
+ for _,module in ipairs(self.modules) do
+ local currentOutput = module:updateOutput(input)
+ self.output:narrow(self.dimension, offset, currentOutput:size(self.dimension)):copy(currentOutput)
+ offset = offset + currentOutput:size(self.dimension)
+ end
+ return self.output
+end
+
+function Concat:updateGradInput(input, gradOutput)
+ self.gradInput:resizeAs(input)
+
+ local offset = 1
+ for i,module in ipairs(self.modules) do
+ local currentOutput = module.output
+ local currentGradInput = module:updateGradInput(input, gradOutput:narrow(self.dimension, offset, currentOutput:size(self.dimension)))
+
+ if i==1 then
+ self.gradInput:copy(currentGradInput)
+ else
+ self.gradInput:add(currentGradInput)
+ end
+ offset = offset + currentOutput:size(self.dimension)
+ end
+ return self.gradInput
+end
+
+function Concat:accGradParameters(input, gradOutput, scale)
+ scale = scale or 1
+ local offset = 1
+ for i,module in ipairs(self.modules) do
+ local currentOutput = module.output
+ local currentGradInput = module:accGradParameters(input,
+ gradOutput:narrow(self.dimension, offset, currentOutput:size(self.dimension)),
+ scale)
+ offset = offset + currentOutput:size(self.dimension)
+ end
+end
+
+function Concat:accUpdateGradParameters(input, gradOutput, lr)
+ local offset = 1
+ for i,module in ipairs(self.modules) do
+ local currentOutput = module.output
+ local currentGradInput = module:accUpdateGradParameters(input,
+ gradOutput:narrow(self.dimension, offset, currentOutput:size(self.dimension)),
+ lr)
+ offset = offset + currentOutput:size(self.dimension)
+ end
+end
+
+function Concat:zeroGradParameters()
+ for _,module in ipairs(self.modules) do
+ module:zeroGradParameters()
+ end
+end
+
+function Concat:updateParameters(learningRate)
+ for _,module in ipairs(self.modules) do
+ module:updateParameters(learningRate)
+ end
+end
+
+function Concat:share(mlp,...)
+ for i=1,#self.modules do
+ self.modules[i]:share(mlp.modules[i],...);
+ end
+end
+
+function Concat:parameters()
+ local function tinsert(to, from)
+ if type(from) == 'table' then
+ for i=1,#from do
+ tinsert(to,from[i])
+ end
+ else
+ table.insert(to,from)
+ end
+ end
+ local w = {}
+ local gw = {}
+ for i=1,#self.modules do
+ local mw,mgw = self.modules[i]:parameters()
+ if mw then
+ tinsert(w,mw)
+ tinsert(gw,mgw)
+ end
+ end
+ return w,gw
+end
diff --git a/ConcatTable.lua b/ConcatTable.lua
new file mode 100644
index 0000000..730d95e
--- /dev/null
+++ b/ConcatTable.lua
@@ -0,0 +1,72 @@
+local ConcatTable, parent = torch.class('nn.ConcatTable', 'nn.Module')
+
+function ConcatTable:__init()
+ parent.__init(self)
+ self.modules = {}
+ self.output = {}
+end
+
+function ConcatTable:add(module)
+ table.insert(self.modules, module)
+ return self
+end
+
+function ConcatTable:get(index)
+ return self.modules[index]
+end
+
+function ConcatTable:size()
+ return #self.modules
+end
+
+function ConcatTable:updateOutput(input)
+ for i=1,#self.modules do
+ self.output[i] = self.modules[i]:updateOutput(input)
+ end
+ return self.output
+end
+
+function ConcatTable:updateGradInput(input, gradOutput)
+ for i,module in ipairs(self.modules) do
+ local currentGradInput = module:updateGradInput(input, gradOutput[i])
+ if i == 1 then
+ self.gradInput:resizeAs(currentGradInput):copy(currentGradInput)
+ else
+ self.gradInput:add(currentGradInput)
+ end
+ end
+ return self.gradInput
+end
+
+function ConcatTable:accGradParameters(input, gradOutput, scale)
+ scale = scale or 1
+ for i,module in ipairs(self.modules) do
+ module:accGradParameters(input, gradOutput[i], scale)
+ end
+end
+
+function ConcatTable:accUpdateGradParameters(input, gradOutput, lr)
+ for i,module in ipairs(self.modules) do
+ module:accUpdateGradParameters(input, gradOutput[i], lr)
+ end
+end
+
+function ConcatTable:zeroGradParameters()
+ for _,module in ipairs(self.modules) do
+ module:zeroGradParameters()
+ end
+end
+
+function ConcatTable:updateParameters(learningRate)
+ for _,module in ipairs(self.modules) do
+ module:updateParameters(learningRate)
+ end
+end
+
+function ConcatTable:share(mlp,...)
+ for i=1,#self.modules do
+ self.modules[i]:share(mlp.modules[i],...);
+ end
+end
+
+
diff --git a/Copy.lua b/Copy.lua
new file mode 100644
index 0000000..7b6eeb3
--- /dev/null
+++ b/Copy.lua
@@ -0,0 +1,33 @@
+local Copy, parent = torch.class('nn.Copy', 'nn.Module')
+
+function Copy:__init(intype, outtype)
+ intype = intype or torch.getmetatable(torch.Tensor.__typename)
+ outtype = outtype or torch.getmetatable(torch.Tensor.__typename)
+
+ parent.__init(self)
+ self.gradInput = torch.getmetatable(intype).new()
+ self.output = torch.getmetatable(outtype).new()
+
+ if intype == outtype then
+
+ self.updateOutput = function(self, input)
+ self.output = input
+ return input
+ end
+
+ self.updateGradInput = function(self, input, gradOutput)
+ self.gradInput = gradOutput
+ return gradOutput
+ end
+ end
+end
+
+function Copy:updateOutput(input)
+ self.output:resize(input:size()):copy(input)
+ return self.output
+end
+
+function Copy:updateGradInput(input, gradOutput)
+ self.gradInput:resize(gradOutput:size()):copy(gradOutput)
+ return self.gradInput
+end
diff --git a/CosineDistance.lua b/CosineDistance.lua
new file mode 100644
index 0000000..061ff92
--- /dev/null
+++ b/CosineDistance.lua
@@ -0,0 +1,40 @@
+local CosineDistance, parent = torch.class('nn.CosineDistance', 'nn.Module')
+
+function CosineDistance:__init()
+ parent.__init(self)
+ self.gradInput = {torch.Tensor(), torch.Tensor()}
+ self.output=torch.Tensor(1)
+end
+
+function CosineDistance:updateOutput(input)
+ local input1, input2 = input[1], input[2]
+ self.w1 = input1:dot(input2)
+ self.w22 = input1:dot(input1)
+ self.w2 = math.sqrt(self.w22)
+ self.w32 = input2:dot(input2)
+ self.w3 = math.sqrt(self.w32)
+ self.output[1] = self.w1/self.w2/self.w3
+ return self.output
+end
+
+function CosineDistance:updateGradInput(input, gradOutput)
+ local v1 = input[1]
+ local v2 = input[2]
+ local gw1 = input[1].new()
+ local gw2 = input[2].new()
+ gw1:resizeAs(v1)
+ gw2:resizeAs(v1)
+
+ gw1:zero()
+ gw1:add(1/(self.w2*self.w3), v2)
+ gw1:add(-self.w1/(self.w22*self.w2*self.w3), v1)
+
+ gw2:zero()
+ gw2:add(1/(self.w2*self.w3), v1)
+ gw2:add(-self.w1/(self.w32*self.w2*self.w3), v2)
+
+ gw1:mul(gradOutput[1])
+ gw2:mul(gradOutput[1])
+ self.gradInput = {gw1, gw2}
+ return self.gradInput
+end
diff --git a/CosineEmbeddingCriterion.lua b/CosineEmbeddingCriterion.lua
new file mode 100644
index 0000000..a9ee2e0
--- /dev/null
+++ b/CosineEmbeddingCriterion.lua
@@ -0,0 +1,54 @@
+local CosineEmbeddingCriterion, parent = torch.class('nn.CosineEmbeddingCriterion', 'nn.Module')
+
+function CosineEmbeddingCriterion:__init(margin)
+ parent.__init(self)
+ margin=margin or 0
+ self.margin = margin
+ self.gradInput = {torch.Tensor(), torch.Tensor()}
+end
+
+function CosineEmbeddingCriterion:updateOutput(input,y)
+ local input1, input2 = input[1], input[2]
+ self.w1 = input1:dot(input2)
+ self.w22 = input1:dot(input1)
+ self.w2 = math.sqrt(self.w22)
+ self.w32 = input2:dot(input2)
+ self.w3 = math.sqrt(self.w32)
+ self.output = self.w1/self.w2/self.w3
+ if y==-1 then
+ self.output = math.max(0, self.output - self.margin);
+ else
+ self.output = 1 - self.output
+ end
+ return self.output
+end
+
+local function mathsign(t)
+ if t>0 then return 1; end
+ if t<0 then return -1; end
+ return 2*torch.random(2)-3;
+end
+
+function CosineEmbeddingCriterion:updateGradInput(input, y)
+ local v1 = input[1]
+ local v2 = input[2]
+ local gw1 = input[1].new()
+ local gw2 = input[2].new()
+ gw1:resizeAs(v1)
+ gw2:resizeAs(v1)
+
+ gw1:zero()
+ gw1:add(1/(self.w2*self.w3), v2)
+ gw1:add(-self.w1/(self.w22*self.w2*self.w3), v1)
+
+ gw2:zero()
+ gw2:add(1/(self.w2*self.w3), v1)
+ gw2:add(-self.w1/(self.w32*self.w2*self.w3), v2)
+
+ if y == 1 then
+ gw1 = -gw1
+ gw2 = -gw2
+ end
+ self.gradInput = {gw1, gw2}
+ return self.gradInput
+end
diff --git a/Criterion.lua b/Criterion.lua
new file mode 100644
index 0000000..6513414
--- /dev/null
+++ b/Criterion.lua
@@ -0,0 +1,51 @@
+local Criterion = torch.class('nn.Criterion')
+
+function Criterion:__init()
+ self.gradInput = torch.Tensor()
+ self.output = 0
+end
+
+function Criterion:updateOutput(input, target)
+end
+
+function Criterion:forward(input, target)
+ return self:updateOutput(input, target)
+end
+
+function Criterion:backward(input, target)
+ return self:updateGradInput(input, target)
+end
+
+function Criterion:updateGradInput(input, target)
+end
+
+function Criterion:clone()
+ local f = torch.MemoryFile("rw"):binary()
+ f:writeObject(self)
+ f:seek(1)
+ local clone = f:readObject()
+ f:close()
+ return clone
+end
+
+function Criterion:type(type)
+ -- find all tensors and convert them
+ for key,param in pairs(self) do
+ if torch.typename(param) and torch.typename(param):find('torch%..+Tensor') then
+ self[key] = param:type(type)
+ end
+ end
+ return self
+end
+
+function Criterion:float()
+ return self:type('torch.FloatTensor')
+end
+
+function Criterion:double()
+ return self:type('torch.DoubleTensor')
+end
+
+function Criterion:cuda()
+ return self:type('torch.CudaTensor')
+end
diff --git a/CriterionTable.lua b/CriterionTable.lua
new file mode 100644
index 0000000..e5538f7
--- /dev/null
+++ b/CriterionTable.lua
@@ -0,0 +1,16 @@
+local CriterionTable, parent = torch.class('nn.CriterionTable', 'nn.Module')
+
+function CriterionTable:__init(criterion)
+ self.criterion = criterion
+ self.gradInput = {criterion.gradInput}
+end
+
+function CriterionTable:updateOutput(input)
+ self.output = self.criterion:updateOutput(unpack(input))
+ return self.output
+end
+
+function CriterionTable:updateGradInput(input, gradOutput)
+ self.criterion:updateGradInput(unpack(input))
+ return self.gradInput
+end
diff --git a/DotProduct.lua b/DotProduct.lua
new file mode 100644
index 0000000..d16d295
--- /dev/null
+++ b/DotProduct.lua
@@ -0,0 +1,29 @@
+local DotProduct, parent = torch.class('nn.DotProduct', 'nn.Module')
+
+function DotProduct:__init()
+ parent.__init(self)
+ self.gradInput = {torch.Tensor(), torch.Tensor()}
+ self.output=torch.Tensor(1)
+end
+
+function DotProduct:updateOutput(input,y)
+ self.output[1] = input[1]:dot(input[2])
+ return self.output
+end
+
+function DotProduct:updateGradInput(input, gradOutput)
+ local v1 = input[1]
+ local v2 = input[2]
+ local gw1=self.gradInput[1];
+ local gw2=self.gradInput[2];
+ gw1:resizeAs(v1)
+ gw2:resizeAs(v1)
+
+ gw1:copy( v2)
+ gw1:mul(gradOutput[1])
+
+ gw2:copy( v1)
+ gw2:mul(gradOutput[1])
+
+ return self.gradInput
+end
diff --git a/Euclidean.lua b/Euclidean.lua
new file mode 100644
index 0000000..808b7ab
--- /dev/null
+++ b/Euclidean.lua
@@ -0,0 +1,64 @@
+local Euclidean, parent = torch.class('nn.Euclidean', 'nn.Module')
+
+function Euclidean:__init(inputSize,outputSize)
+ parent.__init(self)
+
+ self.weight = torch.Tensor(inputSize,outputSize)
+ self.gradWeight = torch.Tensor(inputSize,outputSize)
+
+ -- state
+ self.gradInput:resize(inputSize)
+ self.output:resize(outputSize)
+ self.temp = torch.Tensor(inputSize)
+
+ self:reset()
+end
+
+function Euclidean:reset(stdv)
+ if stdv then
+ stdv = stdv * math.sqrt(3)
+ else
+ stdv = 1./math.sqrt(self.weight:size(1))
+ end
+
+ for i=1,self.weight:size(2) do
+ self.weight:select(2, i):apply(function()
+ return torch.uniform(-stdv, stdv)
+ end)
+ end
+end
+
+function Euclidean:updateOutput(input)
+ self.output:zero()
+ for o = 1,self.weight:size(2) do
+ self.output[o] = input:dist(self.weight:select(2,o))
+ end
+ return self.output
+end
+
+function Euclidean:updateGradInput(input, gradOutput)
+ self:updateOutput(input)
+ if self.gradInput then
+ self.gradInput:zero()
+ for o = 1,self.weight:size(2) do
+ if self.output[o] ~= 0 then
+ self.temp:copy(input):add(-1,self.weight:select(2,o))
+ self.temp:mul(gradOutput[o]/self.output[o])
+ self.gradInput:add(self.temp)
+ end
+ end
+ return self.gradInput
+ end
+end
+
+function Euclidean:accGradParameters(input, gradOutput, scale)
+ self:updateOutput(input)
+ scale = scale or 1
+ for o = 1,self.weight:size(2) do
+ if self.output[o] ~= 0 then
+ self.temp:copy(self.weight:select(2,o)):add(-1,input)
+ self.temp:mul(gradOutput[o]/self.output[o])
+ self.gradWeight:select(2,o):add(self.temp)
+ end
+ end
+end
diff --git a/Exp.lua b/Exp.lua
new file mode 100644
index 0000000..c4df86c
--- /dev/null
+++ b/Exp.lua
@@ -0,0 +1,9 @@
+local Exp = torch.class('nn.Exp', 'nn.Module')
+
+function Exp:updateOutput(input)
+ return input.nn.Exp_updateOutput(self, input)
+end
+
+function Exp:updateGradInput(input, gradOutput)
+ return input.nn.Exp_updateGradInput(self, input, gradOutput)
+end
diff --git a/HardShrink.lua b/HardShrink.lua
new file mode 100644
index 0000000..7dfeaca
--- /dev/null
+++ b/HardShrink.lua
@@ -0,0 +1,16 @@
+local HardShrink, parent = torch.class('nn.HardShrink', 'nn.Module')
+
+function HardShrink:__init(lam)
+ parent.__init(self)
+ self.lambda = lam or 0.5
+end
+
+function HardShrink:updateOutput(input)
+ input.nn.HardShrink_updateOutput(self, input)
+ return self.output
+end
+
+function HardShrink:updateGradInput(input, gradOutput)
+ input.nn.HardShrink_updateGradInput(self, input, gradOutput)
+ return self.gradInput
+end
diff --git a/HardTanh.lua b/HardTanh.lua
new file mode 100644
index 0000000..3391479
--- /dev/null
+++ b/HardTanh.lua
@@ -0,0 +1,9 @@
+local HardTanh = torch.class('nn.HardTanh', 'nn.Module')
+
+function HardTanh:updateOutput(input)
+ return input.nn.HardTanh_updateOutput(self, input)
+end
+
+function HardTanh:updateGradInput(input, gradOutput)
+ return input.nn.HardTanh_updateGradInput(self, input, gradOutput)
+end
diff --git a/HingeEmbeddingCriterion.lua b/HingeEmbeddingCriterion.lua
new file mode 100644
index 0000000..e88ef82
--- /dev/null
+++ b/HingeEmbeddingCriterion.lua
@@ -0,0 +1,26 @@
+local HingeEmbeddingCriterion, parent =
+ torch.class('nn.HingeEmbeddingCriterion', 'nn.Module')
+
+function HingeEmbeddingCriterion:__init(margin)
+ parent.__init(self)
+ margin=margin or 1
+ self.margin = margin
+ self.gradInput = torch.Tensor(1)
+end
+
+function HingeEmbeddingCriterion:updateOutput(input,y)
+ self.output=input[1]
+ if y==-1 then
+ self.output = math.max(0,self.margin - self.output);
+ end
+ return self.output
+end
+
+function HingeEmbeddingCriterion:updateGradInput(input, y)
+ self.gradInput[1]=y
+ local dist = input[1]
+ if y == -1 and dist > self.margin then
+ self.gradInput[1]=0;
+ end
+ return self.gradInput
+end
diff --git a/Identity.lua b/Identity.lua
new file mode 100644
index 0000000..79b5c08
--- /dev/null
+++ b/Identity.lua
@@ -0,0 +1,12 @@
+local Identity, parent = torch.class('nn.Identity', 'nn.Module')
+
+function Identity:updateOutput(input)
+ self.output = input
+ return self.output
+end
+
+
+function Identity:updateGradInput(input, gradOutput)
+ self.gradInput = gradOutput
+ return self.gradInput
+end
diff --git a/Jacobian.lua b/Jacobian.lua
new file mode 100644
index 0000000..04330ac
--- /dev/null
+++ b/Jacobian.lua
@@ -0,0 +1,239 @@
+nn.Jacobian = {}
+
+function nn.Jacobian.backward (module, input, param, dparam)
+ local doparam = 0
+ if param then
+ doparam = 1
+ end
+ param = param or input
+ -- output deriv
+ module:forward(input)
+ local dout = module.output.new():resizeAs(module.output)
+ -- 1D view
+ local sdout = module.output.new(dout:storage(),1,dout:nElement())
+ -- jacobian matrix to calculate
+ local jacobian = torch.Tensor(param:nElement(),dout:nElement()):zero()
+
+ for i=1,sdout:nElement() do
+ dout:zero()
+ sdout[i] = 1
+ module:zeroGradParameters()
+ local din = module:updateGradInput(input, dout)
+ module:accGradParameters(input, dout)
+ if doparam == 1 then
+ jacobian:select(2,i):copy(dparam)
+ else
+ jacobian:select(2,i):copy(din)
+ end
+ end
+ return jacobian
+end
+
+function nn.Jacobian.backwardUpdate (module, input, param)
+
+ -- output deriv
+ module:forward(input)
+ local dout = module.output.new():resizeAs(module.output)
+ -- 1D view
+ local sdout = module.output.new(dout:storage(),1,dout:nElement())
+ -- jacobian matrix to calculate
+ local jacobian = torch.Tensor(param:nElement(),dout:nElement()):zero()
+
+ -- original param
+ local origparam = param:clone()
+
+ for i=1,sdout:nElement() do
+ param:copy(origparam)
+ dout:zero()
+ sdout[i] = 1
+ local din = module:updateGradInput(input, dout)
+ module:accUpdateGradParameters(input, dout, 1)
+ jacobian:select(2,i):copy(param)
+ end
+
+ param:copy(origparam)
+
+ return jacobian
+end
+
+function nn.Jacobian.forward(module, input, param)
+ param = param or input
+ -- perturbation amount
+ local small = 1e-6
+ -- 1D view of input
+ local tst = param:storage()
+ local sin = param.new(tst,1,tst:size())
+ -- jacobian matrix to calculate
+ local jacobian = torch.Tensor():resize(param:nElement(),module:forward(input):nElement())
+
+ local outa = torch.Tensor(jacobian:size(2))
+ local outb = torch.Tensor(jacobian:size(2))
+
+ for i=1,sin:nElement() do
+ sin[i] = sin[i] - small
+ outa:copy(module:forward(input))
+ sin[i] = sin[i] + 2*small
+ outb:copy(module:forward(input))
+ sin[i] = sin[i] - small
+
+ outb:add(-1,outa):div(2*small)
+ jacobian:select(1,i):copy(outb)
+ end
+
+ return jacobian
+end
+
+function nn.Jacobian.forwardUpdate(module, input, param)
+ -- perturbation amount
+ local small = 1e-6
+ -- 1D view of input
+ local tst = param:storage()
+ local sin = param.new(tst,1,tst:size())
+ -- jacobian matrix to calculate
+ local jacobian = torch.Tensor():resize(param:nElement(),module:forward(input):nElement())
+
+ local outa = torch.Tensor(jacobian:size(2))
+ local outb = torch.Tensor(jacobian:size(2))
+
+ for i=1,sin:nElement() do
+ sin[i] = sin[i] - small
+ outa:copy(module:forward(input))
+ sin[i] = sin[i] + 2*small
+ outb:copy(module:forward(input))
+ sin[i] = sin[i] - small
+
+ outb:add(-1,outa):div(2*small)
+ jacobian:select(1,i):copy(outb)
+ jacobian:select(1,i):mul(-1)
+ jacobian:select(1,i):add(sin[i])
+ end
+ return jacobian
+end
+
+function nn.Jacobian.testJacobian (module, input, minval, maxval)
+ minval = minval or -2
+ maxval = maxval or 2
+ local inrange = maxval - minval
+ input:copy(torch.rand(input:nElement()):mul(inrange):add(minval))
+ local jac_fprop = nn.Jacobian.forward(module,input)
+ local jac_bprop = nn.Jacobian.backward(module,input)
+ local error = jac_fprop-jac_bprop
+ return error:abs():maxall()
+end
+
+function nn.Jacobian.testJacobianParameters (module, input, param, dparam, minval, maxval)
+ minval = minval or -2
+ maxval = maxval or 2
+ local inrange = maxval - minval
+ input:copy(torch.rand(input:nElement()):mul(inrange):add(minval))
+ param:copy(torch.rand(param:nElement()):mul(inrange):add(minval))
+ local jac_bprop = nn.Jacobian.backward(module, input, param, dparam)
+ local jac_fprop = nn.Jacobian.forward(module, input, param)
+ local error = jac_fprop - jac_bprop
+ return error:abs():maxall()
+end
+
+function nn.Jacobian.testJacobianUpdateParameters (module, input, param, minval, maxval)
+ minval = minval or -2
+ maxval = maxval or 2
+ local inrange = maxval - minval
+ input:copy(torch.rand(input:nElement()):mul(inrange):add(minval))
+ param:copy(torch.rand(param:nElement()):mul(inrange):add(minval))
+ local params_bprop = nn.Jacobian.backwardUpdate(module, input, param)
+ local params_fprop = nn.Jacobian.forwardUpdate(module, input, param)
+
+ local error = params_fprop - params_bprop
+ return error:abs():maxall()
+end
+
+function nn.Jacobian.testIO(module,input, minval, maxval)
+ minval = minval or -2
+ maxval = maxval or 2
+ local inrange = maxval - minval
+
+ -- run module
+ module:forward(input)
+ local go = module.output:clone():copy(torch.rand(module.output:nElement()):mul(inrange):add(minval))
+ module:updateGradInput(input,go)
+ module:accGradParameters(input,go)
+
+ local fo = module.output:clone()
+ local bo = module.gradInput:clone()
+
+ -- write module
+ local f = torch.DiskFile('tmp.bin','w'):binary()
+ f:writeObject(module)
+ f:close()
+ -- read module
+ local m = torch.DiskFile('tmp.bin'):binary():readObject()
+ m:forward(input)
+ m:updateGradInput(input,go)
+ m:accGradParameters(input,go)
+ -- cleanup
+ os.remove('tmp.bin')
+
+ local fo2 = m.output:clone()
+ local bo2 = m.gradInput:clone()
+
+ local errf = fo - fo2
+ local errb = bo - bo2
+ return errf:abs():maxall(), errb:abs():maxall()
+end
+
+function nn.Jacobian.testAllUpdate(module, input, weight, gradWeight)
+ local gradOutput
+ local lr = torch.uniform(0.1, 1)
+ local errors = {}
+
+ -- accGradParameters
+ local maccgp = module:clone()
+ local weightc = maccgp[weight]:clone()
+ maccgp:forward(input)
+ gradOutput = torch.rand(maccgp.output:size())
+ maccgp:zeroGradParameters()
+ maccgp:updateGradInput(input, gradOutput)
+ maccgp:accGradParameters(input, gradOutput)
+ maccgp:updateParameters(lr)
+ errors["accGradParameters"] = (weightc-maccgp[gradWeight]*lr-maccgp[weight]):norm()
+
+ -- accUpdateGradParameters
+ local maccugp = module:clone()
+ maccugp:forward(input)
+ maccugp:updateGradInput(input, gradOutput)
+ maccugp:accUpdateGradParameters(input, gradOutput, lr)
+ errors["accUpdateGradParameters"] = (maccugp[weight]-maccgp[weight]):norm()
+
+ -- shared, accGradParameters
+ local macsh1 = module:clone()
+ local macsh2 = module:clone()
+ macsh2:share(macsh1, weight)
+ macsh1:forward(input)
+ macsh2:forward(input)
+ macsh1:zeroGradParameters()
+ macsh2:zeroGradParameters()
+ macsh1:updateGradInput(input, gradOutput)
+ macsh2:updateGradInput(input, gradOutput)
+ macsh1:accGradParameters(input, gradOutput)
+ macsh2:accGradParameters(input, gradOutput)
+ macsh1:updateParameters(lr)
+ macsh2:updateParameters(lr)
+ local err = (weightc-maccgp[gradWeight]*(lr*2)-macsh1[weight]):norm()
+ err = err + (weightc-maccgp[gradWeight]*(lr*2)-macsh2[weight]):norm()
+ errors["accGradParameters [shared]"] = err
+
+ -- shared, accUpdateGradParameters
+ local macshu1 = module:clone()
+ local macshu2 = module:clone()
+ macshu2:share(macshu1, weight)
+ macshu1:forward(input)
+ macshu2:forward(input)
+ macshu1:updateGradInput(input, gradOutput)
+ macshu2:updateGradInput(input, gradOutput)
+ macshu1:accUpdateGradParameters(input, gradOutput, lr)
+ macshu2:accUpdateGradParameters(input, gradOutput, lr)
+ local err = (weightc-maccgp[gradWeight]*(lr*2)-macshu1[weight]):norm()
+ err = err + (weightc-maccgp[gradWeight]*(lr*2)-macshu2[weight]):norm()
+ errors["accUpdateGradParameters [shared]"] = err
+
+ return errors
+end
diff --git a/JoinTable.lua b/JoinTable.lua
new file mode 100644
index 0000000..dc20246
--- /dev/null
+++ b/JoinTable.lua
@@ -0,0 +1,50 @@
+local JoinTable, parent = torch.class('nn.JoinTable', 'nn.Module')
+
+function JoinTable:__init(dimension)
+ parent.__init(self)
+ self.size = torch.LongStorage()
+ self.dimension = dimension
+ self.gradInput = {}
+end
+
+function JoinTable:updateOutput(input)
+ for i=1,#input do
+ local currentOutput = input[i]
+ if i == 1 then
+ self.size:resize(currentOutput:dim()):copy(currentOutput:size())
+ else
+ self.size[self.dimension] = self.size[self.dimension]
+ + currentOutput:size(self.dimension)
+ end
+ end
+ self.output:resize(self.size)
+
+ local offset = 1
+ for i=1,#input do
+ local currentOutput = input[i]
+ self.output:narrow(self.dimension, offset,
+ currentOutput:size(self.dimension)):copy(currentOutput)
+ offset = offset + currentOutput:size(self.dimension)
+ end
+ return self.output
+
+end
+
+function JoinTable:updateGradInput(input, gradOutput)
+ for i=1,#input do
+ if self.gradInput[i] == nil then
+ self.gradInput[i] = input[i].new()
+ end
+ self.gradInput[i]:resizeAs(input[i])
+ end
+
+ local offset = 1
+ for i=1,#input do
+ local currentOutput = input[i]
+ local currentGradInput = gradOutput:narrow(self.dimension, offset,
+ currentOutput:size(self.dimension))
+ self.gradInput[i]:copy(currentGradInput)
+ offset = offset + currentOutput:size(self.dimension)
+ end
+ return self.gradInput
+end
diff --git a/L1HingeEmbeddingCriterion.lua b/L1HingeEmbeddingCriterion.lua
new file mode 100644
index 0000000..5aa1ae7
--- /dev/null
+++ b/L1HingeEmbeddingCriterion.lua
@@ -0,0 +1,41 @@
+local L1HingeEmbeddingCriterion, parent = torch.class('nn.L1HingeEmbeddingCriterion', 'nn.Module')
+
+function L1HingeEmbeddingCriterion:__init(margin)
+ parent.__init(self)
+ margin=margin or 1
+ self.margin = margin
+ self.gradInput = {torch.Tensor(), torch.Tensor()}
+end
+
+function L1HingeEmbeddingCriterion:updateOutput(input,y)
+ self.output=input[1]:dist(input[2],1);
+ if y==-1 then
+ self.output = math.max(0,self.margin - self.output);
+ end
+ return self.output
+end
+
+
+local function mathsign(t)
+ if t>0 then return 1; end
+ if t<0 then return -1; end
+ return 2*torch.random(2)-3;
+end
+
+function L1HingeEmbeddingCriterion:updateGradInput(input, y)
+ self.gradInput[1]:resizeAs(input[1])
+ self.gradInput[2]:resizeAs(input[2])
+ self.gradInput[1]:copy(input[1])
+ self.gradInput[1]:add(-1, input[2])
+ local dist = self.gradInput[1]:norm(1);
+ self.gradInput[1]:apply(mathsign) -- L1 gradient
+ if y == -1 then -- just to avoid a mul by 1
+ if dist > self.margin then
+ self.gradInput[1]:zero()
+ else
+ self.gradInput[1]:mul(-1)
+ end
+ end
+ self.gradInput[2]:zero():add(-1, self.gradInput[1])
+ return self.gradInput
+end
diff --git a/Linear.lua b/Linear.lua
new file mode 100644
index 0000000..953af78
--- /dev/null
+++ b/Linear.lua
@@ -0,0 +1,82 @@
+local Linear, parent = torch.class('nn.Linear', 'nn.Module')
+
+function Linear:__init(inputSize, outputSize)
+ parent.__init(self)
+
+ self.weight = torch.Tensor(outputSize, inputSize)
+ self.bias = torch.Tensor(outputSize)
+ self.gradWeight = torch.Tensor(outputSize, inputSize)
+ self.gradBias = torch.Tensor(outputSize)
+
+ self:reset()
+end
+
+function Linear:reset(stdv)
+ if stdv then
+ stdv = stdv * math.sqrt(3)
+ else
+ stdv = 1./math.sqrt(self.weight:size(2))
+ end
+
+ -- we do this so the initialization is exactly
+ -- the same than in previous torch versions
+ for i=1,self.weight:size(1) do
+ self.weight:select(1, i):apply(function()
+ return torch.uniform(-stdv, stdv)
+ end)
+ self.bias[i] = torch.uniform(-stdv, stdv)
+ end
+end
+
+function Linear:updateOutput(input)
+ if input:dim() == 1 then
+ self.output:resize(self.bias:size(1))
+ self.output:copy(self.bias)
+ self.output:addmv(1, self.weight, input)
+ elseif input:dim() == 2 then
+ local nframe = input:size(1)
+ local nunit = self.bias:size(1)
+
+ self.output:resize(nframe, nunit)
+ self.output:zero():addr(1, input.new(nframe):fill(1), self.bias)
+ self.output:addmm(1, input, self.weight:t())
+ else
+ error('input must be vector or matrix')
+ end
+
+ return self.output
+end
+
+function Linear:updateGradInput(input, gradOutput)
+ if self.gradInput then
+
+ if input:dim() == 1 then
+ self.gradInput:resizeAs(input)
+ self.gradInput:addmv(0, 1, self.weight:t(), gradOutput)
+ elseif input:dim() == 2 then
+ self.gradInput:resizeAs(input)
+ self.gradInput:addmm(0, 1, gradOutput, self.weight)
+ end
+
+ return self.gradInput
+ end
+end
+
+function Linear:accGradParameters(input, gradOutput, scale)
+ scale = scale or 1
+
+ if input:dim() == 1 then
+ self.gradWeight:addr(scale, gradOutput, input)
+ self.gradBias:add(scale, gradOutput)
+ elseif input:dim() == 2 then
+ local nframe = input:size(1)
+ local nunit = self.bias:size(1)
+
+ self.gradWeight:addmm(scale, gradOutput:t(), input)
+ self.gradBias:addmv(scale, gradOutput:t(), input.new(nframe):fill(1))
+ end
+
+end
+
+-- we do not need to accumulate parameters when sharing
+Linear.sharedAccUpdateGradParameters = Linear.accUpdateGradParameters
diff --git a/LogSigmoid.lua b/LogSigmoid.lua
new file mode 100644
index 0000000..7485ae6
--- /dev/null
+++ b/LogSigmoid.lua
@@ -0,0 +1,14 @@
+local LogSigmoid, parent = torch.class('nn.LogSigmoid', 'nn.Module')
+
+function LogSigmoid:__init()
+ parent.__init(self)
+ self.buffer = torch.Tensor()
+end
+
+function LogSigmoid:updateOutput(input)
+ return input.nn.LogSigmoid_updateOutput(self, input)
+end
+
+function LogSigmoid:updateGradInput(input, gradOutput)
+ return input.nn.LogSigmoid_updateGradInput(self, input, gradOutput)
+end
diff --git a/LogSoftMax.lua b/LogSoftMax.lua
new file mode 100644
index 0000000..8d2947e
--- /dev/null
+++ b/LogSoftMax.lua
@@ -0,0 +1,9 @@
+local LogSoftMax = torch.class('nn.LogSoftMax', 'nn.Module')
+
+function LogSoftMax:updateOutput(input)
+ return input.nn.LogSoftMax_updateOutput(self, input)
+end
+
+function LogSoftMax:updateGradInput(input, gradOutput)
+ return input.nn.LogSoftMax_updateGradInput(self, input, gradOutput)
+end
diff --git a/LookupTable.lua b/LookupTable.lua
new file mode 100644
index 0000000..115f19c
--- /dev/null
+++ b/LookupTable.lua
@@ -0,0 +1,76 @@
+local LookupTable, parent = torch.class('nn.LookupTable', 'nn.Module')
+
+LookupTable.__version = 2
+
+function LookupTable:__init(nIndex, ...)
+ parent.__init(self)
+
+ if select('#', ...) == 1 and type(select(1, ...)) ~= "number" then
+ local size = select(1, ...)
+ self.size = torch.LongStorage(#size + 1)
+ for i=1,#size do
+ self.size[i+1] = size[i]
+ end
+ else
+ self.size = torch.LongStorage(select('#', ...)+1)
+ for i=1,select('#',...) do
+ self.size[i+1] = select(i, ...)
+ end
+ end
+
+ self.size[1] = nIndex
+ self.weight = torch.Tensor(self.size)
+ self.gradWeight = torch.Tensor(self.size):zero()
+ self.inputs = {}
+
+ self:reset()
+end
+
+function LookupTable:reset(stdv)
+ stdv = stdv or 1
+ self.weight:apply(function()
+ return torch.normal(0, stdv)
+ end)
+end
+
+function LookupTable:updateOutput(input)
+ local nIndex = input:size(1)
+ self.size[1] = nIndex
+ self.output:resize(self.size)
+
+ for i=1,nIndex do
+ self.output:select(1, i):copy(self.weight:select(1, input[i]))
+ end
+
+ return self.output
+end
+
+function LookupTable:zeroGradParameters()
+ for k,_ in pairs(self.inputs) do
+ self.gradWeight:select(1, k):zero()
+ end
+ self.inputs = {}
+end
+
+function LookupTable:accGradParameters(input, gradOutput, scale)
+ for i=1,input:size(1) do
+ local k = input[i]
+ self.inputs[k] = true
+ self.gradWeight:select(1, k):add(scale, gradOutput:select(1, i))
+ end
+end
+
+function LookupTable:accUpdateGradParameters(input, gradOutput, lr)
+ for i=1,input:size(1) do
+ self.weight:select(1, input[i]):add(-lr, gradOutput:select(1, i))
+ end
+end
+
+function LookupTable:updateParameters(learningRate)
+ for k,_ in pairs(self.inputs) do
+ self.weight:select(1, k):add(-learningRate, self.gradWeight:select(1, k))
+ end
+end
+
+-- we do not need to accumulate parameters when sharing
+LookupTable.sharedAccUpdateGradParameters = LookupTable.accUpdateGradParameters
diff --git a/MSECriterion.lua b/MSECriterion.lua
new file mode 100644
index 0000000..655c74f
--- /dev/null
+++ b/MSECriterion.lua
@@ -0,0 +1,14 @@
+local MSECriterion, parent = torch.class('nn.MSECriterion', 'nn.Criterion')
+
+function MSECriterion:__init()
+ parent.__init(self)
+ self.sizeAverage = true
+end
+
+function MSECriterion:updateOutput(input, target)
+ return input.nn.MSECriterion_updateOutput(self, input, target)
+end
+
+function MSECriterion:updateGradInput(input, target)
+ return input.nn.MSECriterion_updateGradInput(self, input, target)
+end
diff --git a/MarginCriterion.lua b/MarginCriterion.lua
new file mode 100644
index 0000000..deb903e
--- /dev/null
+++ b/MarginCriterion.lua
@@ -0,0 +1,23 @@
+local MarginCriterion, parent =
+ torch.class('nn.MarginCriterion', 'nn.Module')
+
+function MarginCriterion:__init(margin)
+ parent.__init(self)
+ margin=margin or 1
+ self.margin = margin
+ self.gradInput = torch.Tensor(1)
+end
+
+function MarginCriterion:updateOutput(input,y)
+ self.output=math.max(0, self.margin- y* input[1])
+ return self.output
+end
+
+function MarginCriterion:updateGradInput(input, y)
+ if (y*input[1])<self.margin then
+ self.gradInput[1]=-y
+ else
+ self.gradInput[1]=0;
+ end
+ return self.gradInput
+end
diff --git a/MarginRankingCriterion.lua b/MarginRankingCriterion.lua
new file mode 100644
index 0000000..a365ade
--- /dev/null
+++ b/MarginRankingCriterion.lua
@@ -0,0 +1,25 @@
+local MarginRankingCriterion, parent = torch.class('nn.MarginRankingCriterion', 'nn.Module')
+
+function MarginRankingCriterion:__init(margin)
+ parent.__init(self)
+ margin=margin or 1
+ self.margin = margin
+ self.gradInput = {torch.Tensor(1), torch.Tensor(1)}
+end
+
+function MarginRankingCriterion:updateOutput(input,y)
+ self.output=math.max(0, -y*(input[1][1]-input[2][1]) + self.margin )
+ return self.output
+end
+
+function MarginRankingCriterion:updateGradInput(input, y)
+ local dist = -y*(input[1][1]-input[2][1]) + self.margin
+ if dist < 0 then
+ self.gradInput[1][1]=0;
+ self.gradInput[2][1]=0;
+ else
+ self.gradInput[1][1]=-y
+ self.gradInput[2][1]=y
+ end
+ return self.gradInput
+end
diff --git a/Max.lua b/Max.lua
new file mode 100644
index 0000000..a5ba95d
--- /dev/null
+++ b/Max.lua
@@ -0,0 +1,16 @@
+local Max, parent = torch.class('nn.Max', 'nn.Module')
+
+function Max:__init(dimension)
+ parent.__init(self)
+ dimension = dimension or 1
+ self.dimension = dimension
+ self.indices = torch.Tensor()
+end
+
+function Max:updateOutput(input)
+ return input.nn.Max_updateOutput(self, input)
+end
+
+function Max:updateGradInput(input, gradOutput)
+ return input.nn.Max_updateGradInput(self, input, gradOutput)
+end
diff --git a/Mean.lua b/Mean.lua
new file mode 100644
index 0000000..55e7609
--- /dev/null
+++ b/Mean.lua
@@ -0,0 +1,26 @@
+local Mean, parent = torch.class('nn.Mean', 'nn.Module')
+
+function Mean:__init(dimension)
+ parent.__init(self)
+ dimension = dimension or 1
+ self.dimension = dimension
+end
+
+function Mean:updateOutput(input)
+ input.torch.mean(self.output, input, self.dimension)
+ self.output = self.output:select(self.dimension, 1)
+ return self.output
+end
+
+function Mean:updateGradInput(input, gradOutput)
+ local size = gradOutput:size():totable()
+ local stride = gradOutput:stride():totable()
+ table.insert(size, self.dimension, input:size(self.dimension))
+ table.insert(stride, self.dimension, 0)
+
+ self.gradInput:resizeAs(gradOutput):copy(gradOutput)
+ self.gradInput:mul(1/input:size(self.dimension))
+ self.gradInput:resize(torch.LongStorage(size), torch.LongStorage(stride))
+
+ return self.gradInput
+end
diff --git a/Min.lua b/Min.lua
new file mode 100644
index 0000000..f4edbd8
--- /dev/null
+++ b/Min.lua
@@ -0,0 +1,16 @@
+local Min, parent = torch.class('nn.Min', 'nn.Module')
+
+function Min:__init(dimension)
+ parent.__init(self)
+ dimension = dimension or 1
+ self.dimension = dimension
+ self.indices = torch.Tensor()
+end
+
+function Min:updateOutput(input)
+ return input.nn.Min_updateOutput(self, input)
+end
+
+function Min:updateGradInput(input, gradOutput)
+ return input.nn.Min_updateGradInput(self, input, gradOutput)
+end
diff --git a/Module.lua b/Module.lua
new file mode 100644
index 0000000..2ae8115
--- /dev/null
+++ b/Module.lua
@@ -0,0 +1,211 @@
+local Module = torch.class('nn.Module')
+
+function Module:__init()
+ self.gradInput = torch.Tensor()
+ self.output = torch.Tensor()
+end
+
+function Module:parameters()
+ if self.weight and self.bias then
+ return {self.weight, self.bias}, {self.gradWeight, self.gradBias}
+ elseif self.weight then
+ return {self.weight}, {self.gradWeight}
+ elseif self.bias then
+ return {self.bias}, {self.gradBias}
+ else
+ return
+ end
+end
+
+function Module:updateOutput(input)
+ return self.output
+end
+
+function Module:forward(input)
+ return self:updateOutput(input, target)
+end
+
+function Module:backward(input, gradOutput)
+ self:updateGradInput(input, gradOutput)
+ self:accGradParameters(input, gradOutput)
+ return self.gradInput
+end
+
+function Module:backwardUpdate(input, gradOutput, lr)
+ self:updateGradInput(input, gradOutput)
+ self:accUpdateGradParameters(input, gradOutput, lr)
+ return self.gradInput
+end
+
+function Module:updateGradInput(input, gradOutput)
+ return self.gradInput
+end
+
+function Module:accGradParameters(input, gradOutput, scale)
+end
+
+function Module:accUpdateGradParameters(input, gradOutput, lr)
+ local gradWeight = self.gradWeight
+ local gradBias = self.gradBias
+ self.gradWeight = self.weight
+ self.gradBias = self.bias
+ self:accGradParameters(input, gradOutput, -lr)
+ self.gradWeight = gradWeight
+ self.gradBias = gradBias
+end
+
+function Module:sharedAccUpdateGradParameters(input, gradOutput, lr)
+ if self:parameters() then
+ self:zeroGradParameters()
+ self:accGradParameters(input, gradOutput, 1)
+ self:updateParameters(lr)
+ end
+end
+
+function Module:zeroGradParameters()
+ local _,gradParams = self:parameters()
+ if gradParams then
+ for i=1,#gradParams do
+ gradParams[i]:zero()
+ end
+ end
+end
+
+function Module:updateParameters(learningRate)
+ local params, gradParams = self:parameters()
+ if params then
+ for i=1,#params do
+ params[i]:add(-learningRate, gradParams[i])
+ end
+ end
+end
+
+function Module:share(mlp, ...)
+ for i,v in ipairs(arg) do
+ if self[v] ~= nil then
+ self[v]:set(mlp[v])
+ self.accUpdateGradParameters = self.sharedAccUpdateGradParameters
+ mlp.accUpdateGradParameters = mlp.sharedAccUpdateGradParameters
+ end
+ end
+ return self
+end
+
+function Module:clone(...)
+ local f = torch.MemoryFile("rw"):binary()
+ f:writeObject(self)
+ f:seek(1)
+ local clone = f:readObject()
+ f:close()
+ if select('#',...) > 0 then
+ clone:share(self,...)
+ end
+ return clone
+end
+
+function Module:type(type)
+ -- find all tensors and convert them
+ for key,param in pairs(self) do
+ if torch.typename(param) and torch.typename(param):find('torch%..+Tensor') then
+ self[key] = param:type(type)
+ end
+ end
+ -- find submodules in classic containers 'modules'
+ if self.modules then
+ for _,module in ipairs(self.modules) do
+ module:type(type)
+ end
+ end
+ return self
+end
+
+function Module:float()
+ return self:type('torch.FloatTensor')
+end
+
+function Module:double()
+ return self:type('torch.DoubleTensor')
+end
+
+function Module:cuda()
+ return self:type('torch.CudaTensor')
+end
+
+function Module:getParameters()
+ -- get parameters
+ local parameters,gradParameters = self:parameters()
+
+ -- this function flattens arbitrary lists of parameters,
+ -- even complex shared ones
+ local function flatten(parameters)
+ -- already flat ?
+ local flat = true
+ for k = 2,#parameters do
+ if parameters[k]:storage() ~= parameters[k-1]:storage() then
+ flat = false
+ break
+ end
+ end
+ if flat then
+ local nParameters = 0
+ for k,param in ipairs(parameters) do
+ nParameters = nParameters + param:nElement()
+ end
+ local flatParameters = parameters[1].new(parameters[1]:storage())
+ if nParameters ~= flatParameters:nElement() then
+ error('flattenParameters(): weird parameters')
+ end
+ return flatParameters
+ end
+ -- compute offsets of each parameter
+ local offsets = {}
+ local sizes = {}
+ local strides = {}
+ local elements = {}
+ local storageOffsets = {}
+ local params = {}
+ local nParameters = 0
+ for k,param in ipairs(parameters) do
+ table.insert(offsets, nParameters+1)
+ table.insert(sizes, param:size())
+ table.insert(strides, param:stride())
+ table.insert(elements, param:nElement())
+ table.insert(storageOffsets, param:storageOffset())
+ local isView = false
+ for i = 1,k-1 do
+ if param:storage() == parameters[i]:storage() then
+ offsets[k] = offsets[i]
+ if storageOffsets[k] ~= storageOffsets[i] or elements[k] ~= elements[i] then
+ error('flattenParameters(): cannot flatten shared weights with different structures')
+ end
+ isView = true
+ break
+ end
+ end
+ if not isView then
+ nParameters = nParameters + param:nElement()
+ end
+ end
+ -- create flat vector
+ local flatParameters = parameters[1].new(nParameters)
+ local storage = flatParameters:storage()
+ -- reallocate all parameters in flat vector
+ for i = 1,#parameters do
+ local data = parameters[i]:clone()
+ parameters[i]:set(storage, offsets[i], elements[i]):resize(sizes[i],strides[i]):copy(data)
+ data = nil
+ collectgarbage()
+ end
+ -- cleanup
+ collectgarbage()
+ -- return flat param
+ return flatParameters
+ end
+
+ -- flatten parameters and gradients
+ local flatParameters = flatten(parameters)
+ local flatGradParameters = flatten(gradParameters)
+
+ -- return new flat vector that contains all discrete parameters
+ return flatParameters, flatGradParameters
+end
diff --git a/Mul.lua b/Mul.lua
new file mode 100644
index 0000000..7841470
--- /dev/null
+++ b/Mul.lua
@@ -0,0 +1,42 @@
+local Mul, parent = torch.class('nn.Mul', 'nn.Module')
+
+function Mul:__init(inputSize)
+ parent.__init(self)
+
+ self.weight = torch.Tensor(1)
+ self.gradWeight = torch.Tensor(1)
+
+ -- state
+ self.gradInput:resize(inputSize)
+ self.output:resize(inputSize)
+
+ self:reset()
+end
+
+
+function Mul:reset(stdv)
+ if stdv then
+ stdv = stdv * math.sqrt(3)
+ else
+ stdv = 1./math.sqrt(self.weight:size(1))
+ end
+
+ self.weight[1] = torch.uniform(-stdv, stdv);
+end
+
+function Mul:updateOutput(input)
+ self.output:copy(input);
+ self.output:mul(self.weight[1]);
+ return self.output
+end
+
+function Mul:updateGradInput(input, gradOutput)
+ self.gradInput:zero()
+ self.gradInput:add(self.weight[1], gradOutput)
+ return self.gradInput
+end
+
+function Mul:accGradParameters(input, gradOutput, scale)
+ scale = scale or 1
+ self.gradWeight[1] = self.gradWeight[1] + scale*input:dot(gradOutput);
+end
diff --git a/MultiCriterion.lua b/MultiCriterion.lua
new file mode 100644
index 0000000..e83b97e
--- /dev/null
+++ b/MultiCriterion.lua
@@ -0,0 +1,32 @@
+local MultiCriterion, parent = torch.class('nn.MultiCriterion', 'nn.Criterion')
+
+function MultiCriterion:__init()
+ parent.__init(self)
+ self.criterions = {}
+ self.weights = torch.DoubleStorage()
+end
+
+function MultiCriterion:add(criterion, weight)
+ weight = weight or 1
+ table.insert(self.criterions, criterion)
+ self.weights:resize(#self.criterions, true)
+ self.weights[#self.criterions] = weight
+ return self
+end
+
+function MultiCriterion:updateOutput(input, target)
+ self.output = 0
+ for i=1,#self.criterions do
+ self.output = self.output + self.weights[i]*self.criterions[i]:updateOutput(input, target)
+ end
+ return self.output
+end
+
+function MultiCriterion:updateGradInput(input, target)
+ self.gradInput:resizeAs(input)
+ self.gradInput:zero()
+ for i=1,#self.criterions do
+ self.gradInput:add(self.weights[i], self.criterions[i]:updateGradInput(input, target))
+ end
+ return self.gradInput
+end
diff --git a/MultiLabelMarginCriterion.lua b/MultiLabelMarginCriterion.lua
new file mode 100644
index 0000000..c435888
--- /dev/null
+++ b/MultiLabelMarginCriterion.lua
@@ -0,0 +1,14 @@
+local MultiLabelMarginCriterion, parent = torch.class('nn.MultiLabelMarginCriterion', 'nn.Criterion')
+
+function MultiLabelMarginCriterion:__init()
+ parent.__init(self)
+ self.sizeAverage = true
+end
+
+function MultiLabelMarginCriterion:updateOutput(input, target)
+ return input.nn.MultiLabelMarginCriterion_updateOutput(self, input, target)
+end
+
+function MultiLabelMarginCriterion:updateGradInput(input, target)
+ return input.nn.MultiLabelMarginCriterion_updateGradInput(self, input, target)
+end
diff --git a/MultiMarginCriterion.lua b/MultiMarginCriterion.lua
new file mode 100644
index 0000000..e8de9d9
--- /dev/null
+++ b/MultiMarginCriterion.lua
@@ -0,0 +1,14 @@
+local MultiMarginCriterion, parent = torch.class('nn.MultiMarginCriterion', 'nn.Criterion')
+
+function MultiMarginCriterion:__init()
+ parent.__init(self)
+ self.sizeAverage = true
+end
+
+function MultiMarginCriterion:updateOutput(input, target)
+ return input.nn.MultiMarginCriterion_updateOutput(self, input, target)
+end
+
+function MultiMarginCriterion:updateGradInput(input, target)
+ return input.nn.MultiMarginCriterion_updateGradInput(self, input, target)
+end
diff --git a/Narrow.lua b/Narrow.lua
new file mode 100644
index 0000000..4445983
--- /dev/null
+++ b/Narrow.lua
@@ -0,0 +1,24 @@
+local Narrow, parent = torch.class('nn.Narrow', 'nn.Module')
+
+function Narrow:__init(dimension,offset,length)
+ parent.__init(self)
+ self.dimension=dimension
+ self.index=offset
+ self.length=length or 1
+ if not dimension or not offset then
+ error('nn.Narrow(dimension, offset, length)')
+ end
+end
+
+function Narrow:updateOutput(input)
+ local output=input:narrow(self.dimension,self.index,self.length);
+ self.output:resizeAs(output)
+ return self.output:copy(output)
+end
+
+function Narrow:updateGradInput(input, gradOutput)
+ self.gradInput:resizeAs(input)
+ self.gradInput:zero();
+ self.gradInput:narrow(self.dimension,self.index,self.length):copy(gradOutput)
+ return self.gradInput
+end
diff --git a/PairwiseDistance.lua b/PairwiseDistance.lua
new file mode 100644
index 0000000..638c58f
--- /dev/null
+++ b/PairwiseDistance.lua
@@ -0,0 +1,33 @@
+local PairwiseDistance, parent = torch.class('nn.PairwiseDistance', 'nn.Module')
+
+function PairwiseDistance:__init(p)
+ parent.__init(self)
+
+ -- state
+ self.gradInput = {torch.Tensor(), torch.Tensor()}
+ self.output = torch.Tensor(1)
+ self.norm=p
+end
+
+function PairwiseDistance:updateOutput(input)
+ self.output[1]=input[1]:dist(input[2],self.norm);
+ return self.output
+end
+
+local function mathsign(x)
+ if x==0 then return 2*torch.random(2)-3; end
+ if x>0 then return 1; else return -1; end
+end
+
+function PairwiseDistance:updateGradInput(input, gradOutput)
+ self.gradInput[1]:resizeAs(input[1])
+ self.gradInput[2]:resizeAs(input[2])
+ self.gradInput[1]:copy(input[1])
+ self.gradInput[1]:add(-1, input[2])
+ if self.norm==1 then
+ self.gradInput[1]:apply(mathsign)
+ end
+ self.gradInput[1]:mul(gradOutput[1]);
+ self.gradInput[2]:zero():add(-1, self.gradInput[1])
+ return self.gradInput
+end
diff --git a/Parallel.lua b/Parallel.lua
new file mode 100644
index 0000000..04a8bdb
--- /dev/null
+++ b/Parallel.lua
@@ -0,0 +1,137 @@
+local Parallel, parent = torch.class('nn.Parallel', 'nn.Module')
+
+function Parallel:__init(inputDimension,outputDimension)
+ parent.__init(self)
+ self.modules = {}
+ self.size = torch.LongStorage()
+ self.inputDimension = inputDimension
+ self.outputDimension = outputDimension
+end
+
+function Parallel:add(module)
+ table.insert(self.modules, module)
+ return self
+end
+
+function Parallel:get(index)
+ return self.modules[index]
+end
+
+function Parallel:updateOutput(input)
+
+ local modules=input:size(self.inputDimension)
+
+ for i=1,modules do
+ local currentOutput =
+ self.modules[i]:updateOutput(input:select(self.inputDimension,i))
+
+ if i == 1 then
+ self.size:resize(currentOutput:dim()):copy(currentOutput:size())
+ else
+ self.size[self.outputDimension] = self.size[self.outputDimension]
+ + currentOutput:size(self.outputDimension)
+ end
+ end
+ self.output:resize(self.size)
+
+ local offset = 1
+ for i=1,modules do
+ local currentOutput = self.modules[i]:updateOutput(input:select(self.inputDimension,i))
+
+ self.output:narrow(self.outputDimension, offset,
+ currentOutput:size(self.outputDimension)):copy(currentOutput)
+ offset = offset + currentOutput:size(self.outputDimension)
+ end
+ return self.output
+end
+
+function Parallel:updateGradInput(input, gradOutput)
+ local nModule=input:size(self.inputDimension)
+ self.gradInput:resizeAs(input)
+
+ local offset = 1
+ for i=1,nModule do
+ local module=self.modules[i];
+ local currentOutput = module.output
+ local currentGradInput =
+ module:updateGradInput(input:select(self.inputDimension,i),
+ gradOutput:narrow(self.outputDimension,
+ offset, currentOutput:size(self.outputDimension)))
+
+ self.gradInput:select(self.inputDimension,i):copy(currentGradInput)
+ offset = offset + currentOutput:size(self.outputDimension)
+ end
+ return self.gradInput
+end
+
+function Parallel:accGradParameters(input, gradOutput, scale)
+ local nModule=input:size(self.inputDimension)
+
+ local offset = 1
+ for i=1,nModule do
+ local module = self.modules[i];
+ local currentOutput = module.output
+ local currentGradInput =
+ module:accGradParameters(input:select(self.inputDimension,i),
+ gradOutput:narrow(self.outputDimension,
+ offset, currentOutput:size(self.outputDimension)), scale)
+
+ offset = offset + currentOutput:size(self.outputDimension)
+ end
+end
+
+function Parallel:accUpdateGradParameters(input, gradOutput, lr)
+ local nModule=input:size(self.inputDimension)
+
+ local offset = 1
+ for i=1,nModule do
+ local module = self.modules[i];
+ local currentOutput = module.output
+ local currentGradInput =
+ module:accUpdateGradParameters(input:select(self.inputDimension,i),
+ gradOutput:narrow(self.outputDimension,
+ offset, currentOutput:size(self.outputDimension)), lr)
+
+ offset = offset + currentOutput:size(self.outputDimension)
+ end
+end
+
+function Parallel:zeroGradParameters()
+ for _,module in ipairs(self.modules) do
+ module:zeroGradParameters()
+ end
+end
+
+function Parallel:updateParameters(learningRate)
+ for _,module in ipairs(self.modules) do
+ module:updateParameters(learningRate)
+ end
+end
+
+function Parallel:share(mlp,...)
+ for i=1,#self.modules do
+ self.modules[i]:share(mlp.modules[i],...);
+ end
+end
+
+function Parallel:parameters()
+ local function tinsert(to, from)
+ if type(from) == 'table' then
+ for i=1,#from do
+ tinsert(to,from[i])
+ end
+ else
+ table.insert(to,from)
+ end
+ end
+ local w = {}
+ local gw = {}
+ for i=1,#self.modules do
+ local mw,mgw = self.modules[i]:parameters()
+ if mw then
+ tinsert(w,mw)
+ tinsert(gw,mgw)
+ end
+ end
+ return w,gw
+end
diff --git a/ParallelTable.lua b/ParallelTable.lua
new file mode 100644
index 0000000..a97904f
--- /dev/null
+++ b/ParallelTable.lua
@@ -0,0 +1,71 @@
+local ParallelTable, parent = torch.class('nn.ParallelTable', 'nn.Module')
+
+function ParallelTable:__init()
+ parent.__init(self)
+ self.modules = {}
+ self.output = {}
+ self.gradInput = {}
+end
+
+function ParallelTable:add(module)
+ table.insert(self.modules, module)
+ return self
+end
+
+function ParallelTable:get(index)
+ return self.modules[index]
+end
+
+function ParallelTable:size()
+ return #self.modules
+end
+
+function ParallelTable:updateOutput(input)
+ for i=1,#self.modules do
+ self.output[i] = self.modules[i]:updateOutput(input[i])
+ end
+ return self.output
+end
+
+
+function ParallelTable:updateGradInput(input, gradOutput)
+ for i,module in ipairs(self.modules) do
+ self.gradInput[i]= module:updateGradInput(input[i], gradOutput[i])
+ end
+ return self.gradInput
+end
+
+function ParallelTable:accGradParameters(input, gradOutput, scale)
+ scale = scale or 1
+ for i,module in ipairs(self.modules) do
+ module:accGradParameters(input[i], gradOutput[i], scale)
+ end
+end
+
+function ParallelTable:accUpdateGradParameters(input, gradOutput, lr)
+ lr = lr or 1
+ for i,module in ipairs(self.modules) do
+ module:accUpdateGradParameters(input[i], gradOutput[i], lr)
+ end
+end
+
+function ParallelTable:zeroGradParameters()
+ for _,module in ipairs(self.modules) do
+ module:zeroGradParameters()
+ end
+end
+
+function ParallelTable:updateParameters(learningRate)
+ for _,module in ipairs(self.modules) do
+ module:updateParameters(learningRate)
+ end
+end
+
+function ParallelTable:share(mlp,...)
+ for i=1,#self.modules do
+ self.modules[i]:share(mlp.modules[i],...);
+ end
+end
+
+
+
diff --git a/Power.lua b/Power.lua
new file mode 100644
index 0000000..8052b3f
--- /dev/null
+++ b/Power.lua
@@ -0,0 +1,21 @@
+local Power, parent = torch.class('nn.Power','nn.Module')
+
+function Power:__init(p)
+ parent.__init(self)
+ self.pow = p
+ if not p then
+ error('nn.Power(power)')
+ end
+end
+
+function Power:updateOutput(input)
+ self.output:resizeAs(input):copy(input)
+ self.output:pow(self.pow)
+ return self.output
+end
+
+function Power:updateGradInput(input, gradOutput)
+ self.gradInput:resizeAs(input):copy(gradOutput)
+ self.gradInput:cmul(self.output):cdiv(input):mul(self.pow)
+ return self.gradInput
+end
diff --git a/Replicate.lua b/Replicate.lua
new file mode 100644
index 0000000..c30a86a
--- /dev/null
+++ b/Replicate.lua
@@ -0,0 +1,29 @@
+local Replicate, parent = torch.class('nn.Replicate','nn.Module')
+
+function Replicate:__init(nf)
+ parent.__init(self)
+ self.nfeatures = nf
+end
+
+function Replicate:updateOutput(input)
+ local sz = torch.LongStorage(input:dim()+1)
+ sz[1] = self.nfeatures
+ for i = 1,input:dim() do
+ sz[i+1] = input:size(i)
+ end
+ local st = torch.LongStorage(input:dim()+1)
+ st[1] = 0
+ for i = 1,input:dim() do
+ st[i+1] = input:stride(i)
+ end
+ self.output = input.new(input:storage(),input:storageOffset(),sz,st)
+ return self.output
+end
+
+function Replicate:updateGradInput(input, gradOutput)
+ self.gradInput:resizeAs(input):zero()
+ for k = 1,gradOutput:size(1) do
+ self.gradInput:add(gradOutput[k])
+ end
+ return self.gradInput
+end
diff --git a/Reshape.lua b/Reshape.lua
new file mode 100644
index 0000000..0be793f
--- /dev/null
+++ b/Reshape.lua
@@ -0,0 +1,38 @@
+local Reshape, parent = torch.class('nn.Reshape', 'nn.Module')
+
+function Reshape:__init(...)
+ parent.__init(self)
+ self.size = torch.LongStorage()
+ self.batchsize = torch.LongStorage()
+ local n = select('#', ...)
+ if n == 1 and torch.typename(select(1, ...)) == 'torch.LongStorage' then
+ self.size:resize(#select(1, ...)):copy(select(1, ...))
+ else
+ self.size:resize(n)
+ self.batchsize:resize(n+1)
+ self.nelement = 1
+ for i=1,n do
+ self.size[i] = select(i, ...)
+ self.batchsize[i+1] = select(i, ...)
+ self.nelement = self.nelement * self.size[i]
+ end
+ end
+end
+
+function Reshape:updateOutput(input)
+ input = input:contiguous()
+ local nelement = input:nElement()
+ if nelement == self.nelement then
+ self.output:set(input):resize(self.size)
+ else
+ self.batchsize[1] = input:size(1)
+ self.output:set(input):resize(self.batchsize)
+ end
+ return self.output
+end
+
+function Reshape:updateGradInput(input, gradOutput)
+ gradOutput = gradOutput:contiguous()
+ self.gradInput:set(gradOutput):resizeAs(input)
+ return self.gradInput
+end
diff --git a/Select.lua b/Select.lua
new file mode 100644
index 0000000..acf8e06
--- /dev/null
+++ b/Select.lua
@@ -0,0 +1,20 @@
+local Select, parent = torch.class('nn.Select', 'nn.Module')
+
+function Select:__init(dimension,index)
+ parent.__init(self)
+ self.dimension = dimension
+ self.index = index
+end
+
+function Select:updateOutput(input)
+ local output = input:select(self.dimension,self.index);
+ self.output:resizeAs(output)
+ return self.output:copy(output)
+end
+
+function Select:updateGradInput(input, gradOutput)
+ self.gradInput:resizeAs(input)
+ self.gradInput:zero()
+ self.gradInput:select(self.dimension,self.index):copy(gradOutput)
+ return self.gradInput
+end
diff --git a/Sequential.lua b/Sequential.lua
new file mode 100644
index 0000000..3e23350
--- /dev/null
+++ b/Sequential.lua
@@ -0,0 +1,129 @@
+local Sequential, parent = torch.class('nn.Sequential', 'nn.Module')
+
+function Sequential:__init()
+ self.modules = {}
+end
+
+function Sequential:add(module)
+ if #self.modules == 0 then
+ self.gradInput = module.gradInput
+ end
+ table.insert(self.modules, module)
+ self.output = module.output
+ return self
+end
+
+function Sequential:size()
+ return #self.modules
+end
+
+function Sequential:get(index)
+ return self.modules[index]
+end
+
+function Sequential:updateOutput(input)
+ local currentOutput = input
+ for i=1,#self.modules do
+ currentOutput = self.modules[i]:updateOutput(currentOutput)
+ end
+ self.output = currentOutput
+ return currentOutput
+end
+
+function Sequential:updateGradInput(input, gradOutput)
+ local currentGradOutput = gradOutput
+ local currentModule = self.modules[#self.modules]
+ for i=#self.modules-1,1,-1 do
+ local previousModule = self.modules[i]
+ currentGradOutput = currentModule:updateGradInput(previousModule.output, currentGradOutput)
+ currentModule = previousModule
+ end
+ currentGradOutput = currentModule:updateGradInput(input, currentGradOutput)
+ self.gradInput = currentGradOutput
+ return currentGradOutput
+end
+
+function Sequential:accGradParameters(input, gradOutput, scale)
+ scale = scale or 1
+
+ local currentGradOutput = gradOutput
+ local currentModule = self.modules[#self.modules]
+ for i=#self.modules-1,1,-1 do
+ local previousModule = self.modules[i]
+ currentModule:accGradParameters(previousModule.output, currentGradOutput, scale)
+ currentGradOutput = currentModule.gradInput
+ currentModule = previousModule
+ end
+
+ currentModule:accGradParameters(input, currentGradOutput, scale)
+end
+
+function Sequential:accUpdateGradParameters(input, gradOutput, lr)
+ local currentGradOutput = gradOutput
+ local currentModule = self.modules[#self.modules]
+ for i=#self.modules-1,1,-1 do
+ local previousModule = self.modules[i]
+ currentModule:accUpdateGradParameters(previousModule.output, currentGradOutput, lr)
+ currentGradOutput = currentModule.gradInput
+ currentModule = previousModule
+ end
+
+ currentModule:accUpdateGradParameters(input, currentGradOutput, lr)
+end
+
+function Sequential:zeroGradParameters()
+ for i=1,#self.modules do
+ self.modules[i]:zeroGradParameters()
+ end
+end
+
+function Sequential:updateParameters(learningRate)
+ for i=1,#self.modules do
+ self.modules[i]:updateParameters(learningRate)
+ end
+end
+
+function Sequential:share(mlp,...)
+ for i=1,#self.modules do
+ self.modules[i]:share(mlp.modules[i],...);
+ end
+end
+
+function Sequential:parameters()
+ local function tinsert(to, from)
+ if type(from) == 'table' then
+ for i=1,#from do
+ tinsert(to,from[i])
+ end
+ else
+ table.insert(to,from)
+ end
+ end
+ local w = {}
+ local gw = {}
+ for i=1,#self.modules do
+ local mw,mgw = self.modules[i]:parameters()
+ if mw then
+ tinsert(w,mw)
+ tinsert(gw,mgw)
+ end
+ end
+ return w,gw
+end
+
+function Sequential:__tostring__()
+ local tab = ' '
+ local line = '\n'
+ local next = ' -> '
+ local str = 'nn.Sequential'
+ str = str .. ' {' .. line .. tab .. '[input'
+ for i=1,#self.modules do
+ str = str .. next .. '(' .. i .. ')'
+ end
+ str = str .. next .. 'output]'
+ for i=1,#self.modules do
+ str = str .. line .. tab .. '(' .. i .. '): ' .. tostring(self.modules[i]):gsub(line, line .. tab)
+ end
+ str = str .. line .. '}'
+ return str
+end
diff --git a/Sigmoid.lua b/Sigmoid.lua
new file mode 100644
index 0000000..efde004
--- /dev/null
+++ b/Sigmoid.lua
@@ -0,0 +1,9 @@
+local Sigmoid = torch.class('nn.Sigmoid', 'nn.Module')
+
+function Sigmoid:updateOutput(input)
+ return input.nn.Sigmoid_updateOutput(self, input)
+end
+
+function Sigmoid:updateGradInput(input, gradOutput)
+ return input.nn.Sigmoid_updateGradInput(self, input, gradOutput)
+end
diff --git a/SoftMax.lua b/SoftMax.lua
new file mode 100644
index 0000000..609b353
--- /dev/null
+++ b/SoftMax.lua
@@ -0,0 +1,9 @@
+local SoftMax, parent = torch.class('nn.SoftMax', 'nn.Module')
+
+function SoftMax:updateOutput(input)
+ return input.nn.SoftMax_updateOutput(self, input)
+end
+
+function SoftMax:updateGradInput(input, gradOutput)
+ return input.nn.SoftMax_updateGradInput(self, input, gradOutput)
+end
diff --git a/SoftMin.lua b/SoftMin.lua
new file mode 100644
index 0000000..90c6c60
--- /dev/null
+++ b/SoftMin.lua
@@ -0,0 +1,15 @@
+local SoftMin, parent = torch.class('nn.SoftMin', 'nn.Module')
+
+function SoftMin:updateOutput(input)
+ self.mininput = self.mininput or input.new()
+ self.mininput:resizeAs(input):copy(input):mul(-1)
+ return input.nn.SoftMax_updateOutput(self, self.mininput)
+end
+
+function SoftMin:updateGradInput(input, gradOutput)
+ self.mininput = self.mininput or input.new()
+ self.mininput:resizeAs(input):copy(input):mul(-1)
+ self.gradInput = input.nn.SoftMax_updateGradInput(self, self.mininput, gradOutput)
+ self.gradInput:mul(-1)
+ return self.gradInput
+end
diff --git a/SoftPlus.lua b/SoftPlus.lua
new file mode 100644
index 0000000..18d586a
--- /dev/null
+++ b/SoftPlus.lua
@@ -0,0 +1,9 @@
+local SoftPlus = torch.class('nn.SoftPlus', 'nn.Module')
+
+function SoftPlus:updateOutput(input)
+ return input.nn.SoftPlus_updateOutput(self, input)
+end
+
+function SoftPlus:updateGradInput(input, gradOutput)
+ return input.nn.SoftPlus_updateGradInput(self, input, gradOutput)
+end
diff --git a/SoftShrink.lua b/SoftShrink.lua
new file mode 100644
index 0000000..379dc61
--- /dev/null
+++ b/SoftShrink.lua
@@ -0,0 +1,16 @@
+local SoftShrink, parent = torch.class('nn.SoftShrink', 'nn.Module')
+
+function SoftShrink:__init(lam)
+ parent.__init(self)
+ self.lambda = lam or 0.5
+end
+
+function SoftShrink:updateOutput(input)
+ input.nn.SoftShrink_updateOutput(self, input)
+ return self.output
+end
+
+function SoftShrink:updateGradInput(input, gradOutput)
+ input.nn.SoftShrink_updateGradInput(self, input, gradOutput)
+ return self.gradInput
+end
diff --git a/SoftSign.lua b/SoftSign.lua
new file mode 100644
index 0000000..480894c
--- /dev/null
+++ b/SoftSign.lua
@@ -0,0 +1,15 @@
+local SoftSign = torch.class('nn.SoftSign', 'nn.Module')
+
+function SoftSign:updateOutput(input)
+ self.temp = self.temp or input.new()
+ self.temp:resizeAs(input):copy(input):abs():add(1)
+ self.output:resizeAs(input):copy(input):cdiv(self.temp)
+ return self.output
+end
+
+function SoftSign:updateGradInput(input, gradOutput)
+ self.tempgrad = self.tempgrad or input.new()
+ self.tempgrad:resizeAs(self.output):copy(input):abs():add(1):cmul(self.tempgrad)
+ self.gradInput:resizeAs(input):copy(gradOutput):cdiv(self.tempgrad)
+ return self.gradInput
+end
diff --git a/SparseLinear.lua b/SparseLinear.lua
new file mode 100644
index 0000000..ec8845e
--- /dev/null
+++ b/SparseLinear.lua
@@ -0,0 +1,42 @@
+local SparseLinear, parent = torch.class('nn.SparseLinear', 'nn.Module')
+
+function SparseLinear:__init(inputSize, outputSize)
+ parent.__init(self)
+
+ self.weightDecay = 0
+ self.weight = torch.Tensor(outputSize, inputSize)
+ self.bias = torch.Tensor(outputSize)
+ self.gradWeight = torch.Tensor(outputSize, inputSize)
+ self.gradBias = torch.Tensor(outputSize)
+ self.lastInput = torch.Tensor()
+ -- state
+ self.gradInput:resize(inputSize)
+ self.output:resize(outputSize)
+
+ self:reset()
+end
+
+function SparseLinear:reset(stdv)
+ if stdv then
+ stdv = stdv * math.sqrt(3)
+ else
+ stdv = 1./math.sqrt(self.weight:size(1))
+ end
+
+ -- we do this so the initialization is exactly
+   -- the same as in previous torch versions
+ for i=1,self.weight:size(1) do
+ self.weight:select(1, i):apply(function()
+ return torch.uniform(-stdv, stdv)
+ end)
+ self.bias[i] = torch.uniform(-stdv, stdv) * 0.000001
+ end
+end
+
+function SparseLinear:updateOutput(input)
+ return input.nn.SparseLinear_updateOutput(self, input)
+end
+
+function SparseLinear:accGradParameters(input, gradOutput, scale)
+ return input.nn.SparseLinear_accGradParameters(self, input, gradOutput, scale)
+end
diff --git a/SpatialConvolution.lua b/SpatialConvolution.lua
new file mode 100644
index 0000000..38d2737
--- /dev/null
+++ b/SpatialConvolution.lua
@@ -0,0 +1,50 @@
+local SpatialConvolution, parent = torch.class('nn.SpatialConvolution', 'nn.Module')
+
+function SpatialConvolution:__init(nInputPlane, nOutputPlane, kW, kH, dW, dH)
+ parent.__init(self)
+
+ dW = dW or 1
+ dH = dH or 1
+
+ self.nInputPlane = nInputPlane
+ self.nOutputPlane = nOutputPlane
+ self.kW = kW
+ self.kH = kH
+ self.dW = dW
+ self.dH = dH
+
+ self.weight = torch.Tensor(nOutputPlane, nInputPlane, kH, kW)
+ self.bias = torch.Tensor(nOutputPlane)
+ self.gradWeight = torch.Tensor(nOutputPlane, nInputPlane, kH, kW)
+ self.gradBias = torch.Tensor(nOutputPlane)
+
+ self:reset()
+end
+
+function SpatialConvolution:reset(stdv)
+ if stdv then
+ stdv = stdv * math.sqrt(3)
+ else
+ stdv = 1/math.sqrt(self.kW*self.kH*self.nInputPlane)
+ end
+ self.weight:apply(function()
+ return torch.uniform(-stdv, stdv)
+ end)
+ self.bias:apply(function()
+ return torch.uniform(-stdv, stdv)
+ end)
+end
+
+function SpatialConvolution:updateOutput(input)
+ return input.nn.SpatialConvolution_updateOutput(self, input)
+end
+
+function SpatialConvolution:updateGradInput(input, gradOutput)
+ if self.gradInput then
+ return input.nn.SpatialConvolution_updateGradInput(self, input, gradOutput)
+ end
+end
+
+function SpatialConvolution:accGradParameters(input, gradOutput, scale)
+ return input.nn.SpatialConvolution_accGradParameters(self, input, gradOutput, scale)
+end
diff --git a/SpatialConvolutionMap.lua b/SpatialConvolutionMap.lua
new file mode 100644
index 0000000..0dbff2f
--- /dev/null
+++ b/SpatialConvolutionMap.lua
@@ -0,0 +1,119 @@
+local SpatialConvolutionMap, parent = torch.class('nn.SpatialConvolutionMap', 'nn.Module')
+
+nn.tables = nn.tables or {}
+
+function nn.tables.full(nin, nout)
+ local ft = torch.Tensor(nin*nout,2)
+ local p = 1
+ for j=1,nout do
+ for i=1,nin do
+ ft[p][1] = i
+ ft[p][2] = j
+ p = p + 1
+ end
+ end
+ return ft
+end
+
+function nn.tables.oneToOne(nfeat)
+ local ft = torch.Tensor(nfeat,2)
+ for i=1,nfeat do
+ ft[i][1] = i
+ ft[i][2] = i
+ end
+ return ft
+end
+
+function nn.tables.random(nin, nout, nto)
+ local nker = nto * nout
+ local tbl = torch.Tensor(nker, 2)
+ local fi = torch.randperm(nin)
+ local frcntr = 1
+ local tocntr = 1
+ local nfi = math.floor(nin/nto) -- number of distinct nto chunks
+ local rfi = math.mod(nin,nto) -- number of remaining from maps
+ local totbl = tbl:select(2,2)
+ local frtbl = tbl:select(2,1)
+ local fitbl = fi:narrow(1, 1, (nfi * nto)) -- part of fi that covers distinct chunks
+ local ufrtbl= frtbl:unfold(1, nto, nto)
+ local utotbl= totbl:unfold(1, nto, nto)
+ local ufitbl= fitbl:unfold(1, nto, nto)
+
+ -- start filling frtbl
+   for i=1,nout do -- for each unit in target map
+ ufrtbl:select(1,i):copy(ufitbl:select(1,frcntr))
+ frcntr = frcntr + 1
+ if frcntr-1 == nfi then -- reset fi
+ fi:copy(torch.randperm(nin))
+ frcntr = 1
+ end
+ end
+ for tocntr=1,utotbl:size(1) do
+ utotbl:select(1,tocntr):fill(tocntr)
+ end
+ return tbl
+end
+
+function SpatialConvolutionMap:__init(conMatrix, kW, kH, dW, dH)
+ parent.__init(self)
+
+ dW = dW or 1
+ dH = dH or 1
+
+ self.kW = kW
+ self.kH = kH
+ self.dW = dW
+ self.dH = dH
+ self.connTable = conMatrix
+ self.nInputPlane = self.connTable:select(2,1):maxall()
+ self.nOutputPlane = self.connTable:select(2,2):maxall()
+
+ self.weight = torch.Tensor(self.connTable:size(1), kH, kW)
+ self.bias = torch.Tensor(self.nOutputPlane)
+ self.gradWeight = torch.Tensor(self.connTable:size(1), kH, kW)
+ self.gradBias = torch.Tensor(self.nOutputPlane)
+
+ self:reset()
+end
+
+function SpatialConvolutionMap:reset(stdv)
+ if stdv then
+ stdv = stdv * math.sqrt(3)
+ self.weight:apply(function()
+ return torch.uniform(-stdv, stdv)
+ end)
+ self.bias:apply(function()
+ return torch.uniform(-stdv, stdv)
+ end)
+ else
+ local ninp = torch.Tensor(self.nOutputPlane):zero()
+ for i=1,self.connTable:size(1) do ninp[self.connTable[i][2]] = ninp[self.connTable[i][2]]+1 end
+ for k=1,self.connTable:size(1) do
+ stdv = 1/math.sqrt(self.kW*self.kH*ninp[self.connTable[k][2]])
+ self.weight:select(1,k):apply(function() return torch.uniform(-stdv,stdv) end)
+ end
+ for k=1,self.bias:size(1) do
+ stdv = 1/math.sqrt(self.kW*self.kH*ninp[k])
+ self.bias[k] = torch.uniform(-stdv,stdv)
+ end
+ end
+end
+
+function SpatialConvolutionMap:updateOutput(input)
+ input.nn.SpatialConvolutionMap_updateOutput(self, input)
+ return self.output
+end
+
+function SpatialConvolutionMap:updateGradInput(input, gradOutput)
+ input.nn.SpatialConvolutionMap_updateGradInput(self, input, gradOutput)
+ return self.gradInput
+end
+
+function SpatialConvolutionMap:accGradParameters(input, gradOutput, scale)
+ return input.nn.SpatialConvolutionMap_accGradParameters(self, input, gradOutput, scale)
+end
+
+function SpatialConvolutionMap:decayParameters(decay)
+ self.weight:add(-decay, self.weight)
+ self.bias:add(-decay, self.bias)
+end
diff --git a/SpatialLPPooling.lua b/SpatialLPPooling.lua
new file mode 100644
index 0000000..9b9c87d
--- /dev/null
+++ b/SpatialLPPooling.lua
@@ -0,0 +1,32 @@
+local SpatialLPPooling, parent = torch.class('nn.SpatialLPPooling', 'nn.Sequential')
+
+function SpatialLPPooling:__init(nInputPlane, pnorm, kW, kH, dW, dH)
+ parent.__init(self)
+
+ dW = dW or kW
+ dH = dH or kH
+
+ self.kW = kW
+ self.kH = kH
+ self.dW = dW
+ self.dH = dH
+
+ self.nInputPlane = nInputPlane
+ self.learnKernel = learnKernel
+
+ if pnorm == 2 then
+ self:add(nn.Square())
+ else
+ self:add(nn.Power(pnorm))
+ end
+ self:add(nn.SpatialConvolutionMap(nn.tables.oneToOne(nInputPlane), kW, kH, dW, dH))
+ if pnorm == 2 then
+ self:add(nn.Sqrt())
+ else
+ self:add(nn.Power(1/pnorm))
+ end
+
+ self:get(2).bias:zero()
+ self:get(2).weight:fill(1/(kW*kH))
+ self:get(2).accGradParameters = nil
+end
diff --git a/SpatialMaxPooling.lua b/SpatialMaxPooling.lua
new file mode 100644
index 0000000..21197ac
--- /dev/null
+++ b/SpatialMaxPooling.lua
@@ -0,0 +1,34 @@
+local SpatialMaxPooling, parent = torch.class('nn.SpatialMaxPooling', 'nn.Module')
+
+function SpatialMaxPooling:__init(kW, kH, dW, dH)
+ parent.__init(self)
+
+ dW = dW or kW
+ dH = dH or kH
+
+ self.kW = kW
+ self.kH = kH
+ self.dW = dW
+ self.dH = dH
+
+ self.indices = torch.Tensor()
+end
+
+function SpatialMaxPooling:updateOutput(input)
+ input.nn.SpatialMaxPooling_updateOutput(self, input)
+ return self.output
+end
+
+function SpatialMaxPooling:updateGradInput(input, gradOutput)
+ input.nn.SpatialMaxPooling_updateGradInput(self, input, gradOutput)
+ return self.gradInput
+end
+
+function SpatialMaxPooling:empty()
+ self.gradInput:resize()
+ self.gradInput:storage():resize(0)
+ self.output:resize()
+ self.output:storage():resize(0)
+ self.indices:resize()
+ self.indices:storage():resize(0)
+end
diff --git a/SpatialSubSampling.lua b/SpatialSubSampling.lua
new file mode 100644
index 0000000..48b32b9
--- /dev/null
+++ b/SpatialSubSampling.lua
@@ -0,0 +1,49 @@
+local SpatialSubSampling, parent = torch.class('nn.SpatialSubSampling', 'nn.Module')
+
+function SpatialSubSampling:__init(nInputPlane, kW, kH, dW, dH)
+ parent.__init(self)
+
+ dW = dW or 1
+ dH = dH or 1
+
+ self.nInputPlane = nInputPlane
+ self.kW = kW
+ self.kH = kH
+ self.dW = dW
+ self.dH = dH
+
+ self.weight = torch.Tensor(nInputPlane)
+ self.bias = torch.Tensor(nInputPlane)
+ self.gradWeight = torch.Tensor(nInputPlane)
+ self.gradBias = torch.Tensor(nInputPlane)
+
+ self:reset()
+end
+
+function SpatialSubSampling:reset(stdv)
+ if stdv then
+ stdv = stdv * math.sqrt(3)
+ else
+ stdv = 1/math.sqrt(self.kW*self.kH)
+ end
+ self.weight:apply(function()
+ return torch.uniform(-stdv, stdv)
+ end)
+ self.bias:apply(function()
+ return torch.uniform(-stdv, stdv)
+ end)
+end
+
+function SpatialSubSampling:updateOutput(input)
+ return input.nn.SpatialSubSampling_updateOutput(self, input)
+end
+
+function SpatialSubSampling:updateGradInput(input, gradOutput)
+ if self.gradInput then
+ return input.nn.SpatialSubSampling_updateGradInput(self, input, gradOutput)
+ end
+end
+
+function SpatialSubSampling:accGradParameters(input, gradOutput, scale)
+ return input.nn.SpatialSubSampling_accGradParameters(self, input, gradOutput, scale)
+end
diff --git a/SpatialSubtractiveNormalization.lua b/SpatialSubtractiveNormalization.lua
new file mode 100644
index 0000000..4df0fc1
--- /dev/null
+++ b/SpatialSubtractiveNormalization.lua
@@ -0,0 +1,104 @@
+local SpatialSubtractiveNormalization, parent = torch.class('nn.SpatialSubtractiveNormalization','nn.Module')
+
+function SpatialSubtractiveNormalization:__init(nInputPlane, kernel)
+ parent.__init(self)
+
+ -- get args
+ self.nInputPlane = nInputPlane or 1
+ self.kernel = kernel or torch.Tensor(9,9):fill(1)
+ local kdim = self.kernel:nDimension()
+
+ -- check args
+ if kdim ~= 2 and kdim ~= 1 then
+ error('<SpatialSubtractiveNormalization> averaging kernel must be 2D or 1D')
+ end
+ if (self.kernel:size(1) % 2) == 0 or (kdim == 2 and (self.kernel:size(2) % 2) == 0) then
+ error('<SpatialSubtractiveNormalization> averaging kernel must have ODD dimensions')
+ end
+
+ -- normalize kernel
+ self.kernel:div(self.kernel:sumall() * self.nInputPlane)
+
+ -- padding values
+ local padH = math.floor(self.kernel:size(1)/2)
+ local padW = padH
+ if kdim == 2 then
+ padW = math.floor(self.kernel:size(2)/2)
+ end
+
+ -- create convolutional mean extractor
+ self.meanestimator = nn.Sequential()
+ self.meanestimator:add(nn.SpatialZeroPadding(padW, padW, padH, padH))
+ if kdim == 2 then
+ self.meanestimator:add(nn.SpatialConvolutionMap(nn.tables.oneToOne(self.nInputPlane),
+ self.kernel:size(2), self.kernel:size(1)))
+ else
+ self.meanestimator:add(nn.SpatialConvolutionMap(nn.tables.oneToOne(self.nInputPlane),
+ self.kernel:size(1), 1))
+ self.meanestimator:add(nn.SpatialConvolutionMap(nn.tables.oneToOne(self.nInputPlane),
+ 1, self.kernel:size(1)))
+ end
+ self.meanestimator:add(nn.Sum(1))
+ self.meanestimator:add(nn.Replicate(self.nInputPlane))
+
+ -- set kernel and bias
+ if kdim == 2 then
+ for i = 1,self.nInputPlane do
+ self.meanestimator.modules[2].weight[i] = self.kernel
+ end
+ self.meanestimator.modules[2].bias:zero()
+ else
+ for i = 1,self.nInputPlane do
+ self.meanestimator.modules[2].weight[i]:copy(self.kernel)
+ self.meanestimator.modules[3].weight[i]:copy(self.kernel)
+ end
+ self.meanestimator.modules[2].bias:zero()
+ self.meanestimator.modules[3].bias:zero()
+ end
+
+ -- other operation
+ self.subtractor = nn.CSubTable()
+ self.divider = nn.CDivTable()
+
+ -- coefficient array, to adjust side effects
+ self.coef = torch.Tensor(1,1,1)
+end
+
+function SpatialSubtractiveNormalization:updateOutput(input)
+ -- compute side coefficients
+ if (input:size(3) ~= self.coef:size(2)) or (input:size(2) ~= self.coef:size(1)) then
+ local ones = input.new():resizeAs(input):fill(1)
+ self.coef = self.meanestimator:updateOutput(ones)
+ self.coef = self.coef:clone()
+ end
+
+ -- compute mean
+ self.localsums = self.meanestimator:updateOutput(input)
+ self.adjustedsums = self.divider:updateOutput{self.localsums, self.coef}
+ self.output = self.subtractor:updateOutput{input, self.adjustedsums}
+
+ -- done
+ return self.output
+end
+
+function SpatialSubtractiveNormalization:updateGradInput(input, gradOutput)
+ -- resize grad
+ self.gradInput:resizeAs(input):zero()
+
+ -- backprop through all modules
+ local gradsub = self.subtractor:updateGradInput({input, self.adjustedsums}, gradOutput)
+ local graddiv = self.divider:updateGradInput({self.localsums, self.coef}, gradsub[2])
+ self.gradInput:add(self.meanestimator:updateGradInput(input, graddiv[1]))
+ self.gradInput:add(gradsub[1])
+
+ -- done
+ return self.gradInput
+end
+
+function SpatialSubtractiveNormalization:type(type)
+ parent.type(self,type)
+ self.meanestimator:type(type)
+ self.divider:type(type)
+ self.subtractor:type(type)
+ return self
+end
diff --git a/SpatialZeroPadding.lua b/SpatialZeroPadding.lua
new file mode 100644
index 0000000..af03e71
--- /dev/null
+++ b/SpatialZeroPadding.lua
@@ -0,0 +1,53 @@
+local SpatialZeroPadding, parent = torch.class('nn.SpatialZeroPadding', 'nn.Module')
+
+function SpatialZeroPadding:__init(pad_l, pad_r, pad_t, pad_b)
+ parent.__init(self)
+ self.pad_l = pad_l
+ self.pad_r = pad_r or self.pad_l
+ self.pad_t = pad_t or self.pad_l
+ self.pad_b = pad_b or self.pad_l
+end
+
+function SpatialZeroPadding:updateOutput(input)
+ if input:dim() ~= 3 then error('input must be 3-dimensional') end
+ local h = input:size(2) + self.pad_t + self.pad_b
+ local w = input:size(3) + self.pad_l + self.pad_r
+ if w < 1 or h < 1 then error('input is too small') end
+ self.output:resize(input:size(1), h, w)
+ self.output:zero()
+ -- crop input if necessary
+ local c_input = input
+ if self.pad_t < 0 then c_input = c_input:narrow(2, 1 - self.pad_t, c_input:size(2) + self.pad_t) end
+ if self.pad_b < 0 then c_input = c_input:narrow(2, 1, c_input:size(2) + self.pad_b) end
+ if self.pad_l < 0 then c_input = c_input:narrow(3, 1 - self.pad_l, c_input:size(3) + self.pad_l) end
+ if self.pad_r < 0 then c_input = c_input:narrow(3, 1, c_input:size(3) + self.pad_r) end
+   -- crop output if necessary
+ local c_output = self.output
+ if self.pad_t > 0 then c_output = c_output:narrow(2, 1 + self.pad_t, c_output:size(2) - self.pad_t) end
+ if self.pad_b > 0 then c_output = c_output:narrow(2, 1, c_output:size(2) - self.pad_b) end
+ if self.pad_l > 0 then c_output = c_output:narrow(3, 1 + self.pad_l, c_output:size(3) - self.pad_l) end
+ if self.pad_r > 0 then c_output = c_output:narrow(3, 1, c_output:size(3) - self.pad_r) end
+ -- copy input to output
+ c_output:copy(c_input)
+ return self.output
+end
+
+function SpatialZeroPadding:updateGradInput(input, gradOutput)
+ if input:dim() ~= 3 then error('input must be 3-dimensional') end
+ self.gradInput:resizeAs(input):zero()
+ -- crop gradInput if necessary
+ local cg_input = self.gradInput
+ if self.pad_t < 0 then cg_input = cg_input:narrow(2, 1 - self.pad_t, cg_input:size(2) + self.pad_t) end
+ if self.pad_b < 0 then cg_input = cg_input:narrow(2, 1, cg_input:size(2) + self.pad_b) end
+ if self.pad_l < 0 then cg_input = cg_input:narrow(3, 1 - self.pad_l, cg_input:size(3) + self.pad_l) end
+ if self.pad_r < 0 then cg_input = cg_input:narrow(3, 1, cg_input:size(3) + self.pad_r) end
+   -- crop gradOutput if necessary
+ local cg_output = gradOutput
+ if self.pad_t > 0 then cg_output = cg_output:narrow(2, 1 + self.pad_t, cg_output:size(2) - self.pad_t) end
+ if self.pad_b > 0 then cg_output = cg_output:narrow(2, 1, cg_output:size(2) - self.pad_b) end
+ if self.pad_l > 0 then cg_output = cg_output:narrow(3, 1 + self.pad_l, cg_output:size(3) - self.pad_l) end
+ if self.pad_r > 0 then cg_output = cg_output:narrow(3, 1, cg_output:size(3) - self.pad_r) end
+   -- copy gradOutput to gradInput
+ cg_input:copy(cg_output)
+ return self.gradInput
+end
diff --git a/SplitTable.lua b/SplitTable.lua
new file mode 100644
index 0000000..d2c690e
--- /dev/null
+++ b/SplitTable.lua
@@ -0,0 +1,30 @@
+local SplitTable, parent = torch.class('nn.SplitTable', 'nn.Module')
+
+function SplitTable:__init(dimension)
+ parent.__init(self)
+ self.modules = {}
+ self.dimension = dimension
+end
+
+function SplitTable:updateOutput(input)
+ local currentOutput= {};
+ local slices = input:size(self.dimension)
+ for i=1,slices do
+ currentOutput[#currentOutput+1] = input:select(self.dimension,i)
+ end
+ self.output = currentOutput
+ return self.output
+end
+
+
+function SplitTable:updateGradInput(input, gradOutput)
+ local slices = input:size(self.dimension)
+ self.gradInput:resizeAs(input)
+
+ local offset = 1
+ for i=1,slices do
+ local currentGradInput = gradOutput[i];
+ self.gradInput:select(self.dimension,i):copy(currentGradInput)
+ end
+ return self.gradInput
+end
diff --git a/Sqrt.lua b/Sqrt.lua
new file mode 100644
index 0000000..664d434
--- /dev/null
+++ b/Sqrt.lua
@@ -0,0 +1,13 @@
+local Sqrt, parent = torch.class('nn.Sqrt','nn.Module')
+
+function Sqrt:__init(args)
+ parent.__init(self)
+end
+
+function Sqrt:updateOutput(input)
+ return input.nn.Sqrt_updateOutput(self,input)
+end
+
+function Sqrt:updateGradInput(input, gradOutput)
+ return input.nn.Sqrt_updateGradInput(self,input,gradOutput)
+end
diff --git a/Square.lua b/Square.lua
new file mode 100644
index 0000000..c1b80dc
--- /dev/null
+++ b/Square.lua
@@ -0,0 +1,13 @@
+local Square, parent = torch.class('nn.Square','nn.Module')
+
+function Square:__init(args)
+ parent.__init(self)
+end
+
+function Square:updateOutput(input)
+ return input.nn.Square_updateOutput(self, input)
+end
+
+function Square:updateGradInput(input, gradOutput)
+ return input.nn.Square_updateGradInput(self, input, gradOutput)
+end
diff --git a/StochasticGradient.lua b/StochasticGradient.lua
new file mode 100644
index 0000000..2d5e810
--- /dev/null
+++ b/StochasticGradient.lua
@@ -0,0 +1,57 @@
+local StochasticGradient = torch.class('nn.StochasticGradient')
+
+function StochasticGradient:__init(module, criterion)
+ self.learningRate = 0.01
+ self.learningRateDecay = 0
+ self.maxIteration = 25
+ self.shuffleIndices = true
+ self.module = module
+ self.criterion = criterion
+end
+
+function StochasticGradient:train(dataset)
+ local iteration = 1
+ local currentLearningRate = self.learningRate
+ local module = self.module
+ local criterion = self.criterion
+
+ local shuffledIndices = torch.randperm(dataset:size(), 'torch.LongTensor')
+ if not self.shuffleIndices then
+ for t = 1,dataset:size() do
+ shuffledIndices[t] = t
+ end
+ end
+
+ print("# StochasticGradient: training")
+
+ while true do
+ local currentError = 0
+ for t = 1,dataset:size() do
+ local example = dataset[shuffledIndices[t]]
+ local input = example[1]
+ local target = example[2]
+
+ currentError = currentError + criterion:forward(module:forward(input), target)
+
+ module:updateGradInput(input, criterion:updateGradInput(module.output, target))
+ module:accUpdateGradParameters(input, criterion.gradInput, currentLearningRate)
+
+ if self.hookExample then
+ self.hookExample(self, example)
+ end
+ end
+
+ if self.hookIteration then
+ self.hookIteration(self, iteration)
+ end
+
+ currentError = currentError / dataset:size()
+ print("# current error = " .. currentError)
+ iteration = iteration + 1
+ currentLearningRate = self.learningRate/(1+iteration*self.learningRateDecay)
+ if self.maxIteration > 0 and iteration > self.maxIteration then
+ print("# StochasticGradient: you have reached the maximum number of iterations")
+ break
+ end
+ end
+end
diff --git a/Sum.lua b/Sum.lua
new file mode 100644
index 0000000..b068e25
--- /dev/null
+++ b/Sum.lua
@@ -0,0 +1,27 @@
+local Sum, parent = torch.class('nn.Sum', 'nn.Module')
+
+function Sum:__init(dimension)
+ parent.__init(self)
+ dimension = dimension or 1
+ self.dimension = dimension
+end
+
+function Sum:updateOutput(input)
+ input.torch.sum(self.output, input, self.dimension)
+ self.output = self.output:select(self.dimension, 1)
+ return self.output
+end
+
+function Sum:updateGradInput(input, gradOutput)
+ local size = gradOutput:size():totable()
+ local stride = gradOutput:stride():totable()
+ table.insert(size, self.dimension, input:size(self.dimension))
+ table.insert(stride, self.dimension, 0)
+
+ self.gradInput:set(gradOutput:storage(),
+ 1,
+ torch.LongStorage(size),
+ torch.LongStorage(stride))
+
+ return self.gradInput
+end
diff --git a/Tanh.lua b/Tanh.lua
new file mode 100644
index 0000000..b6cf1bf
--- /dev/null
+++ b/Tanh.lua
@@ -0,0 +1,9 @@
+local Tanh = torch.class('nn.Tanh', 'nn.Module')
+
+function Tanh:updateOutput(input)
+ return input.nn.Tanh_updateOutput(self, input)
+end
+
+function Tanh:updateGradInput(input, gradOutput)
+ return input.nn.Tanh_updateGradInput(self, input, gradOutput)
+end
diff --git a/TemporalConvolution.lua b/TemporalConvolution.lua
new file mode 100644
index 0000000..a3aaa7f
--- /dev/null
+++ b/TemporalConvolution.lua
@@ -0,0 +1,51 @@
+local TemporalConvolution, parent = torch.class('nn.TemporalConvolution', 'nn.Module')
+
+function TemporalConvolution:__init(inputFrameSize, outputFrameSize, kW, dW)
+ parent.__init(self)
+
+ dW = dW or 1
+
+ self.inputFrameSize = inputFrameSize
+ self.outputFrameSize = outputFrameSize
+ self.kW = kW
+ self.dW = dW
+
+ self.weight = torch.Tensor(outputFrameSize, inputFrameSize*kW)
+ self.bias = torch.Tensor(outputFrameSize)
+ self.gradWeight = torch.Tensor(outputFrameSize, inputFrameSize*kW)
+ self.gradBias = torch.Tensor(outputFrameSize)
+
+ self:reset()
+end
+
+function TemporalConvolution:reset(stdv)
+ if stdv then
+ stdv = stdv * math.sqrt(3)
+ else
+ stdv = 1/math.sqrt(self.kW*self.inputFrameSize)
+ end
+ self.weight:apply(function()
+ return torch.uniform(-stdv, stdv)
+ end)
+ self.bias:apply(function()
+ return torch.uniform(-stdv, stdv)
+ end)
+end
+
+function TemporalConvolution:updateOutput(input)
+ return input.nn.TemporalConvolution_updateOutput(self, input)
+end
+
+function TemporalConvolution:updateGradInput(input, gradOutput)
+ if self.gradInput then
+ return input.nn.TemporalConvolution_updateGradInput(self, input, gradOutput)
+ end
+end
+
+function TemporalConvolution:accGradParameters(input, gradOutput, scale)
+ scale = scale or 1
+ input.nn.TemporalConvolution_accGradParameters(self, input, gradOutput, scale)
+end
+
+-- we do not need to accumulate parameters when sharing
+TemporalConvolution.sharedAccUpdateGradParameters = TemporalConvolution.accUpdateGradParameters
diff --git a/TemporalSubSampling.lua b/TemporalSubSampling.lua
new file mode 100644
index 0000000..3d06f6e
--- /dev/null
+++ b/TemporalSubSampling.lua
@@ -0,0 +1,48 @@
+local TemporalSubSampling, parent = torch.class('nn.TemporalSubSampling', 'nn.Module')
+
+function TemporalSubSampling:__init(inputFrameSize, kW, dW)
+ parent.__init(self)
+
+ dW = dW or 1
+
+ self.inputFrameSize = inputFrameSize
+ self.kW = kW
+ self.dW = dW
+
+ self.weight = torch.Tensor(inputFrameSize)
+ self.bias = torch.Tensor(inputFrameSize)
+ self.gradWeight = torch.Tensor(inputFrameSize)
+ self.gradBias = torch.Tensor(inputFrameSize)
+
+ self:reset()
+end
+
+function TemporalSubSampling:reset(stdv)
+ if stdv then
+ stdv = stdv * math.sqrt(3)
+ else
+ stdv = 1/math.sqrt(self.kW)
+ end
+
+ self.weight:apply(function()
+ return torch.uniform(-stdv, stdv)
+ end)
+
+ self.bias:apply(function()
+ return torch.uniform(-stdv, stdv)
+ end)
+end
+
+function TemporalSubSampling:updateOutput(input)
+ return input.nn.TemporalSubSampling_updateOutput(self, input)
+end
+
+function TemporalSubSampling:updateGradInput(input, gradOutput)
+ if self.gradInput then
+ return input.nn.TemporalSubSampling_updateGradInput(self, input, gradOutput)
+ end
+end
+
+function TemporalSubSampling:accGradParameters(input, gradOutput, scale)
+ return input.nn.TemporalSubSampling_accGradParameters(self, input, gradOutput, scale)
+end
diff --git a/Threshold.lua b/Threshold.lua
new file mode 100644
index 0000000..6083957
--- /dev/null
+++ b/Threshold.lua
@@ -0,0 +1,20 @@
+local Threshold, parent = torch.class('nn.Threshold','nn.Module')
+
+function Threshold:__init(th,v)
+ parent.__init(self)
+ self.threshold = th or 1e-6
+ self.val = v or 0
+ if (th and type(th) ~= 'number') or (v and type(v) ~= 'number') then
+ error('nn.Threshold(threshold, value)')
+ end
+end
+
+function Threshold:updateOutput(input)
+ input.nn.Threshold_updateOutput(self, input)
+ return self.output
+end
+
+function Threshold:updateGradInput(input, gradOutput)
+ input.nn.Threshold_updateGradInput(self, input, gradOutput)
+ return self.gradInput
+end
diff --git a/VolumetricConvolution.lua b/VolumetricConvolution.lua
new file mode 100644
index 0000000..4262199
--- /dev/null
+++ b/VolumetricConvolution.lua
@@ -0,0 +1,51 @@
+local VolumetricConvolution, parent = torch.class('nn.VolumetricConvolution', 'nn.Module')
+
+function VolumetricConvolution:__init(nInputPlane, nOutputPlane, kT, kW, kH, dT, dW, dH)
+ parent.__init(self)
+
+ dT = dT or 1
+ dW = dW or 1
+ dH = dH or 1
+
+ self.nInputPlane = nInputPlane
+ self.nOutputPlane = nOutputPlane
+ self.kT = kT
+ self.kW = kW
+ self.kH = kH
+ self.dT = dT
+ self.dW = dW
+ self.dH = dH
+
+ self.weight = torch.Tensor(nOutputPlane, nInputPlane, kT, kH, kW)
+ self.bias = torch.Tensor(nOutputPlane)
+ self.gradWeight = torch.Tensor(nOutputPlane, nInputPlane, kT, kH, kW)
+ self.gradBias = torch.Tensor(nOutputPlane)
+
+ self:reset()
+end
+
+function VolumetricConvolution:reset(stdv)
+ if stdv then
+ stdv = stdv * math.sqrt(3)
+ else
+ stdv = 1/math.sqrt(self.kT*self.kW*self.kH*self.nInputPlane)
+ end
+ self.weight:apply(function()
+ return torch.uniform(-stdv, stdv)
+ end)
+ self.bias:apply(function()
+ return torch.uniform(-stdv, stdv)
+ end)
+end
+
+function VolumetricConvolution:updateOutput(input)
+ return input.nn.VolumetricConvolution_updateOutput(self, input)
+end
+
+function VolumetricConvolution:updateGradInput(input, gradOutput)
+ return input.nn.VolumetricConvolution_updateGradInput(self, input, gradOutput)
+end
+
+function VolumetricConvolution:accGradParameters(input, gradOutput, scale)
+ return input.nn.VolumetricConvolution_accGradParameters(self, input, gradOutput, scale)
+end
diff --git a/WeightedEuclidean.lua b/WeightedEuclidean.lua
new file mode 100644
index 0000000..2761228
--- /dev/null
+++ b/WeightedEuclidean.lua
@@ -0,0 +1,85 @@
+local WeightedEuclidean, parent = torch.class('nn.WeightedEuclidean', 'nn.Module')
+
+function WeightedEuclidean:__init(inputSize,outputSize)
+ parent.__init(self)
+
+ self.templates = torch.Tensor(inputSize,outputSize)
+ self.gradTemplates = torch.Tensor(inputSize,outputSize)
+
+ self.diagCov = torch.Tensor(inputSize,outputSize)
+ self.gradDiagCov = torch.Tensor(inputSize,outputSize)
+
+ self.gradInput:resize(inputSize)
+ self.output:resize(outputSize)
+ self.temp = torch.Tensor(inputSize)
+
+ -- for compat with Torch's modules (it's bad we have to do that)
+ do
+ self.weight = self.templates
+ self.gradWeight = self.gradTemplates
+ self.bias = self.diagCov
+ self.gradBias = self.gradDiagCov
+ end
+
+ self:reset()
+end
+
+function WeightedEuclidean:reset(stdv)
+ if stdv then
+ stdv = stdv * math.sqrt(3)
+ else
+ stdv = 1./math.sqrt(self.templates:size(1))
+ end
+
+ for i=1,self.templates:size(2) do
+ self.templates:select(2, i):apply(function()
+ return torch.uniform(-stdv, stdv)
+ end)
+ end
+
+ self.diagCov:fill(1)
+end
+
+function WeightedEuclidean:updateOutput(input)
+ self.output:zero()
+ for o = 1,self.templates:size(2) do
+ self.temp:copy(input):add(-1,self.templates:select(2,o))
+ self.temp:cmul(self.temp)
+ self.temp:cmul(self.diagCov:select(2,o)):cmul(self.diagCov:select(2,o))
+ self.output[o] = math.sqrt(self.temp:sumall())
+ end
+ return self.output
+end
+
+function WeightedEuclidean:updateGradInput(input, gradOutput)
+ self:forward(input)
+ self.gradInput:zero()
+ for o = 1,self.templates:size(2) do
+ if self.output[o] ~= 0 then
+ self.temp:copy(input):add(-1,self.templates:select(2,o))
+ self.temp:cmul(self.diagCov:select(2,o)):cmul(self.diagCov:select(2,o))
+ self.temp:mul(gradOutput[o]/self.output[o])
+ self.gradInput:add(self.temp)
+ end
+ end
+ return self.gradInput
+end
+
+function WeightedEuclidean:accGradParameters(input, gradOutput, scale)
+ self:forward(input)
+ scale = scale or 1
+ for o = 1,self.templates:size(2) do
+ if self.output[o] ~= 0 then
+ self.temp:copy(self.templates:select(2,o)):add(-1,input)
+ self.temp:cmul(self.diagCov:select(2,o)):cmul(self.diagCov:select(2,o))
+ self.temp:mul(gradOutput[o]/self.output[o])
+ self.gradTemplates:select(2,o):add(self.temp)
+
+ self.temp:copy(self.templates:select(2,o)):add(-1,input)
+ self.temp:cmul(self.temp)
+ self.temp:cmul(self.diagCov:select(2,o))
+ self.temp:mul(gradOutput[o]/self.output[o])
+ self.gradDiagCov:select(2,o):add(self.temp)
+ end
+ end
+end
diff --git a/dok/abs.png b/dok/abs.png
new file mode 100644
index 0000000..fa7f470
--- /dev/null
+++ b/dok/abs.png
Binary files differ
diff --git a/dok/exp.png b/dok/exp.png
new file mode 100644
index 0000000..07d28d4
--- /dev/null
+++ b/dok/exp.png
Binary files differ
diff --git a/dok/hshrink.png b/dok/hshrink.png
new file mode 100644
index 0000000..7f96292
--- /dev/null
+++ b/dok/hshrink.png
Binary files differ
diff --git a/dok/htanh.png b/dok/htanh.png
new file mode 100644
index 0000000..c8e6084
--- /dev/null
+++ b/dok/htanh.png
Binary files differ
diff --git a/dok/index.dok b/dok/index.dok
new file mode 100644
index 0000000..ded5265
--- /dev/null
+++ b/dok/index.dok
@@ -0,0 +1,3053 @@
+====== Neural Network Package ======
+{{anchor:nn.dok}}
+
+This package provides an easy way to build and train simple or complex
+neural networks.
+
+A network is assembled from [[#nn.Modules|Modules]], and there
+are several sub-classes of ''Module'' available: container classes like
+[[#nn.Sequential|Sequential]], [[#nn.Parallel|Parallel]] and
+[[#nn.Concat|Concat]], which can contain simple layers like
+[[#nn.Linear|Linear]], [[#nn.Mean|Mean]], [[#nn.Max|Max]] and
+[[#nn.Reshape|Reshape]], as well as convolutional layers, and transfer
+functions like [[#nn.Tanh|Tanh]].
+
+Loss functions are implemented as sub-classes of
+[[#nn.Criterions|Criterion]]. They are helpful to train a neural network on
+classical tasks. Common criterions are the Mean Squared Error
+criterion implemented in [[#nn.MSECriterion|MSECriterion]] and the
+cross-entropy criterion implemented in
+[[#nn.ClassNLLCriterion|ClassNLLCriterion]].
+
+Finally, the [[#nn.StochasticGradient|StochasticGradient]] class provides a
+high-level way to train a neural network of your choice, even though it is
+easy with a simple for loop to [[#nn.DoItYourself|train a neural network yourself]].
+
+For those who want to implement their own modules, we suggest using
+the ''nn.Jacobian'' class for testing the derivatives of their class,
+together with the [[..:torch:tester|torch.Tester]] class. The sources
+of the ''nn'' package contain many examples of such tests.
+
+
+====== Detailed Overview of the Neural Network Package ======
+{{anchor:nn.overview.dok}}
+
+**Module**
+
+A neural network is called a [[#nn.Module|Module]] (or simply
+//module// in this documentation) in Torch. ''Module'' is an abstract
+class which defines four main methods:
+ * [[#nn.Module.forward|forward(input)]] which computes the output of the module given the ''input'' [[..:torch:tensor|Tensor]].
+ * [[#nn.Module.backward|backward(input, gradOutput)]] which computes the gradients of the module with respect to its own parameters, and its own inputs.
+ * [[#nn.Module.zeroGradParameters|zeroGradParameters()]] which zeroes the gradient with respect to the parameters of the module.
+ * [[#nn.Module.updateParameters|updateParameters(learningRate)]] which updates the parameters after one has computed the gradients with ''backward()''
+
+It also declares two members:
+ * [[#nn.Module.output|output]] which is the output returned by ''forward()''.
+ * [[#nn.Module.gradInput|gradInput]] which contains the gradients with respect to the input of the module, computed in a ''backward()''.
+
+Two other perhaps less used but handy methods are also defined:
+ * [[#nn.Module.share|share(mlp,s1,s2,...,sn)]] which makes this module share the parameters s1,..sn of the module ''mlp''. This is useful if you want to have modules that share the same weights.
+ * [[#nn.Module.clone|clone(...)]] which produces a deep copy of (i.e. not just a pointer to) this Module, including the current state of its parameters (if any).
+
+Some important remarks:
+ * ''output'' contains only valid values after a [[#nn.Module.forward|forward(input)]].
+ * ''gradInput'' contains only valid values after a [[#nn.Module.backward|backward(input, gradOutput)]].
+ * [[#nn.Module.backward|backward(input, gradOutput)]] uses certain computations obtained during [[#nn.Module.forward|forward(input)]]. You //must// call ''forward()'' before calling a ''backward()'', on the //same// ''input'', or your gradients are going to be incorrect!
+
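+For instance, a minimal sketch of the correct call order (the module and
+tensors below are purely illustrative):
+<file lua>
+mlp = nn.Linear(10, 1)
+x = torch.randn(10)     -- some input
+dldy = torch.randn(1)   -- some gradient with respect to the output
+mlp:forward(x)          -- fills mlp.output
+mlp:backward(x, dldy)   -- fills mlp.gradInput, reusing state from forward(x)
+</file>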
+
+**Plug and play**
+
+Building a simple neural network can be achieved by constructing an available layer.
+A linear neural network (perceptron!) is built in only one line:
+<file lua>
+mlp = nn.Linear(10,1) -- perceptron with 10 inputs
+</file>
+
+More complex neural networks are easily built using the container classes
+[[#nn.Sequential|Sequential]] and [[#nn.Concat|Concat]]. ''Sequential'' plugs
+layers together in a feed-forward, fully connected manner. ''Concat'' gathers
+several modules into one layer: they take the same input, and their outputs
+are concatenated.
+
+Creating a one hidden-layer multi-layer perceptron is thus just as easy as:
+<file lua>
+mlp = nn.Sequential()
+mlp:add( nn.Linear(10, 25) ) -- 10 input, 25 hidden units
+mlp:add( nn.Tanh() ) -- some hyperbolic tangent transfer function
+mlp:add( nn.Linear(25, 1) ) -- 1 output
+</file>
+
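+Passing data through such a network is then a single call to ''forward()''
+(the random input below is only for illustration):
+<file lua>
+out = mlp:forward(torch.randn(10))
+print(out)   -- a Tensor with a single entry
+</file>
+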
+Of course, ''Sequential'' and ''Concat'' can contain other
+''Sequential'' or ''Concat'', allowing you to try the craziest neural
+networks you ever dreamt of! See the [[#nn.Modules|complete list of
+available modules]].
+
+**Training a neural network**
+
+Once you have built your neural network, you have to choose a particular
+[[#nn.Criterions|Criterion]] to train it. A criterion is a class which
+describes the cost to be minimized during training.
+
+You can then train the neural network by using the
+[[#nn.StochasticGradient|StochasticGradient]] class.
+
+<file lua>
+ criterion = nn.MSECriterion() -- Mean Squared Error criterion
+ trainer = nn.StochasticGradient(mlp, criterion)
+ trainer:train(dataset) -- train using some examples
+</file>
+
+StochasticGradient expects as a ''dataset'' an object which implements
+the operator ''dataset[index]'' and implements the method
+''dataset:size()''. The ''size()'' method returns the number of
+examples and ''dataset[i]'' has to return the i-th example.
+
+An ''example'' has to be an object which implements the operator
+''example[field]'', where ''field'' might take the value ''1'' (input
+features) or ''2'' (corresponding label which will be given to the
+criterion). The input is usually a Tensor (except if you use special
+kinds of modules, like [[#nn.TableLayers|table layers]]). The
+label type depends on the criterion. For example, the
+[[#nn.MSECriterion|MSECriterion]] expects a Tensor, but the
+[[#nn.ClassNLLCriterion|ClassNLLCriterion]] expects an integer number (the
+class).
+
+Such a dataset is easily constructed by using Lua tables, but it could
+be any ''C'' object for example, as long as the required operators/methods
+are implemented. [[#nn.DoItStochasticGradient|See an example]].
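+
+As a purely illustrative sketch (the sizes and values are arbitrary), such a
+dataset can be built from a plain Lua table:
+<file lua>
+dataset = {}
+function dataset:size() return 2 end                      -- two training examples
+dataset[1] = {torch.randn(10), torch.Tensor(1):fill(1)}   -- {input, target}
+dataset[2] = {torch.randn(10), torch.Tensor(1):fill(-1)}
+</file>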
+
+''StochasticGradient'' being written in ''Lua'', it is extremely easy
+to cut-and-paste it and create a variant of it adapted to your needs
+(if the constraints of ''StochasticGradient'' do not satisfy you).
+
+**Low Level Training Of a Neural Network**
+
+If you want to program the ''StochasticGradient'' by hand, you
+essentially need to control the use of forwards and backwards through
+the network yourself. For example, here is the code fragment one
+would need to make a gradient step given an input ''x'', a desired
+output ''y'', a network ''mlp'' and a given criterion ''criterion''
+and learning rate ''learningRate'':
+
+<file lua>
+function gradUpdate(mlp, x, y, criterion, learningRate)
+ local pred = mlp:forward(x)
+ local err = criterion:forward(pred, y)
+ local gradCriterion = criterion:backward(pred, y)
+ mlp:zeroGradParameters()
+ mlp:backward(x, gradCriterion)
+ mlp:updateParameters(learningRate)
+end
+</file>
+For example, if you wish to use your own criterion you can simply replace
+''gradCriterion'' with the gradient vector of your criterion of choice.
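+
+A sketch of how such a function might be used in a loop on a single
+input/target pair (the network, data and learning rate below are arbitrary):
+<file lua>
+mlp = nn.Sequential()
+mlp:add(nn.Linear(10, 1))
+criterion = nn.MSECriterion()
+x = torch.randn(10)
+y = torch.Tensor(1):fill(0.5)
+for i = 1, 100 do
+   gradUpdate(mlp, x, y, criterion, 0.01)   -- one gradient step per call
+end
+</file>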
+
+
+====== Modules ======
+{{anchor:nn.Modules}}
+
+Modules are bricks to build neural networks. A [[#nn.Module|Module]] is a neural network
+by itself, but it can be combined with other networks using [[#nn.Containers|container classes]] to create
+complex neural networks.
+
+===== Module =====
+{{anchor:nn.Module}}
+
+''Module'' is an abstract class which defines fundamental methods necessary
+for training a neural network. Modules are [[..:torch:file#torch.file.serialization|serializable]].
+
+Modules contain two state variables: [[#nn.ModuleOutput|output]] and
+[[#nn.ModuleGradInput|gradInput]].
+
+==== [output] forward(input) ====
+{{anchor:nn.Module.forward}}
+
+Takes an ''input'' object, and computes the corresponding ''output'' of the
+module. In general ''input'' and ''output'' are
+[[..:torch:tensor|Tensors]]. However, some special sub-classes
+like [[#nn.TableLayers|table layers]] might expect something else. Please,
+refer to each module specification for further information.
+
+After a ''forward()'', the [[#nn.ModuleOutput|output]] state variable should
+have been updated to the new value.
+
+It is not advised to override this function. Instead, one should
+implement the [[#nn.Module.updateOutput|updateOutput(input)]]
+function. The ''forward()'' method in the abstract parent class
+[[#nn.Module|Module]] will call ''updateOutput(input)''.
+
+==== [gradInput] backward(input, gradOutput) ====
+{{anchor:nn.Module.backward}}
+
+Performs a //backpropagation step// through the module, with respect to the
+given ''input''. In general this method makes the assumption
+[[#nn.Module.forward|forward(input)]] has been called before, //with the same input//.
+This is necessary for optimization reasons. If you do not respect
+this rule, ''backward()'' will compute incorrect gradients.
+
+In general ''input'', ''gradOutput'' and ''gradInput'' are
+[[..:torch:tensor|Tensors]]. However, some special sub-classes
+like [[#nn.TableLayers|table layers]] might expect something else. Please,
+refer to each module specification for further information.
+
+A //backpropagation step// consists in computing two kinds of gradients
+at ''input'' given ''gradOutput'' (gradients with respect to the
+output of the module). This function simply performs this task using
+two function calls:
+
+ - A function call to [[#nn.Module.updateGradInput|updateGradInput(input, gradOutput)]].
+ - A function call to [[#nn.Module.accGradParameters|accGradParameters(input,gradOutput)]].
+
+It is not advised to override this function call in custom classes. It
+is better to override
+[[#nn.Module.updateGradInput|updateGradInput(input, gradOutput)]] and
+[[#nn.Module.accGradParameters|accGradParameters(input, gradOutput)]]
+functions.
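+
+In sketch form, and following the description above, the abstract
+''backward()'' behaves roughly as follows:
+
+<file lua>
+function Module:backward(input, gradOutput)
+   self:updateGradInput(input, gradOutput)   -- gradients w.r.t. the input
+   self:accGradParameters(input, gradOutput) -- accumulate gradients w.r.t. the parameters
+   return self.gradInput
+end
+</file>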
+
+==== updateOutput(input) ====
+{{anchor:nn.Module.updateOutput}}
+
+Computes the output using the current parameter set of the class and
+input. This function returns the result which is stored in the
+[[#nn.Module.output|output]] field.
+
+==== updateGradInput(input, gradOutput) ====
+{{anchor:nn.Module.updateGradInput}}
+
+Computes the gradient of the module with respect to its own
+input. This is returned in ''gradInput''. Also, the
+[[#nn.Module.gradInput|gradInput]] state variable is updated
+accordingly.
+
+==== accGradParameters(input, gradOutput) ====
+{{anchor:nn.Module.accGradParameters}}
+
+Computes the gradient of the module with respect to its own
+parameters. Many modules do not perform this step as they do not
+have any parameters. The state variable name for the parameters is
+module dependent. The module is expected to //accumulate// the
+gradients with respect to the parameters in some variable.
+
+Zeroing this accumulation is achieved with
+[[#nn.Module.zeroGradParameters|zeroGradParameters()]] and updating
+the parameters according to this accumulation is done with
+[[#nn.Module.updateParameters|updateParameters()]].
+
+==== zeroGradParameters() ====
+{{anchor:nn.Module.zeroGradParameters}}
+
+If the module has parameters, this will zero the accumulation of the
+gradients with respect to these parameters, accumulated through
+[[#nn.Module.accGradParameters|accGradParameters(input, gradOutput)]]
+calls. Otherwise, it does nothing.
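+
+As a rough sketch (assuming the module exposes its gradients through
+[[#nn.Module.parameters|parameters()]]), this amounts to:
+
+<file lua>
+function Module:zeroGradParameters()
+   local _, gradParams = self:parameters()
+   if gradParams then
+      for i = 1, #gradParams do
+         gradParams[i]:zero() -- reset each accumulated gradient tensor
+      end
+   end
+end
+</file>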
+
+==== updateParameters(learningRate) ====
+{{anchor:nn.Module.updateParameters}}
+
+If the module has parameters, this will update these parameters, according
+to the accumulation of the gradients with respect to these parameters,
+accumulated through [[#nn.Module.backward|backward()]] calls.
+
+The update is basically:
+<file lua>
+parameters = parameters - learningRate * gradients_wrt_parameters
+</file>
+If the module does not have parameters, it does nothing.
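+
+In code, the update is roughly the following (again a sketch, assuming the
+module exposes its parameters through [[#nn.Module.parameters|parameters()]]):
+
+<file lua>
+function Module:updateParameters(learningRate)
+   local params, gradParams = self:parameters()
+   if params then
+      for i = 1, #params do
+         params[i]:add(-learningRate, gradParams[i]) -- p = p - lr * dE/dp
+      end
+   end
+end
+</file>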
+
+==== accUpdateGradParameters(input, gradOutput, learningRate) ====
+{{anchor:nn.Module.accUpdateGradParameters}}
+
+This is a convenience function that performs two operations at
+once: it calculates and accumulates the gradients with respect to the
+weights after multiplying them by the negative of the learning rate
+''learningRate''. Performing these two operations at once is more
+efficient and might be advantageous in certain
+situations.
+
+Keep in mind that this function uses a simple trick to achieve its
+goal and it might not be valid for a custom module.
+
+<file lua>
+function Module:accUpdateGradParameters(input, gradOutput, lr)
+ local gradWeight = self.gradWeight
+ local gradBias = self.gradBias
+ self.gradWeight = self.weight
+ self.gradBias = self.bias
+ self:accGradParameters(input, gradOutput, -lr)
+ self.gradWeight = gradWeight
+ self.gradBias = gradBias
+end
+</file>
+
+As can be seen, the gradients are accumulated directly into the
+weights. This assumption may not be true for a module that computes a
+nonlinear operation.
+
+==== share(mlp,s1,s2,...,sn) ====
+{{anchor:nn.Module.share}}
+
+This function modifies the parameters of the module named
+''s1'',..''sn'' (if they exist) so that they are shared with (pointers
+to) the parameters with the same names in the given module ''mlp''.
+
+The parameters have to be Tensors. This function is typically used if
+you want to have modules that share the same weights or biases.
+
+Note that, if called on a [[#nn.Containers|Container]]
+module, this function will share the corresponding parameters of all the
+contained modules as well.
+
+Example:
+<file lua>
+
+-- make an mlp
+mlp1=nn.Sequential();
+mlp1:add(nn.Linear(100,10));
+
+-- make a second mlp
+mlp2=nn.Sequential();
+mlp2:add(nn.Linear(100,10));
+
+-- the second mlp shares the bias of the first
+mlp2:share(mlp1,'bias');
+
+-- we change the bias of the first
+mlp1:get(1).bias[1]=99;
+
+-- and see that the second one's bias has also changed..
+print(mlp2:get(1).bias[1])
+
+</file>
+
+
+==== clone(mlp,...) ====
+{{anchor:nn.Module.clone}}
+
+Creates a deep copy of (i.e. not just a pointer to) the module,
+including the current state of its parameters (e.g. weights, biases
+etc., if any).
+
+If arguments are provided to the ''clone(...)'' function it also calls
+[[#nn.Module.share|share(...)]] with those arguments on the cloned
+module after creating it, hence making a deep copy of this module with
+some shared parameters.
+
+Example:
+<file lua>
+-- make an mlp
+mlp1=nn.Sequential();
+mlp1:add(nn.Linear(100,10));
+
+-- make a copy that shares the weights and biases
+mlp2=mlp1:clone('weight','bias');
+
+-- we change the bias of the first mlp
+mlp1:get(1).bias[1]=99;
+
+-- and see that the second one's bias has also changed..
+print(mlp2:get(1).bias[1])
+
+</file>
+
+==== type(type) ====
+{{anchor:nn.Module.type}}
+
+This function converts all the parameters of a module to the given
+''type''. The ''type'' can be one of the types defined for
+[[..:torch:tensor|torch.Tensor]].
+
+==== float() ====
+{{anchor:nn.Module.float}}
+
+Convenience method for calling [[#nn.Module.type|module:type('torch.FloatTensor')]]
+
+==== double() ====
+{{anchor:nn.Module.double}}
+
+Convenience method for calling [[#nn.Module.type|module:type('torch.DoubleTensor')]]
+
+==== cuda() ====
+{{anchor:nn.Module.cuda}}
+
+Convenience method for calling [[#nn.Module.type|module:type('torch.CudaTensor')]]
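+
+A short usage sketch of these conversion methods (shown here with the float
+type; ''cuda()'' additionally requires the CUDA backend to be installed):
+
+<file lua>
+module = nn.Linear(10, 5)
+module:float()           -- same as module:type('torch.FloatTensor')
+print(module.weight)     -- the weight matrix is now a torch.FloatTensor
+</file>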
+
+==== State Variables ====
+{{anchor:nn.statevars.dok}}
+
+These state variables are useful objects if one wants to check the guts of
+a ''Module''. The object pointer is //never// supposed to change. However, its
+contents (including its size, if it is a Tensor) may change.
+
+In general state variables are
+[[..:torch:tensor|Tensors]]. However, some special sub-classes
+like [[#nn.TableLayers|table layers]] contain something else. Please,
+refer to each module specification for further information.
+
+=== output ===
+{{anchor:nn.Module.output}}
+
+This contains the output of the module, computed with the last call of
+[[#nn.Module.forward|forward(input)]].
+
+=== gradInput ===
+{{anchor:nn.Module.gradInput}}
+
+This contains the gradients with respect to the inputs of the module, computed with the last call of
+[[#nn.Module.updateGradInput|updateGradInput(input, gradOutput)]].
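+
+For example, both state variables can be inspected after a forward/backward
+pass (a small sketch using a ''Linear'' module):
+
+<file lua>
+require "lab"
+m = nn.Linear(4, 2)
+x = lab.randn(4)
+y = m:forward(x)
+print(m.output)             -- holds the same values as y
+m:backward(x, lab.ones(2))
+print(m.gradInput)          -- gradients with respect to the 4 inputs
+</file>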
+
+==== Parameters and gradients w.r.t parameters ====
+
+Some modules contain parameters (the ones that we actually want to
+train!). The name of these parameters, and gradients w.r.t these parameters
+are module dependent.
+
+==== [{weights}, {gradWeights}] parameters() ====
+{{anchor:nn.Module.parameters}}
+
+This function should return two tables: one for the learnable
+parameters ''{weights}'' and another for the gradients of the energy
+with respect to the learnable parameters ''{gradWeights}''.
+
+For custom modules, it is a good idea to also override this
+function. By default none of the built-in functions/modules use this
+function call, but it is especially useful when one wants to obtain a
+global view of the whole network.
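+
+For instance (a small sketch; the exact ordering of the returned tensors is
+module dependent):
+
+<file lua>
+module = nn.Linear(10, 5)
+weights, gradWeights = module:parameters()
+print(weights[1])        -- typically the 5x10 weight matrix
+print(weights[2])        -- typically the bias vector of size 5
+print(gradWeights[1])    -- the corresponding gradient tensor
+</file>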
+
+===== Containers =====
+{{anchor:nn.Containers}}
+
+==== Concat ====
+{{anchor:nn.Concat}}
+
+<file lua>
+module = nn.Concat(dim)
+</file>
+Concat concatenates the output of one layer of "parallel" modules along the
+provided dimension ''dim'': they take the same inputs, and their output is
+concatenated.
+<file lua>
+mlp=nn.Concat(1);
+mlp:add(nn.Linear(5,3))
+mlp:add(nn.Linear(5,7))
+require "lab"
+print(mlp:forward(lab.randn(5)))
+</file>
+which gives the output:
+<file lua>
+ 0.7486
+ 0.1349
+ 0.7924
+-0.0371
+-0.4794
+ 0.3044
+-0.0835
+-0.7928
+ 0.7856
+-0.1815
+[torch.Tensor of dimension 10]
+</file>
+
+
+==== Sequential ====
+{{anchor:nn.Sequential}}
+
+Sequential provides a means to plug layers together
+in a feed-forward fully connected manner.
+
+E.g.
+creating a one hidden-layer multi-layer perceptron is thus just as easy as:
+<file lua>
+mlp = nn.Sequential()
+mlp:add( nn.Linear(10, 25) ) -- 10 input, 25 hidden units
+mlp:add( nn.Tanh() ) -- some hyperbolic tangent transfer function
+mlp:add( nn.Linear(25, 1) ) -- 1 output
+
+require "lab"
+print(mlp:forward(lab.randn(10)))
+</file>
+which gives the output:
+<file lua>
+-0.1815
+[torch.Tensor of dimension 1]
+</file>
+
+==== Parallel ====
+{{anchor:nn.Parallel}}
+
+''module'' = ''Parallel(inputDimension,outputDimension)''
+
+Creates a container module that applies its ''ith'' child module to the ''ith'' slice of the input Tensor by using [[..:torch:tensor#torch.tensor.select|select]]
+on dimension ''inputDimension''. It concatenates the results of its contained modules together along dimension ''outputDimension''.
+
+Example:
+<file lua>
+ require "lab"
+ mlp=nn.Parallel(2,1); -- iterate over dimension 2 of input
+ mlp:add(nn.Linear(10,3)); -- apply to first slice
+mlp:add(nn.Linear(10,2)) -- apply to second slice
+ print(mlp:forward(lab.randn(10,2)))
+</file>
+gives the output:
+<file lua>
+-0.5300
+-1.1015
+ 0.7764
+ 0.2819
+-0.6026
+[torch.Tensor of dimension 5]
+</file>
+
+A more complicated example:
+<file lua>
+require "lab"
+
+mlp=nn.Sequential();
+c=nn.Parallel(1,2)
+for i=1,10 do
+ local t=nn.Sequential()
+ t:add(nn.Linear(3,2))
+ t:add(nn.Reshape(2,1))
+ c:add(t)
+end
+mlp:add(c)
+
+pred=mlp:forward(lab.randn(10,3))
+print(pred)
+
+for i=1,10000 do -- Train for a few iterations
+ x=lab.randn(10,3);
+ y=lab.ones(2,10);
+ pred=mlp:forward(x)
+
+ criterion= nn.MSECriterion()
+ local err=criterion:forward(pred,y)
+ local gradCriterion = criterion:backward(pred,y);
+ mlp:zeroGradParameters();
+ mlp:backward(x, gradCriterion);
+ mlp:updateParameters(0.01);
+ print(err)
+end
+</file>
+===== Simple layers =====
+{{anchor:nn.simplelayers.dok}}
+==== Linear ====
+{{anchor:nn.Linear}}
+
+''module'' = ''Linear(inputDimension,outputDimension)''
+
+Applies a linear transformation to the incoming data, i.e. //y=
+Ax+b//. The ''input'' tensor given in ''forward(input)'' must be
+either a vector (1D tensor) or matrix (2D tensor). If the input is a
+matrix, then each row is assumed to be an input sample of the given batch.
+
+You can create a layer in the following way:
+<file lua>
+ module= nn.Linear(10,5) -- 10 inputs, 5 outputs
+</file>
+Usually this would be added to a network of some kind, e.g.:
+<file lua>
+ mlp = nn.Sequential();
+ mlp:add(module)
+</file>
+The weights and biases (//A// and //b//) can be viewed with:
+<file lua>
+ print(module.weight)
+ print(module.bias)
+</file>
+The gradients for these weights can be seen with:
+<file lua>
+ print(module.gradWeight)
+ print(module.gradBias)
+</file>
+As usual with ''nn'' modules,
+applying the linear transformation is performed with:
+<file lua>
+ x=torch.Tensor(10) -- 10 inputs
+ y=module:forward(x)
+</file>
+
+==== SparseLinear ====
+{{anchor:nn.SparseLinear}}
+
+''module'' = ''SparseLinear(inputDimension,outputDimension)''
+
+Applies a linear transformation to the incoming sparse data, i.e.
+//y= Ax+b//. The ''input'' tensor given in ''forward(input)'' must
+be a sparse vector represented as 2D tensor of the form
+torch.Tensor(N, 2) where the pairs represent indices and values.
+The SparseLinear layer is useful when the number of input
+dimensions is very large and the input data is sparse.
+
+You can create a sparse linear layer in the following way:
+
+<file lua>
+ module= nn.SparseLinear(10000,2) -- 10000 inputs, 2 outputs
+</file>
+The sparse linear module may be used as part of a larger network,
+and apart from the form of the input,
+[[#nn.SparseLinear|SparseLinear]]
+operates in exactly the same way as the [[#nn.Linear|Linear]] layer.
+
+A sparse input vector may be created as follows:
+<file lua>
+
+ x=lab.new({1, 0.1},{2, 0.3},{10, 0.3},{31, 0.2})
+
+ print(x)
+
+ 1.0000 0.1000
+ 2.0000 0.3000
+ 10.0000 0.3000
+ 31.0000 0.2000
+[torch.Tensor of dimension 4x2]
+
+</file>
+
+The first column contains indices and the second column contains
+values of a vector in which all other elements are zero. The
+indices should not exceed the stated dimensions of the input to the
+layer (10000 in the example).
+
+==== Abs ====
+{{anchor:nn.Abs}}
+
+''module'' = ''Abs()''
+
+''output = abs(input)''.
+
+<file lua>
+m=nn.Abs()
+ii=lab.linspace(-5,5)
+oo=m:forward(ii)
+go=lab.ones(100)
+gi=m:backward(ii,go)
+gnuplot.plot({'f(x)',ii,oo,'+-'},{'df/dx',ii,gi,'+-'})
+gnuplot.grid(true)
+</file>
+
+{{abs.png?400}}
+
+==== Add ====
+{{anchor:nn.Add }}
+
+''module'' = ''Add(inputDimension,scalar)''
+
+Applies a bias term to the incoming data, i.e.
+''y_i = x_i + b_i'', or, if ''scalar = true'', a single bias term is used:
+''y_i = x_i + b''.
+
+Example:
+<file lua>
+y=torch.Tensor(5);
+mlp=nn.Sequential()
+mlp:add(nn.Add(5))
+
+function gradUpdate(mlp, x, y, criterion, learningRate)
+ local pred = mlp:forward(x)
+ local err = criterion:forward(pred, y)
+ local gradCriterion = criterion:backward(pred, y)
+ mlp:zeroGradParameters()
+ mlp:backward(x, gradCriterion)
+ mlp:updateParameters(learningRate)
+ return err
+end
+
+for i=1,10000 do
+ x=lab.rand(5)
+ y:copy(x);
+ for i=1,5 do y[i]=y[i]+i; end
+ err=gradUpdate(mlp,x,y,nn.MSECriterion(),0.01)
+end
+print(mlp:get(1).bias)
+</file>
+gives the output:
+<file lua>
+ 1.0000
+ 2.0000
+ 3.0000
+ 4.0000
+ 5.0000
+[torch.Tensor of dimension 5]
+</file>
+i.e. the network successfully learns the input //x// has been shifted
+to produce the output //y//.
+
+
+==== Mul ====
+{{anchor:nn.Mul}}
+
+''module'' = ''Mul(inputDimension)''
+
+Applies a //single// scaling factor to the incoming data, i.e.
+//y= w x//, where //w// is a scalar.
+
+Example:
+<file lua>
+y=torch.Tensor(5);
+mlp=nn.Sequential()
+mlp:add(nn.Mul(5))
+
+function gradUpdate(mlp, x, y, criterion, learningRate)
+ local pred = mlp:forward(x)
+ local err = criterion:forward(pred,y)
+ local gradCriterion = criterion:backward(pred,y);
+ mlp:zeroGradParameters();
+ mlp:backward(x, gradCriterion);
+ mlp:updateParameters(learningRate);
+ return err
+end
+
+
+for i=1,10000 do
+ x=lab.rand(5)
+ y:copy(x); y:mul(math.pi);
+ err=gradUpdate(mlp,x,y,nn.MSECriterion(),0.01)
+end
+print(mlp:get(1).weight)
+</file>
+gives the output:
+<file lua>
+ 3.1416
+[torch.Tensor of dimension 1]
+</file>
+i.e. the network successfully learns the input ''x'' has been scaled by
+pi.
+
+==== CMul ====
+{{anchor:nn.CMul }}
+
+''module'' = ''CMul(inputDimension)''
+
+Applies a component-wise multiplication to the incoming data, i.e.
+''y_i = w_i * x_i''.
+
+Example:
+<file lua>
+mlp=nn.Sequential()
+mlp:add(nn.CMul(5))
+
+y=torch.Tensor(5);
+sc=torch.Tensor(5); for i=1,5 do sc[i]=i; end -- scale input with this
+
+function gradUpdate(mlp,x,y,criterion,learningRate)
+ local pred = mlp:forward(x)
+ local err = criterion:forward(pred,y)
+ local gradCriterion = criterion:backward(pred,y);
+ mlp:zeroGradParameters();
+ mlp:backward(x, gradCriterion);
+ mlp:updateParameters(learningRate);
+ return err
+end
+
+for i=1,10000 do
+ x=lab.rand(5)
+ y:copy(x); y:cmul(sc);
+ err=gradUpdate(mlp,x,y,nn.MSECriterion(),0.01)
+end
+print(mlp:get(1).weight)
+</file>
+gives the output:
+<file lua>
+ 1.0000
+ 2.0000
+ 3.0000
+ 4.0000
+ 5.0000
+[torch.Tensor of dimension 5]
+</file>
+i.e. the network successfully learns the input //x// has been scaled by
+those scaling factors to produce the output //y//.
+
+
+==== Max ====
+{{anchor:nn.Max}}
+
+''module'' = ''Max(dimension)''
+
+Applies a max operation over dimension ''dimension''.
+Hence, if an ''nxpxq'' Tensor was given as input, and ''dimension'' = ''2''
+then an ''nxq'' matrix would be output.
+
+
+==== Min ====
+{{anchor:nn.Min}}
+
+''module'' = ''Min(dimension)''
+
+Applies a min operation over dimension ''dimension''.
+Hence, if an ''nxpxq'' Tensor was given as input, and ''dimension'' = ''2''
+then an ''nxq'' matrix would be output.
+
+
+==== Mean ====
+{{anchor:nn.Mean}}
+
+''module'' = ''Mean(dimension)''
+
+Applies a mean operation over dimension ''dimension''.
+Hence, if an ''nxpxq'' Tensor was given as input, and ''dimension'' = ''2''
+then an ''nxq'' matrix would be output.
+
+==== Sum ====
+{{anchor:nn.Sum}}
+
+''module'' = ''Sum(dimension)''
+
+Applies a sum operation over dimension ''dimension''.
+Hence, if an ''nxpxq'' Tensor was given as input, and ''dimension'' = ''2''
+then an ''nxq'' matrix would be output.
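+
+A quick sketch illustrating these reduction modules on a small matrix:
+
+<file lua>
+require "lab"
+x = lab.ones(2, 3)             -- a 2x3 matrix of ones
+print(nn.Sum(2):forward(x))    -- a Tensor of dimension 2, each entry equal to 3
+print(nn.Mean(2):forward(x))   -- a Tensor of dimension 2, each entry equal to 1
+</file>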
+
+
+==== Euclidean ====
+{{anchor:nn.Euclidean}}
+
+''module'' = ''Euclidean(inputDimension,outputDimension)''
+
+Outputs the Euclidean distance of the input to ''outputDimension'' centers,
+i.e. this layer has the weights ''c_i'', ''i'' = ''1'',..,''outputDimension'', where
+''c_i'' are vectors of dimension ''inputDimension''. Output dimension ''j'' is
+''|| c_j - x ||^2'', where ''x'' is the input.
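+
+A small usage sketch (the output values depend on the randomly initialized
+centers):
+
+<file lua>
+require "lab"
+m = nn.Euclidean(3, 2)   -- 2 centers living in a 3-dimensional input space
+x = lab.rand(3)
+print(m:forward(x))      -- a Tensor of dimension 2, one distance per center
+</file>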
+
+==== WeightedEuclidean ====
+{{anchor:nn.WeightedEuclidean}}
+
+''module'' = ''WeightedEuclidean(inputDimension,outputDimension)''
+
+This module is similar to [[#nn.Euclidean|Euclidean]], but
+additionally learns a separate diagonal covariance matrix across the
+features of the input space for each center.
+
+
+==== Copy ====
+{{anchor:nn.Copy}}
+
+''module'' = ''Copy(inputType,outputType)''
+
+This layer copies the input to the output, casting it from type
+''inputType'' to type ''outputType''.
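+
+A brief sketch (assuming the default Tensor type is ''torch.DoubleTensor''):
+
+<file lua>
+require "lab"
+m = nn.Copy('torch.DoubleTensor', 'torch.FloatTensor')
+x = lab.randn(5)         -- a DoubleTensor
+print(m:forward(x))      -- the same values, stored in a FloatTensor
+</file>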
+
+
+==== Narrow ====
+{{anchor:nn.Narrow}}
+
+''module'' = ''Narrow(dimension, offset, length)''
+
+Narrow is an application of the
+[[..:torch:tensor:#torch.Tensor.narrow|narrow]] operation in a
+module.
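+
+For example (a small sketch):
+
+<file lua>
+require "lab"
+m = nn.Narrow(1, 2, 3)   -- keep 3 rows starting at row 2
+x = lab.randn(5, 2)
+print(m:forward(x))      -- a 3x2 Tensor containing rows 2, 3 and 4 of x
+</file>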
+
+==== Replicate ====
+{{anchor:nn.Replicate}}
+
+''module'' = ''Replicate(nFeature)''
+
+This class creates an output where the input is replicated
+''nFeature'' times along its first dimension. There is no memory
+allocation or memory copy in this module. It sets the
+[[..:torch:tensor#torch.Tensor.stride|stride]] along the first
+dimension to zero.
+
+<file lua>
+torch> x=lab.linspace(1,5,5)
+torch> =x
+ 1
+ 2
+ 3
+ 4
+ 5
+[torch.DoubleTensor of dimension 5]
+
+torch> m=nn.Replicate(3)
+torch> o=m:forward(x)
+torch> =o
+ 1 2 3 4 5
+ 1 2 3 4 5
+ 1 2 3 4 5
+[torch.DoubleTensor of dimension 3x5]
+
+torch> x:fill(13)
+torch> =x
+ 13
+ 13
+ 13
+ 13
+ 13
+[torch.DoubleTensor of dimension 5]
+
+torch> =o
+ 13 13 13 13 13
+ 13 13 13 13 13
+ 13 13 13 13 13
+[torch.DoubleTensor of dimension 3x5]
+
+</file>
+
+
+==== Reshape ====
+{{anchor:nn.Reshape}}
+
+''module'' = ''Reshape(dimension1, dimension2, ..)''
+
+Reshapes an ''nxpxqx..'' Tensor into a ''dimension1xdimension2x...'' Tensor,
+taking the elements column-wise.
+
+Example:
+<file lua>
+> x=torch.Tensor(4,4)
+> for i=1,4 do
+> for j=1,4 do
+> x[i][j]=(i-1)*4+j;
+> end
+> end
+> print(x)
+
+ 1 2 3 4
+ 5 6 7 8
+ 9 10 11 12
+ 13 14 15 16
+[torch.Tensor of dimension 4x4]
+
+> print(nn.Reshape(2,8):forward(x))
+
+ 1 9 2 10 3 11 4 12
+ 5 13 6 14 7 15 8 16
+[torch.Tensor of dimension 2x8]
+
+> print(nn.Reshape(8,2):forward(x))
+
+ 1 3
+ 5 7
+ 9 11
+ 13 15
+ 2 4
+ 6 8
+ 10 12
+ 14 16
+[torch.Tensor of dimension 8x2]
+
+> print(nn.Reshape(16):forward(x))
+
+ 1
+ 5
+ 9
+ 13
+ 2
+ 6
+ 10
+ 14
+ 3
+ 7
+ 11
+ 15
+ 4
+ 8
+ 12
+ 16
+[torch.Tensor of dimension 16]
+
+
+</file>
+
+
+==== Select ====
+{{anchor:nn.Select}}
+
+''module'' = ''Select(dimension, index)''
+
+Selects a dimension and index of an ''nxpxqx..'' Tensor.
+
+Example:
+<file lua>
+mlp=nn.Sequential();
+mlp:add(nn.Select(1,3))
+
+require "lab"
+x=lab.randn(10,5)
+print(x)
+print(mlp:forward(x))
+</file>
+gives the output:
+<file lua>
+ 0.9720 -0.0836 0.0831 -0.2059 -0.0871
+ 0.8750 -2.0432 -0.1295 -2.3932 0.8168
+ 0.0369 1.1633 0.6483 1.2862 0.6596
+ 0.1667 -0.5704 -0.7303 0.3697 -2.2941
+ 0.4794 2.0636 0.3502 0.3560 -0.5500
+-0.1898 -1.1547 0.1145 -1.1399 0.1711
+-1.5130 1.4445 0.2356 -0.5393 -0.6222
+-0.6587 0.4314 1.1916 -1.4509 1.9400
+ 0.2733 1.0911 0.7667 0.4002 0.1646
+ 0.5804 -0.5333 1.1621 1.5683 -0.1978
+[torch.Tensor of dimension 10x5]
+
+ 0.0369
+ 1.1633
+ 0.6483
+ 1.2862
+ 0.6596
+[torch.Tensor of dimension 5]
+</file>
+
+This can be used in conjunction with [[#nn.Concat|Concat]]
+to emulate the behavior
+of [[#nn.Parallel|Parallel]], or to select various parts of an input Tensor to
+perform operations on. Here is a fairly complicated example:
+<file lua>
+require "lab"
+
+mlp=nn.Sequential();
+c=nn.Concat(2)
+for i=1,10 do
+ local t=nn.Sequential()
+ t:add(nn.Select(1,i))
+ t:add(nn.Linear(3,2))
+ t:add(nn.Reshape(2,1))
+ c:add(t)
+end
+mlp:add(c)
+
+pred=mlp:forward(lab.randn(10,3))
+print(pred)
+
+for i=1,10000 do -- Train for a few iterations
+ x=lab.randn(10,3);
+ y=lab.ones(2,10);
+ pred=mlp:forward(x)
+
+ criterion= nn.MSECriterion()
+ err=criterion:forward(pred,y)
+ gradCriterion = criterion:backward(pred,y);
+ mlp:zeroGradParameters();
+ mlp:backward(x, gradCriterion);
+ mlp:updateParameters(0.01);
+ print(err)
+end
+</file>
+
+==== Exp ====
+{{anchor:nn.Exp}}
+
+Applies the ''exp'' function element-wise to the input Tensor,
+thus outputting a Tensor of the same dimension.
+<file lua>
+ii=lab.linspace(-2,2)
+m=nn.Exp()
+oo=m:forward(ii)
+go=lab.ones(100)
+gi=m:backward(ii,go)
+gnuplot.plot({'f(x)',ii,oo,'+-'},{'df/dx',ii,gi,'+-'})
+gnuplot.grid(true)
+</file>
+{{exp.png?400}}
+
+
+==== Square ====
+{{anchor:nn.Square}}
+
+Takes the square of each element.
+
+<file lua>
+ii=lab.linspace(-5,5)
+m=nn.Square()
+oo=m:forward(ii)
+go=lab.ones(100)
+gi=m:backward(ii,go)
+gnuplot.plot({'f(x)',ii,oo,'+-'},{'df/dx',ii,gi,'+-'})
+gnuplot.grid(true)
+</file>
+{{square.png?400}}
+
+==== Sqrt ====
+{{anchor:nn.Sqrt}}
+
+Takes the square root of each element.
+
+<file lua>
+ii=lab.linspace(0,5)
+m=nn.Sqrt()
+oo=m:forward(ii)
+go=lab.ones(100)
+gi=m:backward(ii,go)
+gnuplot.plot({'f(x)',ii,oo,'+-'},{'df/dx',ii,gi,'+-'})
+gnuplot.grid(true)
+</file>
+{{sqrt.png?400}}
+
+==== Power ====
+{{anchor:nn.Power}}
+
+''module'' = ''Power(p)''
+
+Raises each element to its ''pth'' power.
+
+<file lua>
+ii=lab.linspace(0,2)
+m=nn.Power(1.25)
+oo=m:forward(ii)
+go=lab.ones(100)
+gi=m:backward(ii,go)
+gnuplot.plot({'f(x)',ii,oo,'+-'},{'df/dx',ii,gi,'+-'})
+gnuplot.grid(true)
+</file>
+{{power.png?400}}
+
+===== Transfer Function Layers =====
+{{anchor:nn.transfer.dok}}
+
+==== HardTanh ====
+{{anchor:nn.HardTanh}}
+
+Applies the ''HardTanh'' function element-wise to the input Tensor,
+thus outputting a Tensor of the same dimension.
+
+''HardTanh'' is defined as:
+
+ * ''f(x)'' = ''1, if x >'' ''1,''
+ * ''f(x)'' = ''-1, if x <'' ''-1,''
+ * ''f(x)'' = ''x,'' ''otherwise.''
+
+<file lua>
+ii=lab.linspace(-2,2)
+m=nn.HardTanh()
+oo=m:forward(ii)
+go=lab.ones(100)
+gi=m:backward(ii,go)
+gnuplot.plot({'f(x)',ii,oo,'+-'},{'df/dx',ii,gi,'+-'})
+gnuplot.grid(true)
+</file>
+{{htanh.png?400}}
+
+
+==== HardShrink ====
+{{anchor:nn.HardShrink}}
+
+''module = nn.HardShrink(lambda)''
+
+Applies the hard shrinkage function element-wise to the input
+[[..:torch:Tensor|Tensor]]. The output is the same size as the input.
+
+The ''HardShrinkage'' operator is defined as:
+
+ * ''f(x) = x, if x > lambda''
+ * ''f(x) = x, if x < -lambda''
+ * ''f(x) = 0, otherwise''
+
+<file lua>
+ii=lab.linspace(-2,2)
+m=nn.HardShrink(0.85)
+oo=m:forward(ii)
+go=lab.ones(100)
+gi=m:backward(ii,go)
+gnuplot.plot({'f(x)',ii,oo,'+-'},{'df/dx',ii,gi,'+-'})
+gnuplot.grid(true)
+</file>
+{{hshrink.png?400}}
+
+==== SoftShrink ====
+{{anchor:nn.SoftShrink}}
+
+''module = nn.SoftShrink(lambda)''
+
+Applies the soft shrinkage function element-wise to the input
+[[..:torch:Tensor|Tensor]]. The output is the same size as the input.
+
+The ''SoftShrinkage'' operator is defined as:
+
+ * ''f(x) = x-lambda, if x > lambda''
+ * ''f(x) = x+lambda, if x < -lambda''
+ * ''f(x) = 0, otherwise''
+
+<file lua>
+ii=lab.linspace(-2,2)
+m=nn.SoftShrink(0.85)
+oo=m:forward(ii)
+go=lab.ones(100)
+gi=m:backward(ii,go)
+gnuplot.plot({'f(x)',ii,oo,'+-'},{'df/dx',ii,gi,'+-'})
+gnuplot.grid(true)
+</file>
+{{sshrink.png?400}}
+
+
+==== SoftMax ====
+{{anchor:nn.SoftMax}}
+
+Applies the ''Softmax'' function to an n-dimensional input Tensor,
+rescaling it so that the elements of the n-dimensional output Tensor
+lie in the range (0,1) and sum to 1.
+
+''Softmax'' is defined as ''f_i(x)'' = ''exp(x_i-shift) / sum_j exp(x_j-shift)'',
+where ''shift'' = ''max_i x_i''.
+
+
+<file lua>
+ii=lab.exp(lab.abs(lab.randn(10)))
+m=nn.SoftMax()
+oo=m:forward(ii)
+gnuplot.plot({'Input',ii,'+-'},{'Output',oo,'+-'})
+gnuplot.grid(true)
+</file>
+{{softmax.png?400}}
+
+==== SoftMin ====
+{{anchor:nn.SoftMin}}
+
+Applies the ''Softmin'' function to an n-dimensional input Tensor,
+rescaling it so that the elements of the n-dimensional output Tensor
+lie in the range (0,1) and sum to 1.
+
+''Softmin'' is defined as ''f_i(x)'' = ''exp(-x_i-shift) / sum_j exp(-x_j-shift)'',
+where ''shift'' = ''max_i x_i''.
+
+
+<file lua>
+ii=lab.exp(lab.abs(lab.randn(10)))
+m=nn.SoftMin()
+oo=m:forward(ii)
+gnuplot.plot({'Input',ii,'+-'},{'Output',oo,'+-'})
+gnuplot.grid(true)
+</file>
+{{softmin.png?400}}
+
+==== SoftPlus ====
+{{anchor:nn.SoftPlus}}
+
+Applies the ''SoftPlus'' function to an n-dimensional input Tensor.
+It can be used to constrain the output of a machine to always be positive.
+
+''SoftPlus'' is defined as ''f_i(x)'' = ''log(1 + exp(x_i)))''.
+
+<file lua>
+ii=lab.randn(10)
+m=nn.SoftPlus()
+oo=m:forward(ii)
+go=lab.ones(10)
+gi=m:backward(ii,go)
+gnuplot.plot({'Input',ii,'+-'},{'Output',oo,'+-'},{'gradInput',gi,'+-'})
+gnuplot.grid(true)
+</file>
+{{softplus.png?400}}
+
+==== SoftSign ====
+{{anchor:nn.SoftSign}}
+
+Applies the ''SoftSign'' function to an n-dimensional input Tensor.
+
+''SoftSign'' is defined as ''f_i(x) = x_i / (1+|x_i|)''
+
+<file lua>
+ii=lab.linspace(-5,5)
+m=nn.SoftSign()
+oo=m:forward(ii)
+go=lab.ones(100)
+gi=m:backward(ii,go)
+gnuplot.plot({'f(x)',ii,oo,'+-'},{'df/dx',ii,gi,'+-'})
+gnuplot.grid(true)
+</file>
+{{softsign.png?400}}
+
+==== LogSigmoid ====
+{{anchor:nn.LogSigmoid}}
+
+Applies the ''LogSigmoid'' function to an n-dimensional input Tensor.
+
+''LogSigmoid'' is defined as ''f_i(x)'' = ''log(1/(1+ exp(-x_i)))''.
+
+
+<file lua>
+ii=lab.randn(10)
+m=nn.LogSigmoid()
+oo=m:forward(ii)
+go=lab.ones(10)
+gi=m:backward(ii,go)
+gnuplot.plot({'Input',ii,'+-'},{'Output',oo,'+-'},{'gradInput',gi,'+-'})
+gnuplot.grid(true)
+</file>
+{{logsigmoid.png?400}}
+
+
+==== LogSoftMax ====
+{{anchor:nn.LogSoftMax}}
+
+Applies the ''LogSoftmax'' function to an n-dimensional input Tensor.
+
+''LogSoftmax'' is defined as ''f_i(x)'' = ''log(1/a exp(x_i))'',
+where ''a'' = ''sum_j exp(x_j)''.
+
+<file lua>
+ii=lab.randn(10)
+m=nn.LogSoftMax()
+oo=m:forward(ii)
+go=lab.ones(10)
+gi=m:backward(ii,go)
+gnuplot.plot({'Input',ii,'+-'},{'Output',oo,'+-'},{'gradInput',gi,'+-'})
+gnuplot.grid(true)
+</file>
+{{logsoftmax.png?400}}
+
+==== Sigmoid ====
+{{anchor:nn.Sigmoid}}
+
+Applies the ''Sigmoid'' function element-wise to the input Tensor,
+thus outputting a Tensor of the same dimension.
+
+''Sigmoid'' is defined as ''f(x)'' = ''1/(1+exp(-x))''.
+
+<file lua>
+ii=lab.linspace(-5,5)
+m=nn.Sigmoid()
+oo=m:forward(ii)
+go=lab.ones(100)
+gi=m:backward(ii,go)
+gnuplot.plot({'f(x)',ii,oo,'+-'},{'df/dx',ii,gi,'+-'})
+gnuplot.grid(true)
+</file>
+{{sigmoid.png?400}}
+
+==== Tanh ====
+{{anchor:nn.Tanh}}
+
+Applies the ''Tanh'' function element-wise to the input Tensor,
+thus outputting a Tensor of the same dimension.
+
+<file lua>
+ii=lab.linspace(-3,3)
+m=nn.Tanh()
+oo=m:forward(ii)
+go=lab.ones(100)
+gi=m:backward(ii,go)
+gnuplot.plot({'f(x)',ii,oo,'+-'},{'df/dx',ii,gi,'+-'})
+gnuplot.grid(true)
+</file>
+{{tanh.png?400}}
+
+===== Convolutional layers =====
+{{anchor:nn.convlayers.dok}}
+
+SpatialConvolution and SpatialSubSampling apply to inputs with
+two-dimensional relationships (e.g. images). TemporalConvolution and
+TemporalSubSampling apply to sequences with a one-dimensional
+relationship (e.g. strings of some kind).
+
+For spatial convolutional layers, the input is supposed to be 3D. The
+first dimension is the number of features, the last two dimensions
+are spatial.
+
+==== SpatialConvolution ====
+{{anchor:nn.SpatialConvolution}}
+
+<file lua>
+module = nn.SpatialConvolution(nInputPlane, nOutputPlane, kW, kH, [dW], [dH])
+</file>
+
+Applies a 2D convolution over an input image composed of several input planes. The ''input'' tensor in
+''forward(input)'' is expected to be a 3D tensor (''nInputPlane x width x height'').
+
+The parameters are the following:
+ * ''nInputPlane'': The number of expected input planes in the image given into ''forward()''.
+ * ''nOutputPlane'': The number of output planes the convolution layer will produce.
+ * ''kW'': The kernel width of the convolution
+ * ''kH'': The kernel height of the convolution
+ * ''dW'': The step of the convolution in the width dimension. Default is ''1''.
+ * ''dH'': The step of the convolution in the height dimension. Default is ''1''.
+
+Note that depending on the size of your kernel, several (of the last)
+columns or rows of the input image might be lost. It is up to the user to
+add proper padding in images.
+
+If the input image is a 3D tensor ''nInputPlane x width x height'', the output image size
+will be ''nOutputPlane x owidth x oheight'' where
+<file lua>
+owidth = (width - kW) / dW + 1
+oheight = (height - kH) / dH + 1 .
+</file>
+
+The parameters of the convolution can be found in ''self.weight'' (Tensor of
+size ''nOutputPlane x nInputPlane x kH x kW'') and ''self.bias'' (Tensor of
+size ''nOutputPlane''). The corresponding gradients can be found in
+''self.gradWeight'' and ''self.gradBias''.
+
+The output value of the layer can be precisely described as:
+<file lua>
+output[i][j][k] = bias[k]
+ + sum_l sum_{s=1}^kW sum_{t=1}^kH weight[s][t][l][k]
+ * input[dW*(i-1)+s)][dH*(j-1)+t][l]
+</file>
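+
+A small sketch (sizes chosen arbitrarily) illustrating the output size
+formula above, assuming the default steps ''dW'' = ''dH'' = ''1'':
+
+<file lua>
+m = nn.SpatialConvolution(3, 16, 5, 5)  -- 3 input planes, 16 output planes, 5x5 kernels
+x = torch.Tensor(3, 32, 32):fill(1)     -- an input with 3 planes of size 32x32
+print(m:forward(x):size())              -- 16x28x28, since (32-5)/1+1 = 28
+</file>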
+
+==== SpatialConvolutionMap ====
+{{anchor:nn.SpatialConvolutionMap}}
+
+<file lua>
+module = nn.SpatialConvolutionMap(connectionMatrix, kW, kH, [dW], [dH])
+</file>
+
+This class is a generalization of
+[[#nn.SpatialConvolution|nn.SpatialConvolution]]. It uses a generic
+connection table between input and output features. The
+[[#nn.SpatialConvolution|nn.SpatialConvolution]] is equivalent to
+using a [[#nn.tables.full|full connection table]]. One can specify
+different types of connection tables.
+
+=== Full Connection Table ===
+{{anchor:nn.tables.full}}
+
+''table = nn.tables.full(nin,nout)''
+
+This is a precomputed table that specifies connections between every
+input and output node.
+
+=== One to One Connection Table ===
+{{anchor:nn.tables.onetoone}}
+
+''table = nn.tables.oneToOne(n)''
+
+This is a precomputed table that specifies a single connection to each
+output node from the corresponding input node.
+
+=== Random Connection Table ===
+{{anchor:nn.tables.random}}
+
+''table = nn.tables.random(nin,nout, nto)''
+
+This table is randomly populated such that each output unit has
+''nto'' incoming connections. The algorithm tries to assign a uniform
+number of outgoing connections to each input node if possible.
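+
+A brief sketch (sizes chosen arbitrarily) using a one-to-one connection
+table:
+
+<file lua>
+conn = nn.tables.oneToOne(3)              -- one output plane per input plane
+m = nn.SpatialConvolutionMap(conn, 5, 5)
+x = torch.Tensor(3, 12, 12):fill(1)
+print(m:forward(x):size())                -- 3x8x8, following the usual (size-k)/d+1 rule
+</file>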
+
+==== SpatialLPPooling ====
+{{anchor:nn.SpatialLPPooling}}
+
+<file lua>
+module = nn.SpatialLPPooling(nInputPlane, pnorm, kW, kH, [dW], [dH])
+</file>
+
+Computes the ''p'' norm in a convolutional manner on a set of 2D input planes.
+
+==== SpatialMaxPooling ====
+{{anchor:nn.SpatialMaxPooling}}
+
+<file lua>
+module = nn.SpatialMaxPooling(kW, kH [, dW, dH])
+</file>
+
+Applies 2D max-pooling operation in ''kWxkH'' regions by step size
+''dWxdH'' steps. The number of output features is equal to the number of
+input planes.
+
+==== SpatialSubSampling ====
+{{anchor:nn.SpatialSubSampling}}
+
+<file lua>
+module = nn.SpatialSubSampling(nInputPlane, kW, kH, [dW], [dH])
+</file>
+
+Applies a 2D sub-sampling over an input image composed of several input planes. The ''input'' tensor in
+''forward(input)'' is expected to be a 3D tensor (''nInputPlane x width x height''). The number of output
+planes will be the same as ''nInputPlane''.
+
+The parameters are the following:
+ * ''nInputPlane'': The number of expected input planes in the image given into ''forward()''.
+ * ''kW'': The kernel width of the sub-sampling
+ * ''kH'': The kernel height of the sub-sampling
+ * ''dW'': The step of the sub-sampling in the width dimension. Default is ''1''.
+ * ''dH'': The step of the sub-sampling in the height dimension. Default is ''1''.
+
+Note that depending on the size of your kernel, several (of the last)
+columns or rows of the input image might be lost. It is up to the user to
+add proper padding in images.
+
+If the input image is a 3D tensor ''nInputPlane x width x height'', the output image size
+will be ''nInputPlane x owidth x oheight'' where
+<file lua>
+owidth = (width - kW) / dW + 1
+oheight = (height - kH) / dH + 1 .
+</file>
+
+The parameters of the sub-sampling can be found in ''self.weight'' (Tensor of
+size ''nInputPlane'') and ''self.bias'' (Tensor of size ''nInputPlane''). The
+corresponding gradients can be found in ''self.gradWeight'' and
+''self.gradBias''.
+
+The output value of the layer can be precisely described as:
+<file lua>
+output[i][j][k] = bias[k]
+ + weight[k] sum_{s=1}^kW sum_{t=1}^kH input[dW*(i-1)+s)][dH*(j-1)+t][k]
+</file>
+
+==== SpatialZeroPadding ====
+{{anchor:nn.SpatialZeroPadding}}
+
+<file lua>
+module = nn.SpatialZeroPadding(padLeft, padRight, padTop, padBottom)
+</file>
+
+Each feature map of a given input is padded with the specified number of
+zeros. If the padding values are negative, then the input is cropped.
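+
+A small sketch (assuming a single-plane 3x3 input):
+
+<file lua>
+m = nn.SpatialZeroPadding(1, 1, 1, 1)   -- one extra row/column of zeros on every side
+x = torch.Tensor(1, 3, 3):fill(1)
+print(m:forward(x))                     -- a 1x5x5 map: the ones surrounded by a zero border
+</file>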
+
+==== SpatialSubtractiveNormalization ====
+{{anchor:nn.SpatialSubtractiveNormalization}}
+
+<file lua>
+module = nn.SpatialSubtractiveNormalization(ninputplane, kernel)
+</file>
+
+Applies a spatial subtraction operation on a series of 2D inputs using
+''kernel'' for computing the weighted average in a neighborhood. The
+neighborhood is defined for a local spatial region that is the same size as the
+kernel and spans all features. For a single-channel input image, since there is
+only one feature, the region is only spatial. For an RGB image, the
+weighted average is taken over the RGB channels and a spatial region.
+
+If the ''kernel'' is 1D, then it will be used for constructing a separable
+2D kernel. The operations will be much more efficient in this case.
+
+The kernel is generally chosen as a Gaussian when it is believed that
+the correlation of two pixel locations decreases with increasing
+distance. On the feature dimension, a uniform average is used since
+the weighting across features is not known.
+
+For this example we use an external package
+[[http://www.github.com/clementfarabet/lua---image/|image]]
+
+<file lua>
+require 'image'
+require 'nn'
+lena = image.rgb2y(image.lena())
+ker = lab.ones(11)
+m=nn.SpatialSubtractiveNormalization(1,ker)
+processed = m:forward(lena)
+w1=image.display(lena)
+w2=image.display(processed)
+</file>
+{{lena.jpg?300}}{{lenap.jpg?300}}
+
+==== TemporalConvolution ====
+{{anchor:nn.TemporalConvolution}}
+
+<file lua>
+module = nn.TemporalConvolution(inputFrameSize, outputFrameSize, kW, [dW])
+</file>
+
+Applies a 1D convolution over an input sequence composed of ''nInputFrame'' frames. The ''input'' tensor in
+''forward(input)'' is expected to be a 2D tensor (''nInputFrame x inputFrameSize'').
+
+The parameters are the following:
+ * ''inputFrameSize'': The input frame size expected in sequences given into ''forward()''.
+ * ''outputFrameSize'': The output frame size the convolution layer will produce.
+ * ''kW'': The kernel width of the convolution
+ * ''dW'': The step of the convolution. Default is ''1''.
+
+Note that depending on the size of your kernel, several (of the last)
+frames of the sequence might be lost. It is up to the user to add proper padding frames in the input
+sequences.
+
+If the input sequence is a 2D tensor ''nInputFrame x inputFrameSize'', the output sequence will be
+''nOutputFrame x outputFrameSize'' where
+<file lua>
+nOutputFrame = (nInputFrame - kW) / dW + 1
+</file>
+
+The parameters of the convolution can be found in ''self.weight'' (Tensor of
+size ''outputFrameSize x (inputFrameSize x kW) '') and ''self.bias'' (Tensor of
+size ''outputFrameSize''). The corresponding gradients can be found in
+''self.gradWeight'' and ''self.gradBias''.
+
+The output value of the layer can be precisely described as:
+<file lua>
+output[i][t] = bias[i]
+ + sum_j sum_{k=1}^kW weight[j][k][i]
+ * input[j][dW*(t-1)+k)]
+</file>
+
+Here is a simple example:
+
+<file lua>
+inp=5; -- dimensionality of one sequence element
+outp=1; -- number of derived features for one sequence element
+kw=1; -- kernel only operates on one sequence element at once
+dw=1; -- we step once and go on to the next sequence element
+
+mlp=nn.TemporalConvolution(inp,outp,kw,dw)
+
+require "lab"
+x=lab.rand(7,inp) -- a sequence of 7 elements
+print(mlp:forward(x))
+</file>
+which gives:
+<file lua>
+-0.9109
+-0.9872
+-0.6808
+-0.9403
+-0.9680
+-0.6901
+-0.6387
+[torch.Tensor of dimension 7x1]
+</file>
+
+This is equivalent to:
+<file lua>
+weights=lab.reshape(mlp.weight,inp) -- weights applied to all
+bias= mlp.bias[1];
+for i=1,x:size(1) do -- for each sequence element
+ element= x[i]; -- features of ith sequence element
+ print(element:dot(weights) + bias)
+end
+</file>
+which gives:
+<file lua>
+-0.91094998687717
+-0.98721705771773
+-0.68075004276185
+-0.94030132495887
+-0.96798754116609
+-0.69008470895581
+-0.63871422284166
+</file>
+
+
+==== TemporalSubSampling ====
+{{anchor:nn.TemporalSubSampling}}
+
+<file lua>
+module = nn.TemporalSubSampling(inputFrameSize, kW, [dW])
+</file>
+
+Applies a 1D sub-sampling over an input sequence composed of ''nInputFrame'' frames. The ''input'' tensor in
+''forward(input)'' is expected to be a 2D tensor (''nInputFrame x inputFrameSize''). The output frame size
+will be the same as the input one (''inputFrameSize'').
+
+The parameters are the following:
+ * ''inputFrameSize'': The input frame size expected in sequences given into ''forward()''.
+ * ''kW'': The kernel width of the sub-sampling
+ * ''dW'': The step of the sub-sampling. Default is ''1''.
+
+Note that depending on the size of your kernel, several (of the last)
+frames of the sequence might be lost. It is up to the user to add proper padding frames in the input
+sequences.
+
+If the input sequence is a 2D tensor ''nInputFrame x inputFrameSize'', the output sequence will be
+''nOutputFrame x inputFrameSize'' where
+<file lua>
+nOutputFrame = (nInputFrame - kW) / dW + 1
+</file>
+
+The parameters of the sub-sampling can be found in ''self.weight'' (Tensor of
+size ''inputFrameSize'') and ''self.bias'' (Tensor of
+size ''inputFrameSize''). The corresponding gradients can be found in
+''self.gradWeight'' and ''self.gradBias''.
+
+The output value of the layer can be precisely described as:
+<file lua>
+output[i][t] = bias[i] + weight[i] * sum_{k=1}^kW input[i][dW*(t-1)+k)]
+</file>
+
+==== LookupTable ====
+{{anchor:nn.LookupTable}}
+
+<file lua>
+module = nn.LookupTable(nIndex, sizes)
+</file>
+or
+<file lua>
+module = nn.LookupTable(nIndex, size1, [size2], [size3], ...)
+</file>
+
+This layer is a particular case of a convolution, where the width of the convolution would be ''1''.
+When calling ''forward(input)'', it assumes ''input'' is a 1D tensor filled with indices. Indices start
+at ''1'' and can go up to ''nIndex''. For each index, it outputs a corresponding ''Tensor'' of size
+specified by ''sizes'' (a ''LongStorage'') or ''size1 x size2 x...''.
+
+The output tensors are concatenated, generating a ''size1 x size2 x ... x sizeN x n'' tensor, where ''n''
+is the size of the ''input'' tensor.
+
+When only ''size1'' is provided, this is equivalent to doing the following matrix-matrix multiplication
+in an efficient manner:
+<file lua>
+M P
+</file>
+where ''M'' is a 2D matrix ''size1 x nIndex'' containing the parameters of the lookup-table and
+''P'' is a 2D matrix, where each column vector ''i'' is a zero vector except at index ''input[i]'' where it is ''1''.
+
+Example:
+<file lua>
+ -- a lookup table containing 10 tensors of size 3
+ module = nn.LookupTable(10, 3)
+
+ input = torch.Tensor(4)
+ input[1] = 1; input[2] = 2; input[3] = 1; input[4] = 10;
+ print(module:forward(input))
+</file>
+
+Outputs something like:
+<file lua>
+-0.1784 2.2045 -0.1784 -0.2475
+-1.0120 0.0537 -1.0120 -0.2148
+-1.2840 0.8685 -1.2840 -0.2792
+[torch.Tensor of dimension 3x4]
+</file>
+Note that the first column vector is the same as the 3rd one!
+
+===== Layers for manipulating tables =====
+{{anchor:nn.TableLayers}}
+
+This set of modules allows the manipulation of Tables
+through the layers of a neural network.
+This allows one to build very rich architectures.
+
+Table-based modules work by supporting forward and backward methods that can accept
+tables as inputs. It turns out that the usual [[#nn.Sequential|Sequential]] module can do this, so all that is needed is other child modules that take advantage of such tables.
+<file lua>
+mlp = nn.Sequential();
+t={x,y,z}
+pred=mlp:forward(t)
+pred=mlp:forward{x,y,z} -- This is equivalent to the line before
+</file>
+
+==== ConcatTable ====
+{{anchor:nn.ConcatTable}}
+
+ConcatTable is a container module that applies each member module to
+the same input Tensor.
+
+Example:
+<file lua>
+mlp= nn.ConcatTable()
+mlp:add(nn.Linear(5,2))
+mlp:add(nn.Linear(5,3))
+
+require "lab"
+pred=mlp:forward(lab.randn(5));
+for i,k in pairs(pred) do print(i,k); end
+</file>
+which gives the output:
+<file lua>
+1
+-0.4073
+ 0.0110
+[torch.Tensor of dimension 2]
+
+2
+ 0.0027
+-0.0598
+-0.1189
+[torch.Tensor of dimension 3]
+</file>
+
+==== ParallelTable ====
+{{anchor:nn.ParallelTable}}
+
+ParallelTable is a container module that, in its ''forward'' method, applies the ''ith'' member module to the ''ith'' input, and outputs a table of the set of outputs.
+
+Example:
+<file lua>
+mlp= nn.ParallelTable()
+mlp:add(nn.Linear(10,2))
+mlp:add(nn.Linear(5,3))
+
+require "lab"
+x=lab.randn(10)
+y=lab.rand(5)
+
+pred=mlp:forward{x,y}
+for i,k in pairs(pred) do print(i,k); end
+</file>
+which gives the output:
+<file lua>
+1
+ 0.0331
+ 0.7003
+[torch.Tensor of dimension 2]
+
+2
+ 0.0677
+-0.1657
+-0.7383
+[torch.Tensor of dimension 3]
+</file>
+
+==== SplitTable ====
+{{anchor:nn.SplitTable}}
+
+''module'' = ''SplitTable(dimension)''
+
+Creates a module that takes a Tensor as input and outputs a table of Tensors, obtained by splitting the input Tensor along dimension ''dimension''.
+
+Example 1:
+<file lua>
+require "lab"
+mlp=nn.SplitTable(2)
+x=lab.randn(4,3)
+pred=mlp:forward(x)
+for i,k in pairs(pred) do print(i,k); end
+</file>
+gives the output:
+<file lua>
+1
+ 1.3885
+ 1.3295
+ 0.4281
+-1.0171
+[torch.Tensor of dimension 4]
+
+2
+-1.1565
+-0.8556
+-1.0717
+-0.8316
+[torch.Tensor of dimension 4]
+
+3
+-1.3678
+-0.1709
+-0.0191
+-2.5871
+[torch.Tensor of dimension 4]
+</file>
+
+Example 2:
+<file lua>
+require "lab"
+mlp=nn.SplitTable(1)
+pred=mlp:forward(lab.randn(10,3))
+for i,k in pairs(pred) do print(i,k); end
+</file>
+gives the output:
+<file lua>
+1
+ 1.6114
+ 0.9038
+ 0.8419
+[torch.Tensor of dimension 3]
+
+2
+ 2.4742
+ 0.2208
+ 1.6043
+[torch.Tensor of dimension 3]
+
+3
+ 1.3415
+ 0.2984
+ 0.2260
+[torch.Tensor of dimension 3]
+
+4
+ 2.0889
+ 1.2309
+ 0.0983
+[torch.Tensor of dimension 3]
+</file>
+
+A more complicated example:
+<file lua>
+require "lab"
+
+mlp=nn.Sequential(); --Create a network that takes a Tensor as input
+mlp:add(nn.SplitTable(2))
+ c=nn.ParallelTable() --The two Tensors go through two different Linear
+ c:add(nn.Linear(10,3)) --Layers in Parallel
+ c:add(nn.Linear(10,7))
+mlp:add(c) --Outputing a table with 2 elements
+ p=nn.ParallelTable() --These tables go through two more linear layers
+ p:add(nn.Linear(3,2)) -- separately.
+ p:add(nn.Linear(7,1))
+mlp:add(p)
+mlp:add(nn.JoinTable(1)) --Finally, the tables are joined together and output.
+
+pred=mlp:forward(lab.randn(10,2))
+print(pred)
+
+for i=1,100 do -- A few steps of training such a network..
+ x=lab.ones(10,2);
+ y=torch.Tensor(3); y:copy(x:select(2,1):narrow(1,1,3))
+ pred=mlp:forward(x)
+
+ criterion= nn.MSECriterion()
+ local err=criterion:forward(pred,y)
+ local gradCriterion = criterion:backward(pred,y);
+ mlp:zeroGradParameters();
+ mlp:backward(x, gradCriterion);
+ mlp:updateParameters(0.05);
+
+ print(err)
+end
+</file>
+
+==== JoinTable ====
+{{anchor:nn.JoinTable}}
+
+''module'' = ''JoinTable(dimension)''
+
+Creates a module that takes a list of Tensors as input and outputs a Tensor by joining them together along dimension ''dimension''.
+
+Example:
+<file lua>
+require "lab"
+x=lab.randn(5,1)
+y=lab.randn(5,1)
+z=lab.randn(2,1)
+
+print(nn.JoinTable(1):forward{x,y})
+print(nn.JoinTable(2):forward{x,y})
+print(nn.JoinTable(1):forward{x,z})
+</file>
+gives the output:
+<file lua>
+1.3965
+ 0.5146
+-1.5244
+-0.9540
+ 0.4256
+ 0.1575
+ 0.4491
+ 0.6580
+ 0.1784
+-1.7362
+
+ 1.3965 0.1575
+ 0.5146 0.4491
+-1.5244 0.6580
+-0.9540 0.1784
+ 0.4256 -1.7362
+
+ 1.3965
+ 0.5146
+-1.5244
+-0.9540
+ 0.4256
+-1.2660
+ 1.0869
+[torch.Tensor of dimension 7x1]
+</file>
+
+A more complicated example:
+<file lua>
+require "lab"
+
+mlp=nn.Sequential(); --Create a network that takes a Tensor as input
+ c=nn.ConcatTable() --The same Tensor goes through two different Linear
+ c:add(nn.Linear(10,3)) --Layers in Parallel
+ c:add(nn.Linear(10,7))
+mlp:add(c) --Outputing a table with 2 elements
+ p=nn.ParallelTable() --These tables go through two more linear layers
+ p:add(nn.Linear(3,2)) -- separately.
+ p:add(nn.Linear(7,1))
+mlp:add(p)
+mlp:add(nn.JoinTable(1)) --Finally, the tables are joined together and output.
+
+pred=mlp:forward(lab.randn(10))
+print(pred)
+
+for i=1,100 do -- A few steps of training such a network..
+ x=lab.ones(10);
+ y=torch.Tensor(3); y:copy(x:narrow(1,1,3))
+ pred=mlp:forward(x)
+
+ criterion= nn.MSECriterion()
+ local err=criterion:forward(pred,y)
+ local gradCriterion = criterion:backward(pred,y);
+ mlp:zeroGradParameters();
+ mlp:backward(x, gradCriterion);
+ mlp:updateParameters(0.05);
+
+ print(err)
+end
+</file>
+
+==== Identity ====
+{{anchor:nn.Identity}}
+
+''module'' = ''Identity()''
+
+Creates a module that returns whatever is input to it as output.
+This is useful when combined with the module
+[[#nn.ParallelTable|ParallelTable]]
+in case you do not wish to do anything to one of the input Tensors.
+Example:
+<file lua>
+require "lab"
+mlp=nn.Identity()
+print(mlp:forward(lab.ones(5,2)))
+</file>
+gives the output:
+<file lua>
+ 1 1
+ 1 1
+ 1 1
+ 1 1
+ 1 1
+[torch.Tensor of dimension 5x2]
+</file>
+
+Here is a more useful example, where one can implement a network which also computes a Criterion using this module:
+<file lua>
+pred_mlp=nn.Sequential(); -- A network that makes predictions given x.
+pred_mlp:add(nn.Linear(5,4))
+pred_mlp:add(nn.Linear(4,3))
+
+xy_mlp=nn.ParallelTable();-- A network for predictions and for keeping the
+xy_mlp:add(pred_mlp) -- true label for comparison with a criterion
+xy_mlp:add(nn.Identity()) -- by forwarding both x and y through the network.
+
+mlp=nn.Sequential(); -- The main network that takes both x and y.
+mlp:add(xy_mlp) -- It feeds x and y to parallel networks;
+cr=nn.MSECriterion();
+cr_wrap=nn.CriterionTable(cr)
+mlp:add(cr_wrap) -- and then applies the criterion.
+
+for i=1,100 do -- Do a few training iterations
+ x=lab.ones(5); -- Make input features.
+ y=torch.Tensor(3);
+ y:copy(x:narrow(1,1,3)) -- Make output label.
+ err=mlp:forward{x,y} -- Forward both input and output.
+ print(err) -- Print error from criterion.
+
+ mlp:zeroGradParameters(); -- Do backprop...
+ mlp:backward({x, y} );
+ mlp:updateParameters(0.05);
+end
+</file>
+
+==== PairwiseDistance ====
+{{anchor:nn.PairwiseDistance}}
+
+''module'' = ''PairwiseDistance(p)'' creates a module that takes a table of two vectors as input and outputs the distance between them using the ''p''-norm.
+
+Example:
+<file lua>
+mlp_l1=nn.PairwiseDistance(1)
+mlp_l2=nn.PairwiseDistance(2)
+x=lab.new(1,2,3)
+y=lab.new(4,5,6)
+print(mlp_l1:forward({x,y}))
+print(mlp_l2:forward({x,y}))
+</file>
+gives the output:
+<file lua>
+ 9
+[torch.Tensor of dimension 1]
+
+ 5.1962
+[torch.Tensor of dimension 1]
+</file>
+
+A more complicated example:
+<file lua>
+-- imagine we have one network we are interested in, it is called "p1_mlp"
+p1_mlp= nn.Sequential(); p1_mlp:add(nn.Linear(5,2))
+
+-- But we want to push examples towards or away from each other
+-- so we make another copy of it called p2_mlp
+-- this *shares* the same weights via the set command, but has its own set of temporary gradient storage
+-- that's why we create it again (so that the gradients of the pair don't wipe each other)
+p2_mlp= nn.Sequential(); p2_mlp:add(nn.Linear(5,2))
+p2_mlp:get(1).weight:set(p1_mlp:get(1).weight)
+p2_mlp:get(1).bias:set(p1_mlp:get(1).bias)
+
+-- we make a parallel table that takes a pair of examples as input. they both go through the same (cloned) mlp
+prl = nn.ParallelTable()
+prl:add(p1_mlp)
+prl:add(p2_mlp)
+
+-- now we define our top level network that takes this parallel table and computes the pairwise distance between
+-- the pair of outputs
+mlp= nn.Sequential()
+mlp:add(prl)
+mlp:add(nn.PairwiseDistance(1))
+
+-- and a criterion for pushing together or pulling apart pairs
+crit=nn.HingeEmbeddingCriterion(1)
+
+-- lets make two example vectors
+x=lab.rand(5)
+y=lab.rand(5)
+
+
+-- Use a typical generic gradient update function
+function gradUpdate(mlp, x, y, criterion, learningRate)
+   local pred = mlp:forward(x)
+   local err = criterion:forward(pred, y)
+   local gradCriterion = criterion:backward(pred, y)
+   mlp:zeroGradParameters()
+   mlp:backward(x, gradCriterion)
+   mlp:updateParameters(learningRate)
+end
+
+-- push the pair x and y together, notice how then the distance between them given
+-- by print(mlp:forward({x,y})[1]) gets smaller
+for i=1,10 do
+   gradUpdate(mlp,{x,y},1,crit,0.01)
+   print(mlp:forward({x,y})[1])
+end
+
+
+-- pull apart the pair x and y, notice how then the distance between them given
+-- by print(mlp:forward({x,y})[1]) gets larger
+
+for i=1,10 do
+   gradUpdate(mlp,{x,y},-1,crit,0.01)
+   print(mlp:forward({x,y})[1])
+end
+
+</file>
+
+==== DotProduct ====
+{{anchor:nn.DotProduct}}
+
+''module'' = ''DotProduct()'' creates a module that takes a table of two vectors as input and outputs the dot product between them.
+
+Example:
+<file lua>
+mlp=nn.DotProduct()
+x=lab.new(1,2,3)
+y=lab.new(4,5,6)
+print(mlp:forward({x,y}))
+</file>
+gives the output:
+<file lua>
+ 32
+[torch.Tensor of dimension 1]
+</file>
+
+
+A more complicated example:
+<file lua>
+
+-- Train a ranking function so that mlp:forward({{x,y},{x,z}}) returns a number
+-- which indicates whether x is better matched with y or z (larger score = better match), or vice versa.
+
+mlp1=nn.Linear(5,10)
+mlp2=mlp1:clone('weight','bias')
+
+prl=nn.ParallelTable();
+prl:add(mlp1); prl:add(mlp2)
+
+mlp1=nn.Sequential()
+mlp1:add(prl)
+mlp1:add(nn.DotProduct())
+
+mlp2=mlp1:clone('weight','bias')
+
+mlp=nn.Sequential()
+prla=nn.ParallelTable()
+prla:add(mlp1)
+prla:add(mlp2)
+mlp:add(prla)
+
+x=lab.rand(5);
+y=lab.rand(5)
+z=lab.rand(5)
+
+
+print(mlp1:forward{x,x})
+print(mlp1:forward{x,y})
+print(mlp1:forward{y,y})
+
+
+crit=nn.MarginRankingCriterion(1);
+
+-- Use a typical generic gradient update function
+function gradUpdate(mlp, x, y, criterion, learningRate)
+ local pred = mlp:forward(x)
+ local err = criterion:forward(pred, y)
+ local gradCriterion = criterion:backward(pred, y)
+ mlp:zeroGradParameters()
+ mlp:backward(x, gradCriterion)
+ mlp:updateParameters(learningRate)
+end
+
+inp={{x,y},{x,z}}
+
+math.randomseed(1)
+
+-- make the pair x and y have a larger dot product than x and z
+
+for i=1,100 do
+ gradUpdate(mlp,inp,1,crit,0.05)
+ o1=mlp1:forward{x,y}[1];
+ o2=mlp2:forward{x,z}[1];
+ o=crit:forward(mlp:forward{{x,y},{x,z}},1)
+ print(o1,o2,o)
+end
+
+print "******************"
+
+-- make the pair x and z have a larger dot product than x and y
+
+for i=1,100 do
+ gradUpdate(mlp,inp,-1,crit,0.05)
+ o1=mlp1:forward{x,y}[1];
+ o2=mlp2:forward{x,z}[1];
+ o=crit:forward(mlp:forward{{x,y},{x,z}},-1)
+ print(o1,o2,o)
+end
+</file>
+
+
+==== CosineDistance ====
+{{anchor:nn.CosineDistance}}
+
+''module'' = ''CosineDistance()'' creates a module that takes a table of two vectors as input and outputs the cosine distance between them.
+
+Example:
+<file lua>
+mlp=nn.CosineDistance()
+x=lab.new(1,2,3)
+y=lab.new(4,5,6)
+print(mlp:forward({x,y}))
+</file>
+gives the output:
+<file lua>
+ 0.9746
+[torch.Tensor of dimension 1]
+</file>
+
+A more complicated example:
+<file lua>
+
+-- imagine we have one network we are interested in, it is called "p1_mlp"
+p1_mlp= nn.Sequential(); p1_mlp:add(nn.Linear(5,2))
+
+-- But we want to push examples towards or away from each other
+-- so we make another copy of it called p2_mlp
+-- this *shares* the same weights via the set command, but has its own set of temporary gradient storage
+-- that's why we create it again (so that the gradients of the pair don't wipe each other)
+p2_mlp= p1_mlp:clone('weight','bias')
+
+-- we make a parallel table that takes a pair of examples as input. they both go through the same (cloned) mlp
+prl = nn.ParallelTable()
+prl:add(p1_mlp)
+prl:add(p2_mlp)
+
+-- now we define our top level network that takes this parallel table and computes the cosine distance between
+-- the pair of outputs
+mlp= nn.Sequential()
+mlp:add(prl)
+mlp:add(nn.CosineDistance())
+
+
+-- lets make two example vectors
+x=lab.rand(5)
+y=lab.rand(5)
+
+-- Grad update function..
+function gradUpdate(mlp, x, y, learningRate)
+   local pred = mlp:forward(x)
+   if pred[1]*y < 1 then
+      gradCriterion=lab.new(-y)
+      mlp:zeroGradParameters()
+      mlp:backward(x, gradCriterion)
+      mlp:updateParameters(learningRate)
+   end
+end
+
+-- push the pair x and y together, the distance should get larger..
+for i=1,1000 do
+ gradUpdate(mlp,{x,y},1,0.1)
+ if ((i%100)==0) then print(mlp:forward({x,y})[1]);end
+end
+
+
+-- pull apart the pair x and y, the distance should get smaller..
+
+for i=1,1000 do
+ gradUpdate(mlp,{x,y},-1,0.1)
+ if ((i%100)==0) then print(mlp:forward({x,y})[1]);end
+end
+</file>
+
+
+
+==== CriterionTable ====
+{{anchor:nn.CriterionTable}}
+
+''module'' = ''CriterionTable(criterion)''
+
+Creates a module that wraps a Criterion module so that it can accept a Table of inputs. Typically the table would contain two elements: the input and output ''x'' and ''y'' that the Criterion compares.
+
+Example:
+<file lua>
+mlp = nn.CriterionTable(nn.MSECriterion())
+require "lab"
+x=lab.randn(5)
+y=lab.randn(5)
+print(mlp:forward{x,x})
+print(mlp:forward{x,y})
+</file>
+gives the output:
+<file lua>
+0
+1.9028918413199
+</file>
+
+Here is a more complex example of embedding the criterion into a network:
+<file lua>
+require "lab"
+
+function table.print(t)
+ for i,k in pairs(t) do print(i,k); end
+end
+
+mlp=nn.Sequential(); -- Create an mlp that takes input
+ main_mlp=nn.Sequential(); -- and output using ParallelTable
+ main_mlp:add(nn.Linear(5,4))
+ main_mlp:add(nn.Linear(4,3))
+ cmlp=nn.ParallelTable();
+ cmlp:add(main_mlp)
+ cmlp:add(nn.Identity())
+mlp:add(cmlp)
+mlp:add(nn.CriterionTable(nn.MSECriterion())) -- Apply the Criterion
+
+for i=1,20 do -- Train for a few iterations
+ x=lab.ones(5);
+ y=torch.Tensor(3); y:copy(x:narrow(1,1,3))
+ err=mlp:forward{x,y} -- Pass in both input and output
+ print(err)
+
+ mlp:zeroGradParameters();
+ mlp:backward({x, y} );
+ mlp:updateParameters(0.05);
+end
+</file>
+
+==== CAddTable ====
+{{anchor:nn.CAddTable}}
+
+Takes a table of Tensors and outputs the sum of all of them.
+
+<file lua>
+ii = {lab.ones(5),lab.ones(5)*2,lab.ones(5)*3}
+=ii[1]
+ 1
+ 1
+ 1
+ 1
+ 1
+[torch.DoubleTensor of dimension 5]
+
+return ii[2]
+ 2
+ 2
+ 2
+ 2
+ 2
+[torch.DoubleTensor of dimension 5]
+
+return ii[3]
+ 3
+ 3
+ 3
+ 3
+ 3
+[torch.DoubleTensor of dimension 5]
+
+m=nn.CAddTable()
+=m:forward(ii)
+ 6
+ 6
+ 6
+ 6
+ 6
+[torch.DoubleTensor of dimension 5]
+</file>
+
+==== CSubTable ====
+{{anchor:nn.CSubTable}}
+
+Takes a table with two Tensors and returns the component-wise
+subtraction between them.
+
+<file lua>
+m=nn.CSubTable()
+=m:forward({lab.ones(5)*2.2,lab.ones(5)})
+ 1.2000
+ 1.2000
+ 1.2000
+ 1.2000
+ 1.2000
+[torch.DoubleTensor of dimension 5]
+</file>
+
+==== CMulTable ====
+{{anchor:nn.CMulTable}}
+
+Takes a table of tensors and outputs the multiplication of all of them.
+
+<file lua>
+ii = {lab.ones(5)*2,lab.ones(5)*3,lab.ones(5)*4}
+m=nn.CMulTable()
+=m:forward(ii)
+ 24
+ 24
+ 24
+ 24
+ 24
+[torch.DoubleTensor of dimension 5]
+
+</file>
+
+==== CDivTable ====
+{{anchor:nn.CDivTable}}
+
+Takes a table with two Tensors and returns the component-wise
+division between them.
+
+<file lua>
+m=nn.CDivTable()
+=m:forward({lab.ones(5)*2.2,lab.ones(5)*4.4})
+ 0.5000
+ 0.5000
+ 0.5000
+ 0.5000
+ 0.5000
+[torch.DoubleTensor of dimension 5]
+</file>
+
+====== Criterions ======
+{{anchor:nn.Criterions}}
+
+Criterions are helpful to train a neural network. Given an input and a
+target, they compute a gradient according to a given loss
+function. [[#nn.AbsCriterion|AbsCriterion]] and
+[[#nn.MSECriterion|MSECriterion]] are perfect for regression problems, while
+[[#nn.ClassNLLCriterion|ClassNLLCriterion]] is the criterion of choice when
+dealing with classification.
+
+Criterions are [[..:torch:file#torch.file.serialization|serializable]].
+
+===== Criterion =====
+{{anchor:nn.Criterion}}
+
+This is an abstract class which declares methods defined in all criterions.
+This class is [[..:torch:file#torch.file.serialization|serializable]].
+
+==== [output] forward(input, target) ====
+{{anchor:nn.Criterion.forward}}
+
+Given an ''input'' and a ''target'', compute the loss function associated with the criterion and return the
+result. In general ''input'' and ''target'' are [[..:torch:tensor|tensors]], but some specific criterions
+might require some other type of object.
+
+The ''output'' returned should be a scalar in general.
+
+The state variable [[#nn.Criterion.output|self.output]] should be updated after a call to ''forward()''.
+
+==== [gradInput] backward(input, target) ====
+{{anchor:nn.Criterion.backward}}
+
+Given an ''input'' and a ''target'', compute the gradients of the loss function associated with the criterion and
+return the result. In general ''input'', ''target'' and ''gradInput'' are [[..:torch:tensor|tensors]], but some specific criterions
+might require some other type of object.
+
+The state variable [[#nn.Criterion.gradInput|self.gradInput]] should be updated after a call to ''backward()''.
+
+==== State variable: output ====
+{{anchor:nn.Criterion.output}}
+
+State variable which contains the result of the last [[#nn.Criterion.forward|forward(input, target)]] call.
+
+==== State variable: gradInput ====
+{{anchor:nn.Criterion.gradInput}}
+
+State variable which contains the result of the last [[#nn.Criterion.backward|backward(input, target)]] call.
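+
+A typical calling sequence is a ''forward()'' followed by a ''backward()'' on the same ''(input, target)'' pair. The sketch below illustrates this with [[#nn.MSECriterion|MSECriterion]] and random tensors, both chosen purely for illustration:
+<file lua>
+require "lab"
+criterion = nn.MSECriterion()      -- any concrete criterion would do
+input = lab.randn(5)
+target = lab.randn(5)
+loss = criterion:forward(input, target)        -- also stored in criterion.output
+gradInput = criterion:backward(input, target)  -- also stored in criterion.gradInput
+print(loss)
+print(gradInput)
+</file>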
+
+===== AbsCriterion =====
+{{anchor:nn.AbsCriterion}}
+
+<file lua>
+criterion = AbsCriterion()
+</file>
+
+Creates a criterion that
+measures the mean absolute error between the ''n'' elements of the input ''x''
+and output ''y'':
+
+''loss(x,y)'' = ''1/n \sum |x_i-y_i|''.
+
+If ''x'' and ''y'' are ''d''-dimensional Tensors with a total of ''n'' elements,
+the sum operation still operates over all the elements, and divides by ''n''.
+
+The division by ''n'' can be avoided if one sets the internal variable ''sizeAverage'' to ''false'':
+<file lua>
+criterion = nn.AbsCriterion()
+criterion.sizeAverage = false
+</file>
+
+===== ClassNLLCriterion =====
+{{anchor:nn.ClassNLLCriterion}}
+
+<file lua>
+criterion = ClassNLLCriterion()
+</file>
+
+The negative log likelihood criterion. It is useful to train a classification
+problem with ''n'' classes. The ''input'' given through a ''forward()'' is
+expected to contain //log-probabilities// of each class: ''input'' has to be a
+1D tensor of size ''n''. Obtaining log-probabilities in a neural network is
+easily achieved by adding a [[#nn.LogSoftMax|LogSoftMax]] layer in the last
+layer of your neural network.
+
+This criterion expects a class index (1 to the number of classes) as ''target''
+when calling [[#nn.Criterion.forward|forward(input, target)]] and
+[[#nn.Criterion.backward|backward(input, target)]].
+
+The loss can be described as:
+<file lua>
+loss(x, class) = forward(x, class) = -x[class]
+</file>
+
+The following is a code fragment showing how to make a gradient step
+given an input ''x'', a desired output ''y'' (an integer ''1'' to ''n'',
+in this case ''n'' = ''2'' classes),
+a network ''mlp'' and a learning rate ''learningRate'':
+<file lua>
+function gradUpdate(mlp, x, y, learningRate)
+   local criterion = nn.ClassNLLCriterion()
+   local pred = mlp:forward(x)
+   local err = criterion:forward(pred, y)
+   mlp:zeroGradParameters()
+   local t = criterion:backward(pred, y)
+   mlp:backward(x, t)
+   mlp:updateParameters(learningRate)
+end
+</file>
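+
+For completeness, here is a sketch of a small classifier the fragment above could be applied to; the sizes (5 input features, 2 classes) and the number of iterations are arbitrary:
+<file lua>
+require "lab"
+mlp = nn.Sequential()
+mlp:add(nn.Linear(5, 2))
+mlp:add(nn.LogSoftMax())  -- so that the output contains log-probabilities
+
+x = lab.rand(5)
+for i = 1, 100 do
+   gradUpdate(mlp, x, 1, 0.01)  -- train x to belong to class 1
+end
+print(mlp:forward(x))
+</file>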
+
+===== MarginCriterion =====
+{{anchor:nn.MarginCriterion}}
+
+<file lua>
+criterion = MarginCriterion()
+</file>
+
+Creates a criterion that optimizes a two-class classification hinge loss (margin-based loss) between input ''x'' (a Tensor of dimension 1) and output ''y'' (which is a scalar, either 1 or -1) :
+
+<file lua>
+loss(x,y) = forward(x,y) = max(0, m - y*x).
+</file>
+
+''m'' is the margin, which is by default 1.
+
+<file lua>
+criterion = MarginCriterion(marginValue)
+</file>
+
+sets a different value of ''m''.
+
+
+Example:
+<file lua>
+require "nn"
+require "lab"
+
+function gradUpdate(mlp, x, y, criterion, learningRate)
+ local pred = mlp:forward(x)
+ local err = criterion:forward(pred, y)
+ local gradCriterion = criterion:backward(pred, y)
+ mlp:zeroGradParameters()
+ mlp:backward(x, gradCriterion)
+ mlp:updateParameters(learningRate)
+end
+
+mlp=nn.Sequential()
+mlp:add(nn.Linear(5,1))
+
+x1=lab.rand(5)
+x2=lab.rand(5)
+criterion=nn.MarginCriterion(1)
+
+for i=1,1000 do
+ gradUpdate(mlp,x1,1,criterion,0.01)
+ gradUpdate(mlp,x2,-1,criterion,0.01)
+end
+
+print(mlp:forward(x1))
+print(mlp:forward(x2))
+
+print(criterion:forward(mlp:forward(x1),1))
+print(criterion:forward(mlp:forward(x2),-1))
+</file>
+gives the output:
+<file lua>
+ 1.0043
+[torch.Tensor of dimension 1]
+
+
+-1.0061
+[torch.Tensor of dimension 1]
+
+0
+0
+</file>
+i.e. the mlp successfully separates the two data points such that they both have a margin of 1, and hence a loss of 0.
+
+===== MSECriterion =====
+{{anchor:nn.MSECriterion}}
+
+<file lua>
+criterion = MSECriterion()
+</file>
+
+Creates a criterion that measures the mean squared error between ''n'' elements in the input ''x''
+and output ''y'':
+
+<file lua>
+loss(x,y) = forward(x,y) = 1/n \sum |x_i-y_i|^2 .
+</file>
+
+If ''x'' and ''y'' are ''d''-dimensional Tensors with a total of ''n'' elements,
+the sum operation still operates over all the elements, and divides by ''n''. The two tensors must
+have the same number of elements (but their sizes might be different...)
+
+The division by ''n'' can be avoided if one sets the internal variable ''sizeAverage'' to ''false'':
+<file lua>
+criterion = nn.MSECriterion()
+criterion.sizeAverage = false
+</file>
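+
+The effect of ''sizeAverage'' can be checked directly; the following is a sketch with arbitrary tensors:
+<file lua>
+require "lab"
+x = lab.randn(10)
+y = lab.randn(10)
+averaged = nn.MSECriterion()
+summed = nn.MSECriterion()
+summed.sizeAverage = false
+print(summed:forward(x, y) / averaged:forward(x, y))  -- prints 10, the number of elements
+</file>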
+
+===== MultiCriterion =====
+{{anchor:nn.MultiCriterion}}
+
+<file lua>
+criterion = MultiCriterion()
+</file>
+
+This returns a Criterion which is a weighted sum of other Criterions.
+Criterions are added using the method:
+
+''criterion:add(singleCriterion, weight)''
+
+where ''weight'' is a scalar.
+
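+For example, the following sketch (with arbitrary weights) combines an MSE term and an absolute-error term:
+<file lua>
+require "lab"
+criterion = nn.MultiCriterion()
+criterion:add(nn.MSECriterion(), 0.5)
+criterion:add(nn.AbsCriterion(), 1.0)
+
+x = lab.randn(5)
+y = lab.randn(5)
+print(criterion:forward(x, y))  -- 0.5*MSE(x,y) + 1.0*Abs(x,y)
+</file>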
+
+===== HingeEmbeddingCriterion =====
+{{anchor:nn.HingeEmbeddingCriterion}}
+
+<file lua>
+criterion = HingeEmbeddingCriterion()
+</file>
+
+Creates a criterion that measures the loss given an input
+''x'' which is a 1-dimensional vector and a label ''y'' (1 or -1).
+This is usually used for measuring whether two inputs are similar
+or dissimilar, e.g. using the L1 pairwise distance,
+and is typically used for
+learning nonlinear embeddings or semi-supervised learning.
+
+<verbatim>
+loss(x,y) = forward(x,y) = x,                   if y =  1
+                         = max(0, margin - x),  if y = -1
+</verbatim>
+
+The ''margin'' has a default value of 1, or can be set in the constructor:
+<file lua>
+criterion = HingeEmbeddingCriterion(marginValue)
+</file>
+
+Example use:
+<file lua>
+-- imagine we have one network we are interested in, it is called "p1_mlp"
+p1_mlp= nn.Sequential(); p1_mlp:add(nn.Linear(5,2))
+
+-- But we want to push examples towards or away from each other
+-- so we make another copy of it called p2_mlp
+-- this *shares* the same weights via the set command, but has its own set of temporary gradient storage
+-- that's why we create it again (so that the gradients of the pair don't wipe each other)
+p2_mlp= nn.Sequential(); p2_mlp:add(nn.Linear(5,2))
+p2_mlp:get(1).weight:set(p1_mlp:get(1).weight)
+p2_mlp:get(1).bias:set(p1_mlp:get(1).bias)
+
+-- we make a parallel table that takes a pair of examples as input. they both go through the same (cloned) mlp
+prl = nn.ParallelTable()
+prl:add(p1_mlp)
+prl:add(p2_mlp)
+
+-- now we define our top level network that takes this parallel table and computes the pairwise distance between
+-- the pair of outputs
+mlp= nn.Sequential()
+mlp:add(prl)
+mlp:add(nn.PairwiseDistance(1))
+
+-- and a criterion for pushing together or pulling apart pairs
+crit=nn.HingeEmbeddingCriterion(1)
+
+-- let's make two example vectors
+require "lab"
+x = lab.rand(5)
+y = lab.rand(5)
+
+
+-- Use a typical generic gradient update function
+function gradUpdate(mlp, x, y, criterion, learningRate)
+   local pred = mlp:forward(x)
+   local err = criterion:forward(pred, y)
+   local gradCriterion = criterion:backward(pred, y)
+   mlp:zeroGradParameters()
+   mlp:backward(x, gradCriterion)
+   mlp:updateParameters(learningRate)
+end
+
+-- push the pair x and y together, notice how then the distance between them given
+-- by print(mlp:forward({x,y})[1]) gets smaller
+for i=1,10 do
+   gradUpdate(mlp, {x,y}, 1, crit, 0.01)
+   print(mlp:forward({x,y})[1])
+end
+
+
+-- pull apart the pair x and y, notice how then the distance between them given
+-- by print(mlp:forward({x,y})[1]) gets larger
+
+for i=1,10 do
+   gradUpdate(mlp, {x,y}, -1, crit, 0.01)
+   print(mlp:forward({x,y})[1])
+end
+
+</file>
+
+===== L1HingeEmbeddingCriterion =====
+{{anchor:nn.L1HingeEmbeddingCriterion}}
+
+<file lua>
+criterion = L1HingeEmbeddingCriterion(margin)
+</file>
+
+Creates a criterion that measures the loss given an input
+''x'' = ''{x1,x2}'', a table of two tensors, and a label ''y'' (1 or -1).
+This is used for measuring whether two inputs are similar
+or dissimilar, using the L1 distance, and is typically used for
+learning nonlinear embeddings or semi-supervised learning.
+
+<verbatim>
+loss(x,y) = forward(x,y) = ||x1-x2||_1,                   if y =  1
+                         = max(0, margin - ||x1-x2||_1),  if y = -1
+</verbatim>
+
+The ''margin'' has a default value of 1, or can be set in the constructor:
+<file lua>
+criterion = L1HingeEmbeddingCriterion(marginValue)
+</file>
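+
+A direct call can be sketched as follows (the tensors and the margin are arbitrary):
+<file lua>
+require "lab"
+criterion = nn.L1HingeEmbeddingCriterion(0.5)
+x1 = lab.rand(5)
+x2 = lab.rand(5)
+print(criterion:forward({x1, x2}, 1))   -- ||x1-x2||_1
+print(criterion:forward({x1, x2}, -1))  -- max(0, 0.5 - ||x1-x2||_1)
+</file>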
+
+===== CosineEmbeddingCriterion =====
+{{anchor:nn.CosineEmbeddingCriterion}}
+
+<file lua>
+criterion = nn.CosineEmbeddingCriterion(margin)
+</file>
+
+Creates a criterion that measures the loss given an input
+''x'' = ''{x1,x2}'', a table of two tensors, and a label ''y'' (1 or -1).
+This is used for measuring whether two inputs are similar
+or dissimilar, using the cosine distance, and is typically used for
+learning nonlinear embeddings or semi-supervised learning.
+
+''margin'' should be a number from -1 to 1; a value between 0 and 0.5 is suggested.
+If ''margin'' is missing, the default value is 0. ''forward()'' and ''backward()'' have to be used alternately.
+
+The loss function is:
+<verbatim>
+loss(x,y) = forward(x,y) = 1 - cos(x1, x2),               if y =  1
+                         = max(0, cos(x1, x2) - margin),  if y = -1
+</verbatim>
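+
+A small sketch of a single call (the tensors and the margin are arbitrary):
+<file lua>
+require "lab"
+criterion = nn.CosineEmbeddingCriterion(0.3)
+x1 = lab.rand(5)
+x2 = lab.rand(5)
+print(criterion:forward({x1, x2}, 1))  -- 1 - cos(x1, x2); with y = -1 it would be max(0, cos(x1, x2) - 0.3)
+</file>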
+
+===== MarginRankingCriterion =====
+{{anchor:nn.MarginRankingCriterion}}
+
+<file lua>
+criterion = nn.MarginRankingCriterion(margin)
+</file>
+
+Creates a criterion that measures the loss given an input
+''x'' = ''{x1,x2}'', a table of two Tensors of size 1 (they contain only scalars),
+and a label ''y'' (1 or -1):
+
+If ''y'' = ''1'' then it is assumed the first input should be ranked higher (have a larger value)
+than the second input, and vice-versa for ''y'' = ''-1''.
+
+The loss function is:
+<verbatim>
+loss(x,y) = forward(x,y) = max(0,-y*(x[1]-x[2])+margin)
+</verbatim>
+
+Example:
+<file lua>
+require "lab"
+
+p1_mlp = nn.Linear(5,2)
+p2_mlp= p1_mlp:clone('weight','bias')
+
+prl=nn.ParallelTable()
+prl:add(p1_mlp)
+prl:add(p2_mlp)
+
+mlp1=nn.Sequential()
+mlp1:add(prl)
+mlp1:add(nn.DotProduct())
+
+mlp2=mlp1:clone('weight','bias')
+
+mlpa=nn.Sequential()
+prla=nn.ParallelTable()
+prla:add(mlp1)
+prla:add(mlp2)
+mlpa:add(prla)
+
+crit=nn.MarginRankingCriterion(0.1)
+
+x=lab.randn(5)
+y=lab.randn(5)
+z=lab.randn(5)
+
+
+-- Use a typical generic gradient update function
+function gradUpdate(mlp, x, y, criterion, learningRate)
+ local pred = mlp:forward(x)
+ local err = criterion:forward(pred, y)
+ local gradCriterion = criterion:backward(pred, y)
+ mlp:zeroGradParameters()
+ mlp:backward(x, gradCriterion)
+ mlp:updateParameters(learningRate)
+end
+
+for i=1,100 do
+ gradUpdate(mlpa,{{x,y},{x,z}},1,crit,0.01)
+ if true then
+ o1=mlp1:forward{x,y}[1];
+ o2=mlp2:forward{x,z}[1];
+ o=crit:forward(mlpa:forward{{x,y},{x,z}},1)
+ print(o1,o2,o)
+ end
+end
+
+print "--"
+
+for i=1,100 do
+ gradUpdate(mlpa,{{x,y},{x,z}},-1,crit,0.01)
+ if true then
+ o1=mlp1:forward{x,y}[1];
+ o2=mlp2:forward{x,z}[1];
+ o=crit:forward(mlpa:forward{{x,y},{x,z}},-1)
+ print(o1,o2,o)
+ end
+end
+</file>
+
+====== Training a neural network ======
+{{anchor:nn.traningneuralnet.dok}}
+
+Training a neural network is easy with a [[#nn.DoItYourself|simple ''for'' loop]].
+While writing your own loop provides great flexibility, you might
+sometimes want a quicker way of training neural
+networks. [[#nn.StochasticGradient|StochasticGradient]], a simple class
+which does the job for you, is provided as standard.
+
+===== StochasticGradient =====
+{{anchor:nn.StochasticGradient.dok}}
+
+''StochasticGradient'' is a high-level class for training [[#nn.Module|neural networks]], using a stochastic gradient
+algorithm. This class is [[..:torch:file#torch.file.serialization|serializable]].
+
+==== StochasticGradient(module, criterion) ====
+{{anchor:nn.StochasticGradient}}
+
+Create a ''StochasticGradient'' class, using the given [[#nn.Module|Module]] and [[#nn.Criterion|Criterion]].
+The class contains [[#nn.StochasticGradientParameters|several parameters]] you might want to set after initialization.
+
+==== train(dataset) ====
+{{anchor:nn.StochasticGradientTrain}}
+
+Train the module and criterion given in the
+[[#nn.StochasticGradient|constructor]] over ''dataset'', using the
+internal [[#nn.StochasticGradientParameters|parameters]].
+
+StochasticGradient expects as a ''dataset'' an object which implements the operator
+''dataset[index]'' and implements the method ''dataset:size()''. The ''size()'' method
+returns the number of examples and ''dataset[i]'' has to return the i-th example.
+
+An ''example'' has to be an object which implements the operator
+''example[field]'', where ''field'' might take the value ''1'' (input features)
+or ''2'' (corresponding label which will be given to the criterion).
+The input is usually a Tensor (unless you use special kinds of modules,
+like [[#nn.TableLayers|table layers]]). The label type depends on the criterion.
+For example, the [[#nn.MSECriterion|MSECriterion]] expects a Tensor, but the
+[[#nn.ClassNLLCriterion|ClassNLLCriterion]] expects an integer number (the class).
+
+Such a dataset is easily constructed by using Lua tables, but it could be any object,
+for example a ''C'' object, as long as the required operators/methods are implemented.
+[[#nn.DoItStochasticGradient|See an example]].
+
+==== Parameters ====
+{{anchor:nn.StochasticGradientParameters}}
+
+''StochasticGradient'' has several fields which have an impact on a call to [[#nn.StochasticGradientTrain|train()]].
+
+ * ''learningRate'': This is the learning rate used during training. The update of the parameters will be ''parameters = parameters - learningRate * parameters_gradient''. Default value is ''0.01''.
+ * ''learningRateDecay'': The learning rate decay. If non-zero, the learning rate (note: the field ''learningRate'' will not change its value) will be computed after each iteration (pass over the dataset) with: ''current_learning_rate = learningRate / (1 + iteration * learningRateDecay)''
+ * ''maxIteration'': The maximum number of iterations (passes over the dataset). Default is ''25''.
+ * ''shuffleIndices'': Boolean which says if the examples will be randomly sampled or not. Default is ''true''. If ''false'', the examples will be taken in the order of the dataset.
+ * ''hookExample'': A possible hook function which will be called (if non-nil) during training after each example has been forwarded and backwarded through the network. The function takes ''(self, example)'' as parameters. Default is ''nil''.
+ * ''hookIteration'': A possible hook function which will be called (if non-nil) during training after a complete pass over the dataset, as sketched below. The function takes ''(self, iteration)'' as parameters. Default is ''nil''.
+
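+For instance, a hook reporting progress after each pass over the dataset can be sketched as follows (''mlp'' and ''criterion'' are assumed to be an already constructed module and criterion):
+<file lua>
+trainer = nn.StochasticGradient(mlp, criterion)
+trainer.learningRate = 0.01
+trainer.maxIteration = 10
+trainer.hookIteration = function(self, iteration)
+   print("# finished pass " .. iteration .. " over the dataset")
+end
+</file>
+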
+===== Example of training using StochasticGradient =====
+{{anchor:nn.DoItStochasticGradient}}
+
+We show an example here on a classical XOR problem.
+
+**Dataset**
+
+We first need to create a dataset, following the conventions described in
+[[#nn.StochasticGradientTrain|StochasticGradient]].
+<file lua>
+require "lab"
+dataset={};
+function dataset:size() return 100 end -- 100 examples
+for i=1,dataset:size() do
+ local input = lab.randn(2); -- normally distributed example in 2d
+ local output = torch.Tensor(1);
+ if input[1]*input[2]>0 then -- calculate label for XOR function
+ output[1] = -1;
+ else
+ output[1] = 1
+ end
+ dataset[i] = {input, output}
+end
+</file>
+
+**Neural Network**
+
+We create a simple neural network with one hidden layer.
+<file lua>
+require "nn"
+mlp = nn.Sequential(); -- make a multi-layer perceptron
+inputs = 2; outputs = 1; HUs = 20; -- parameters
+mlp:add(nn.Linear(inputs, HUs))
+mlp:add(nn.Tanh())
+mlp:add(nn.Linear(HUs, outputs))
+</file>
+
+**Training**
+
+We choose the Mean Squared Error criterion and train the beast.
+<file lua>
+criterion = nn.MSECriterion()
+trainer = nn.StochasticGradient(mlp, criterion)
+trainer.learningRate = 0.01
+trainer:train(dataset)
+</file>
+
+**Test the network**
+
+<file lua>
+x = torch.Tensor(2)
+x[1] = 0.5; x[2] = 0.5; print(mlp:forward(x))
+x[1] = 0.5; x[2] = -0.5; print(mlp:forward(x))
+x[1] = -0.5; x[2] = 0.5; print(mlp:forward(x))
+x[1] = -0.5; x[2] = -0.5; print(mlp:forward(x))
+</file>
+
+You should see something like:
+<file lua>
+> x = torch.Tensor(2)
+> x[1] = 0.5; x[2] = 0.5; print(mlp:forward(x))
+
+-0.3490
+[torch.Tensor of dimension 1]
+
+> x[1] = 0.5; x[2] = -0.5; print(mlp:forward(x))
+
+ 1.0561
+[torch.Tensor of dimension 1]
+
+> x[1] = -0.5; x[2] = 0.5; print(mlp:forward(x))
+
+ 0.8640
+[torch.Tensor of dimension 1]
+
+> x[1] = -0.5; x[2] = -0.5; print(mlp:forward(x))
+
+-0.2941
+[torch.Tensor of dimension 1]
+</file>
+
+===== Example of manual training of a neural network =====
+{{anchor:nn.DoItYourself}}
+
+We show an example here on a classical XOR problem.
+
+**Neural Network**
+
+We create a simple neural network with one hidden layer.
+<file lua>
+require "nn"
+mlp = nn.Sequential(); -- make a multi-layer perceptron
+inputs = 2; outputs = 1; HUs = 20; -- parameters
+mlp:add(nn.Linear(inputs, HUs))
+mlp:add(nn.Tanh())
+mlp:add(nn.Linear(HUs, outputs))
+</file>
+
+**Loss function**
+
+We choose the Mean Squared Error criterion.
+<file lua>
+criterion = nn.MSECriterion()
+</file>
+
+**Training**
+
+We create data //on the fly// and feed it to the neural network.
+
+<file lua>
+require "lab"
+for i = 1,2500 do
+ -- random sample
+ local input= lab.randn(2); -- normally distributed example in 2d
+ local output= torch.Tensor(1);
+ if input[1]*input[2] > 0 then -- calculate label for XOR function
+ output[1] = -1
+ else
+ output[1] = 1
+ end
+
+ -- feed it to the neural network and the criterion
+ criterion:forward(mlp:forward(input), output)
+
+ -- train over this example in 3 steps
+ -- (1) zero the accumulation of the gradients
+ mlp:zeroGradParameters()
+ -- (2) accumulate gradients
+ mlp:backward(input, criterion:backward(mlp.output, output))
+ -- (3) update parameters with a 0.01 learning rate
+ mlp:updateParameters(0.01)
+end
+</file>
+
+**Test the network**
+
+<file lua>
+x = torch.Tensor(2)
+x[1] = 0.5; x[2] = 0.5; print(mlp:forward(x))
+x[1] = 0.5; x[2] = -0.5; print(mlp:forward(x))
+x[1] = -0.5; x[2] = 0.5; print(mlp:forward(x))
+x[1] = -0.5; x[2] = -0.5; print(mlp:forward(x))
+</file>
+
+You should see something like:
+<file lua>
+> x = torch.Tensor(2)
+> x[1] = 0.5; x[2] = 0.5; print(mlp:forward(x))
+
+-0.6140
+[torch.Tensor of dimension 1]
+
+> x[1] = 0.5; x[2] = -0.5; print(mlp:forward(x))
+
+ 0.8878
+[torch.Tensor of dimension 1]
+
+> x[1] = -0.5; x[2] = 0.5; print(mlp:forward(x))
+
+ 0.8548
+[torch.Tensor of dimension 1]
+
+> x[1] = -0.5; x[2] = -0.5; print(mlp:forward(x))
+
+-0.5498
+[torch.Tensor of dimension 1]
+</file>
diff --git a/dok/lena.jpg b/dok/lena.jpg
new file mode 100644
index 0000000..d4a8c36
--- /dev/null
+++ b/dok/lena.jpg
Binary files differ
diff --git a/dok/lenap.jpg b/dok/lenap.jpg
new file mode 100644
index 0000000..0e6916d
--- /dev/null
+++ b/dok/lenap.jpg
Binary files differ
diff --git a/dok/logsigmoid.png b/dok/logsigmoid.png
new file mode 100644
index 0000000..f632ed8
--- /dev/null
+++ b/dok/logsigmoid.png
Binary files differ
diff --git a/dok/logsoftmax.png b/dok/logsoftmax.png
new file mode 100644
index 0000000..dec5be5
--- /dev/null
+++ b/dok/logsoftmax.png
Binary files differ
diff --git a/dok/power.png b/dok/power.png
new file mode 100644
index 0000000..958eeb4
--- /dev/null
+++ b/dok/power.png
Binary files differ
diff --git a/dok/sigmmoid.png b/dok/sigmmoid.png
new file mode 100644
index 0000000..48aad7e
--- /dev/null
+++ b/dok/sigmmoid.png
Binary files differ
diff --git a/dok/sigmoid.png b/dok/sigmoid.png
new file mode 100644
index 0000000..48aad7e
--- /dev/null
+++ b/dok/sigmoid.png
Binary files differ
diff --git a/dok/softmax.png b/dok/softmax.png
new file mode 100644
index 0000000..29c5534
--- /dev/null
+++ b/dok/softmax.png
Binary files differ
diff --git a/dok/softmin.png b/dok/softmin.png
new file mode 100644
index 0000000..d1807a4
--- /dev/null
+++ b/dok/softmin.png
Binary files differ
diff --git a/dok/softplus.png b/dok/softplus.png
new file mode 100644
index 0000000..a5ee028
--- /dev/null
+++ b/dok/softplus.png
Binary files differ
diff --git a/dok/softsign.png b/dok/softsign.png
new file mode 100644
index 0000000..0805433
--- /dev/null
+++ b/dok/softsign.png
Binary files differ
diff --git a/dok/sqrt.png b/dok/sqrt.png
new file mode 100644
index 0000000..29b1d42
--- /dev/null
+++ b/dok/sqrt.png
Binary files differ
diff --git a/dok/square.png b/dok/square.png
new file mode 100644
index 0000000..c191eaf
--- /dev/null
+++ b/dok/square.png
Binary files differ
diff --git a/dok/sshrink.png b/dok/sshrink.png
new file mode 100644
index 0000000..99c5d11
--- /dev/null
+++ b/dok/sshrink.png
Binary files differ
diff --git a/dok/tanh.png b/dok/tanh.png
new file mode 100644
index 0000000..d2f77aa
--- /dev/null
+++ b/dok/tanh.png
Binary files differ
diff --git a/generic/Abs.c b/generic/Abs.c
new file mode 100644
index 0000000..8c65813
--- /dev/null
+++ b/generic/Abs.c
@@ -0,0 +1,43 @@
+#ifndef TH_GENERIC_FILE
+#define TH_GENERIC_FILE "generic/Abs.c"
+#else
+
+static int nn_(Abs_updateOutput)(lua_State *L)
+{
+ THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id));
+ THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_(Tensor_id));
+
+ THTensor_(resizeAs)(output, input);
+
+ TH_TENSOR_APPLY2(real, output, real, input, \
+ *output_data = fabs(*input_data);)
+ return 1;
+}
+
+static int nn_(Abs_updateGradInput)(lua_State *L)
+{
+ THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id));
+ THTensor *gradOutput = luaT_checkudata(L, 3, torch_(Tensor_id));
+ THTensor *gradInput = luaT_getfieldcheckudata(L, 1, "gradInput", torch_(Tensor_id));
+
+ THTensor_(resizeAs)(gradInput, input);
+ TH_TENSOR_APPLY3(real, gradInput, real, gradOutput, real, input, \
+ real z = *input_data; \
+ *gradInput_data = *gradOutput_data * (z >= 0 ? 1 : -1);)
+ return 1;
+}
+
+static const struct luaL_Reg nn_(Abs__) [] = {
+ {"Abs_updateOutput", nn_(Abs_updateOutput)},
+ {"Abs_updateGradInput", nn_(Abs_updateGradInput)},
+ {NULL, NULL}
+};
+
+static void nn_(Abs_init)(lua_State *L)
+{
+ luaT_pushmetaclass(L, torch_(Tensor_id));
+ luaT_registeratname(L, nn_(Abs__), "nn");
+ lua_pop(L,1);
+}
+
+#endif
diff --git a/generic/AbsCriterion.c b/generic/AbsCriterion.c
new file mode 100644
index 0000000..b9b948d
--- /dev/null
+++ b/generic/AbsCriterion.c
@@ -0,0 +1,54 @@
+#ifndef TH_GENERIC_FILE
+#define TH_GENERIC_FILE "generic/AbsCriterion.c"
+#else
+
+static int nn_(AbsCriterion_updateOutput)(lua_State *L)
+{
+ THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id));
+ THTensor *target = luaT_checkudata(L, 3, torch_(Tensor_id));
+ int sizeAverage = luaT_getfieldcheckboolean(L, 1, "sizeAverage");
+ real sum;
+
+ sum = 0;
+ TH_TENSOR_APPLY2(real, input, real, target,
+ sum += fabs(*input_data - *target_data);)
+
+ if(sizeAverage)
+ sum /= THTensor_(nElement)(input);
+
+ lua_pushnumber(L, sum);
+ lua_setfield(L, 1, "output");
+
+ lua_pushnumber(L, sum);
+ return 1;
+}
+
+static int nn_(AbsCriterion_updateGradInput)(lua_State *L)
+{
+ THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id));
+ THTensor *target = luaT_checkudata(L, 3, torch_(Tensor_id));
+ int sizeAverage = luaT_getfieldcheckboolean(L, 1, "sizeAverage");
+ THTensor *gradInput = luaT_getfieldcheckudata(L, 1, "gradInput", torch_(Tensor_id));
+ real norm = (sizeAverage ? 1./((real)THTensor_(nElement)(input)) : 1.);
+
+ THTensor_(resizeAs)(gradInput, input);
+ TH_TENSOR_APPLY3(real, gradInput, real, input, real, target,
+ *gradInput_data = ( (*input_data - *target_data) >= 0 ? norm : -norm);)
+
+ return 1;
+}
+
+static const struct luaL_Reg nn_(AbsCriterion__) [] = {
+ {"AbsCriterion_updateOutput", nn_(AbsCriterion_updateOutput)},
+ {"AbsCriterion_updateGradInput", nn_(AbsCriterion_updateGradInput)},
+ {NULL, NULL}
+};
+
+static void nn_(AbsCriterion_init)(lua_State *L)
+{
+ luaT_pushmetaclass(L, torch_(Tensor_id));
+ luaT_registeratname(L, nn_(AbsCriterion__), "nn");
+ lua_pop(L,1);
+}
+
+#endif
diff --git a/generic/Exp.c b/generic/Exp.c
new file mode 100644
index 0000000..b56f379
--- /dev/null
+++ b/generic/Exp.c
@@ -0,0 +1,43 @@
+#ifndef TH_GENERIC_FILE
+#define TH_GENERIC_FILE "generic/Exp.c"
+#else
+
+static int nn_(Exp_updateOutput)(lua_State *L)
+{
+ THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id));
+ THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_(Tensor_id));
+
+ THTensor_(resizeAs)(output, input);
+
+ TH_TENSOR_APPLY2(real, output, real, input, \
+ *output_data = exp(*input_data);)
+
+ return 1;
+}
+
+static int nn_(Exp_updateGradInput)(lua_State *L)
+{
+ THTensor *gradOutput = luaT_checkudata(L, 3, torch_(Tensor_id));
+ THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_(Tensor_id));
+ THTensor *gradInput = luaT_getfieldcheckudata(L, 1, "gradInput", torch_(Tensor_id));
+
+ THTensor_(resizeAs)(gradInput, output);
+ TH_TENSOR_APPLY3(real, gradInput, real, gradOutput, real, output, \
+ *gradInput_data = *gradOutput_data * *output_data;);
+ return 1;
+}
+
+static const struct luaL_Reg nn_(Exp__) [] = {
+ {"Exp_updateOutput", nn_(Exp_updateOutput)},
+ {"Exp_updateGradInput", nn_(Exp_updateGradInput)},
+ {NULL, NULL}
+};
+
+static void nn_(Exp_init)(lua_State *L)
+{
+ luaT_pushmetaclass(L, torch_(Tensor_id));
+ luaT_registeratname(L, nn_(Exp__), "nn");
+ lua_pop(L,1);
+}
+
+#endif
diff --git a/generic/HardShrink.c b/generic/HardShrink.c
new file mode 100644
index 0000000..be98ddc
--- /dev/null
+++ b/generic/HardShrink.c
@@ -0,0 +1,50 @@
+#ifndef TH_GENERIC_FILE
+#define TH_GENERIC_FILE "generic/HardShrink.c"
+#else
+
+static int nn_(HardShrink_updateOutput)(lua_State *L)
+{
+ THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id));
+ real lambda = luaT_getfieldchecknumber(L, 1, "lambda");
+ THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_(Tensor_id));
+
+ THTensor_(resizeAs)(output, input);
+
+ TH_TENSOR_APPLY2(real, output, real, input, \
+ if ((*input_data) > lambda) *output_data = *input_data; \
+ else if ((*input_data) < -lambda) *output_data = *input_data; \
+ else *output_data = 0;);
+ return 1;
+}
+
+static int nn_(HardShrink_updateGradInput)(lua_State *L)
+{
+ THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id));
+ real lambda = luaT_getfieldchecknumber(L, 1, "lambda");
+ THTensor *gradOutput = luaT_checkudata(L, 3, torch_(Tensor_id));
+ THTensor *gradInput = luaT_getfieldcheckudata(L, 1, "gradInput", torch_(Tensor_id));
+
+ THTensor_(resizeAs)(gradInput, input);
+ TH_TENSOR_APPLY3(real, gradInput, real, gradOutput, real, input, \
+ if ((*input_data) > lambda || (*input_data) < -lambda) \
+ *gradInput_data = (*gradOutput_data); \
+ else \
+ *gradInput_data = 0; \
+ );
+ return 1;
+}
+
+static const struct luaL_Reg nn_(HardShrink__) [] = {
+ {"HardShrink_updateOutput", nn_(HardShrink_updateOutput)},
+ {"HardShrink_updateGradInput", nn_(HardShrink_updateGradInput)},
+ {NULL, NULL}
+};
+
+static void nn_(HardShrink_init)(lua_State *L)
+{
+ luaT_pushmetaclass(L, torch_(Tensor_id));
+ luaT_registeratname(L, nn_(HardShrink__), "nn");
+ lua_pop(L,1);
+}
+
+#endif
diff --git a/generic/HardTanh.c b/generic/HardTanh.c
new file mode 100644
index 0000000..3764095
--- /dev/null
+++ b/generic/HardTanh.c
@@ -0,0 +1,50 @@
+#ifndef TH_GENERIC_FILE
+#define TH_GENERIC_FILE "generic/HardTanh.c"
+#else
+
+static int nn_(HardTanh_updateOutput)(lua_State *L)
+{
+ THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id));
+ THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_(Tensor_id));
+
+ THTensor_(resizeAs)(output, input);
+
+ TH_TENSOR_APPLY2(real, output, real, input, \
+ if(*input_data < -1) \
+ *output_data = -1; \
+ else if(*input_data <= 1) \
+ *output_data = *input_data; \
+ else \
+ *output_data = 1;)
+ return 1;
+}
+
+static int nn_(HardTanh_updateGradInput)(lua_State *L)
+{
+ THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id));
+ THTensor *gradOutput = luaT_checkudata(L, 3, torch_(Tensor_id));
+ THTensor *gradInput = luaT_getfieldcheckudata(L, 1, "gradInput", torch_(Tensor_id));
+
+ THTensor_(resizeAs)(gradInput, input);
+ TH_TENSOR_APPLY3(real, gradInput, real, gradOutput, real, input, \
+ if(*input_data < -1 || *input_data > 1) \
+ *gradInput_data = 0; \
+ else \
+ *gradInput_data = *gradOutput_data;);
+ return 1;
+}
+
+static const struct luaL_Reg nn_(HardTanh__) [] = {
+ {"HardTanh_updateOutput", nn_(HardTanh_updateOutput)},
+ {"HardTanh_updateGradInput", nn_(HardTanh_updateGradInput)},
+ {NULL, NULL}
+};
+
+static void nn_(HardTanh_init)(lua_State *L)
+{
+ luaT_pushmetaclass(L, torch_(Tensor_id));
+ luaT_registeratname(L, nn_(HardTanh__), "nn");
+ lua_pop(L,1);
+}
+
+#endif
diff --git a/generic/LogSigmoid.c b/generic/LogSigmoid.c
new file mode 100644
index 0000000..b5bdae4
--- /dev/null
+++ b/generic/LogSigmoid.c
@@ -0,0 +1,49 @@
+#ifndef TH_GENERIC_FILE
+#define TH_GENERIC_FILE "generic/LogSigmoid.c"
+#else
+
+static int nn_(LogSigmoid_updateOutput)(lua_State *L)
+{
+ THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id));
+ THTensor *buffer = luaT_getfieldcheckudata(L, 1, "buffer", torch_(Tensor_id));
+ THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_(Tensor_id));
+
+ THTensor_(resizeAs)(output, input);
+ THTensor_(resizeAs)(buffer, input);
+
+ TH_TENSOR_APPLY3(real, output, real, input, real, buffer, \
+ real z = exp(-*input_data); \
+ *buffer_data = z; \
+ *output_data = -log(1. + z);)
+
+ return 1;
+}
+
+static int nn_(LogSigmoid_updateGradInput)(lua_State *L)
+{
+ THTensor *gradOutput = luaT_checkudata(L, 3, torch_(Tensor_id));
+ THTensor *buffer = luaT_getfieldcheckudata(L, 1, "buffer", torch_(Tensor_id));
+ THTensor *gradInput = luaT_getfieldcheckudata(L, 1, "gradInput", torch_(Tensor_id));
+
+ THTensor_(resizeAs)(gradInput, buffer);
+ TH_TENSOR_APPLY3(real, gradInput, real, gradOutput, real, buffer, \
+ real z = *buffer_data; \
+ *gradInput_data = *gradOutput_data * z / (1. + z);)
+
+ return 1;
+}
+
+static const struct luaL_Reg nn_(LogSigmoid__) [] = {
+ {"LogSigmoid_updateOutput", nn_(LogSigmoid_updateOutput)},
+ {"LogSigmoid_updateGradInput", nn_(LogSigmoid_updateGradInput)},
+ {NULL, NULL}
+};
+
+static void nn_(LogSigmoid_init)(lua_State *L)
+{
+ luaT_pushmetaclass(L, torch_(Tensor_id));
+ luaT_registeratname(L, nn_(LogSigmoid__), "nn");
+ lua_pop(L,1);
+}
+
+#endif
diff --git a/generic/LogSoftMax.c b/generic/LogSoftMax.c
new file mode 100644
index 0000000..5d4dbfc
--- /dev/null
+++ b/generic/LogSoftMax.c
@@ -0,0 +1,111 @@
+#ifndef TH_GENERIC_FILE
+#define TH_GENERIC_FILE "generic/LogSoftMax.c"
+#else
+
+static int nn_(LogSoftMax_updateOutput)(lua_State *L)
+{
+ THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id));
+ THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_(Tensor_id));
+ real *input_data, *output_data;
+ long nframe = 0, dim = 0;
+ long t, d;
+
+ if(input->nDimension == 1)
+ {
+ nframe = 1;
+ dim = input->size[0];
+ }
+ else if(input->nDimension == 2)
+ {
+ nframe = input->size[0];
+ dim = input->size[1];
+ }
+ else
+ THArgCheck(0, 2, "vector or matrix expected");
+
+ input = THTensor_(newContiguous)(input);
+ THTensor_(resizeAs)(output, input);
+
+ input_data = THTensor_(data)(input);
+ output_data = THTensor_(data)(output);
+ for(t = 0; t < nframe; t++)
+ {
+ accreal logsum = 0;
+ real maxInput = -THInf;
+
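+    /* for numerical stability, factor out the per-frame maximum:
+       log(sum_d exp(x_d)) = max_x + log(sum_d exp(x_d - max_x)) */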
+ for(d = 0; d < dim; d++)
+ maxInput = THMax(maxInput, input_data[d]);
+
+ for(d = 0; d < dim; d++)
+ logsum += THExpMinusApprox(maxInput-input_data[d]);
+ logsum = maxInput + log(logsum);
+
+ for(d = 0; d < dim; d++)
+ output_data[d] = input_data[d] - logsum;
+
+ input_data += dim;
+ output_data += dim;
+ }
+
+ THTensor_(free)(input);
+
+ return 1;
+}
+
+static int nn_(LogSoftMax_updateGradInput)(lua_State *L)
+{
+ THTensor *gradOutput = luaT_checkudata(L, 3, torch_(Tensor_id));
+ THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_(Tensor_id));
+ THTensor *gradInput = luaT_getfieldcheckudata(L, 1, "gradInput", torch_(Tensor_id));
+ real *gradInput_data, *gradOutput_data, *output_data;
+ long nframe = 0, dim = 0;
+ long t, d;
+
+ if(output->nDimension == 1)
+ {
+ nframe = 1;
+ dim = output->size[0];
+ }
+ else if(output->nDimension == 2)
+ {
+ nframe = output->size[0];
+ dim = output->size[1];
+ }
+ else
+ THError("vector or matrix expected");
+
+ THTensor_(resizeAs)(gradInput, output);
+ gradInput_data = THTensor_(data)(gradInput);
+ output_data = THTensor_(data)(output);
+ gradOutput_data = THTensor_(data)(gradOutput);
+ for(t = 0; t < nframe; t++)
+ {
+ accreal sum = 0;
+ for(d = 0; d < dim; d++)
+ sum += gradOutput_data[d];
+
+ for(d = 0; d < dim; d++)
+ gradInput_data[d] = gradOutput_data[d] - exp(output_data[d])*sum;
+
+ gradInput_data += dim;
+ output_data += dim;
+ gradOutput_data += dim;
+ }
+
+ return 1;
+}
+
+static const struct luaL_Reg nn_(LogSoftMax__) [] = {
+ {"LogSoftMax_updateOutput", nn_(LogSoftMax_updateOutput)},
+ {"LogSoftMax_updateGradInput", nn_(LogSoftMax_updateGradInput)},
+ {NULL, NULL}
+};
+
+void nn_(LogSoftMax_init)(lua_State *L)
+{
+ luaT_pushmetaclass(L, torch_(Tensor_id));
+ luaT_registeratname(L, nn_(LogSoftMax__), "nn");
+ lua_pop(L,1);
+}
+
+#endif
diff --git a/generic/MSECriterion.c b/generic/MSECriterion.c
new file mode 100644
index 0000000..c53735c
--- /dev/null
+++ b/generic/MSECriterion.c
@@ -0,0 +1,54 @@
+#ifndef TH_GENERIC_FILE
+#define TH_GENERIC_FILE "generic/MSECriterion.c"
+#else
+
+static int nn_(MSECriterion_updateOutput)(lua_State *L)
+{
+ THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id));
+ THTensor *target = luaT_checkudata(L, 3, torch_(Tensor_id));
+ int sizeAverage = luaT_getfieldcheckboolean(L, 1, "sizeAverage");
+ real sum;
+
+ sum = 0;
+ TH_TENSOR_APPLY2(real, input, real, target,
+ real z = (*input_data - *target_data);
+ sum += z*z;)
+
+ if(sizeAverage)
+ sum /= THTensor_(nElement)(input);
+
+ lua_pushnumber(L, sum);
+ lua_setfield(L, 1, "output");
+
+ lua_pushnumber(L, sum);
+ return 1;
+}
+
+static int nn_(MSECriterion_updateGradInput)(lua_State *L)
+{
+ THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id));
+ THTensor *target = luaT_checkudata(L, 3, torch_(Tensor_id));
+ int sizeAverage = luaT_getfieldcheckboolean(L, 1, "sizeAverage");
+ THTensor *gradInput = luaT_getfieldcheckudata(L, 1, "gradInput", torch_(Tensor_id));
+ real norm = (sizeAverage ? 2./((real)THTensor_(nElement)(input)) : 2.);
+
+ THTensor_(resizeAs)(gradInput, input);
+ TH_TENSOR_APPLY3(real, gradInput, real, input, real, target,
+ *gradInput_data = norm * (*input_data - *target_data);)
+ return 1;
+}
+
+static const struct luaL_Reg nn_(MSECriterion__) [] = {
+ {"MSECriterion_updateOutput", nn_(MSECriterion_updateOutput)},
+ {"MSECriterion_updateGradInput", nn_(MSECriterion_updateGradInput)},
+ {NULL, NULL}
+};
+
+static void nn_(MSECriterion_init)(lua_State *L)
+{
+ luaT_pushmetaclass(L, torch_(Tensor_id));
+ luaT_registeratname(L, nn_(MSECriterion__), "nn");
+ lua_pop(L,1);
+}
+
+#endif
diff --git a/generic/Max.c b/generic/Max.c
new file mode 100644
index 0000000..87f52f1
--- /dev/null
+++ b/generic/Max.c
@@ -0,0 +1,100 @@
+#ifndef TH_GENERIC_FILE
+#define TH_GENERIC_FILE "generic/Max.c"
+#else
+
+static int nn_(Max_updateOutput)(lua_State *L)
+{
+ THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id));
+ int dimension = luaT_getfieldcheckint(L, 1, "dimension")-1;
+ THTensor *indices = luaT_getfieldcheckudata(L, 1, "indices", torch_(Tensor_id));
+ THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_(Tensor_id));
+
+ THLongStorage *dim;
+ long i;
+
+ luaL_argcheck(L, dimension >= 0 && dimension < input->nDimension, 2, "dimension out of range");
+
+ dim = THLongStorage_newWithSize(input->nDimension);
+ for(i = 0; i < input->nDimension; i++)
+ dim->data[i] = input->size[i];
+ dim->data[dimension] = 1;
+ THTensor_(resize)(output, dim, NULL);
+ THTensor_(resize)(indices, dim, NULL);
+ THLongStorage_free(dim);
+
+ TH_TENSOR_DIM_APPLY3(real, output, real, input, real, indices, dimension,
+ long theIndex = 0;
+ real theMax = input_data[0];
+ for(i = 1; i < input_size; i++)
+ {
+ if(input_data[i*input_stride] > theMax)
+ {
+ theIndex = i;
+ theMax = input_data[i*input_stride];
+ }
+ }
+ *indices_data = theIndex+1;
+ *output_data = theMax;)
+
+ THTensor_(select)(output, NULL, dimension, 0);
+
+ return 1;
+}
+
+static int nn_(Max_updateGradInput)(lua_State *L)
+{
+ THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id));
+ THTensor *gradOutput = luaT_checkudata(L, 3, torch_(Tensor_id));
+ THTensor *indices = luaT_getfieldcheckudata(L, 1, "indices", torch_(Tensor_id));
+ int dimension = luaT_getfieldcheckint(L, 1, "dimension")-1;
+ THTensor *gradInput = luaT_getfieldcheckudata(L, 1, "gradInput", torch_(Tensor_id));
+
+ THTensor *gradOutputPlusOneDim;
+ THLongStorage *dim, *str;
+ int i, j;
+
+ THTensor_(resizeAs)(gradInput, input);
+ THTensor_(zero)(gradInput);
+
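+  /* build a view of gradOutput with an extra, stride-0 dimension inserted at
+     `dimension`, so that every position along the maxed-out dimension maps to
+     the same gradOutput element; `indices` then selects where to scatter it */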
+ dim = THLongStorage_newWithSize(gradOutput->nDimension+1);
+ str = THLongStorage_newWithSize(gradOutput->nDimension+1);
+ for(i = 0, j = 0; j < gradOutput->nDimension+1; j++)
+ {
+ if(j == dimension)
+ {
+ dim->data[j] = input->size[dimension];
+ str->data[j] = 0;
+ continue;
+ }
+
+ dim->data[j] = gradOutput->size[i];
+ str->data[j] = gradOutput->stride[i];
+ i++;
+ }
+
+ gradOutputPlusOneDim = THTensor_(newWithStorage)(gradOutput->storage, gradOutput->storageOffset, dim, str);
+ THLongStorage_free(dim);
+ THLongStorage_free(str);
+
+ TH_TENSOR_DIM_APPLY3(real, gradInput, real, gradOutputPlusOneDim, real, indices, dimension,
+ gradInput_data[ ((long)(*indices_data)-1)*gradInput_stride ] = *gradOutputPlusOneDim_data;)
+
+ THTensor_(free)(gradOutputPlusOneDim);
+
+ return 1;
+}
+
+static const struct luaL_Reg nn_(Max__) [] = {
+ {"Max_updateOutput", nn_(Max_updateOutput)},
+ {"Max_updateGradInput", nn_(Max_updateGradInput)},
+ {NULL, NULL}
+};
+
+static void nn_(Max_init)(lua_State *L)
+{
+ luaT_pushmetaclass(L, torch_(Tensor_id));
+ luaT_registeratname(L, nn_(Max__), "nn");
+ lua_pop(L,1);
+}
+
+#endif
diff --git a/generic/Min.c b/generic/Min.c
new file mode 100644
index 0000000..d3309df
--- /dev/null
+++ b/generic/Min.c
@@ -0,0 +1,100 @@
+#ifndef TH_GENERIC_FILE
+#define TH_GENERIC_FILE "generic/Min.c"
+#else
+
+static int nn_(Min_updateOutput)(lua_State *L)
+{
+ THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id));
+ int dimension = luaT_getfieldcheckint(L, 1, "dimension")-1;
+ THTensor *indices = luaT_getfieldcheckudata(L, 1, "indices", torch_(Tensor_id));
+ THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_(Tensor_id));
+
+ THLongStorage *dim;
+ long i;
+
+ luaL_argcheck(L, dimension >= 0 && dimension < input->nDimension, 2, "dimension out of range");
+
+ dim = THLongStorage_newWithSize(input->nDimension);
+ for(i = 0; i < input->nDimension; i++)
+ dim->data[i] = input->size[i];
+ dim->data[dimension] = 1;
+ THTensor_(resize)(output, dim, NULL);
+ THTensor_(resize)(indices, dim, NULL);
+ THLongStorage_free(dim);
+
+ TH_TENSOR_DIM_APPLY3(real, output, real, input, real, indices, dimension,
+ long theIndex = 0;
+ real theMin = input_data[0];
+ for(i = 1; i < input_size; i++)
+ {
+ if(input_data[i*input_stride] < theMin)
+ {
+ theIndex = i;
+ theMin = input_data[i*input_stride];
+ }
+ }
+ *indices_data = theIndex+1;
+ *output_data = theMin;)
+
+ THTensor_(select)(output, NULL, dimension, 0);
+
+ return 1;
+}
+
+static int nn_(Min_updateGradInput)(lua_State *L)
+{
+ THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id));
+ THTensor *gradOutput = luaT_checkudata(L, 3, torch_(Tensor_id));
+ THTensor *indices = luaT_getfieldcheckudata(L, 1, "indices", torch_(Tensor_id));
+ int dimension = luaT_getfieldcheckint(L, 1, "dimension")-1;
+ THTensor *gradInput = luaT_getfieldcheckudata(L, 1, "gradInput", torch_(Tensor_id));
+
+ THTensor *gradOutputPlusOneDim;
+ THLongStorage *dim, *str;
+ int i, j;
+
+ THTensor_(resizeAs)(gradInput, input);
+ THTensor_(zero)(gradInput);
+
+ dim = THLongStorage_newWithSize(gradOutput->nDimension+1);
+ str = THLongStorage_newWithSize(gradOutput->nDimension+1);
+ for(i = 0, j = 0; j < gradOutput->nDimension+1; j++)
+ {
+ if(j == dimension)
+ {
+ dim->data[j] = input->size[dimension];
+ str->data[j] = 0;
+ continue;
+ }
+
+ dim->data[j] = gradOutput->size[i];
+ str->data[j] = gradOutput->stride[i];
+ i++;
+ }
+
+ gradOutputPlusOneDim = THTensor_(newWithStorage)(gradOutput->storage, gradOutput->storageOffset, dim, str);
+ THLongStorage_free(dim);
+ THLongStorage_free(str);
+
+ TH_TENSOR_DIM_APPLY3(real, gradInput, real, gradOutputPlusOneDim, real, indices, dimension,
+ gradInput_data[ ((long)(*indices_data)-1)*gradInput_stride ] = *gradOutputPlusOneDim_data;)
+
+ THTensor_(free)(gradOutputPlusOneDim);
+
+ return 1;
+}
+
+static const struct luaL_Reg nn_(Min__) [] = {
+ {"Min_updateOutput", nn_(Min_updateOutput)},
+ {"Min_updateGradInput", nn_(Min_updateGradInput)},
+ {NULL, NULL}
+};
+
+static void nn_(Min_init)(lua_State *L)
+{
+ luaT_pushmetaclass(L, torch_(Tensor_id));
+ luaT_registeratname(L, nn_(Min__), "nn");
+ lua_pop(L,1);
+}
+
+#endif
diff --git a/generic/MultiLabelMarginCriterion.c b/generic/MultiLabelMarginCriterion.c
new file mode 100644
index 0000000..f4c3914
--- /dev/null
+++ b/generic/MultiLabelMarginCriterion.c
@@ -0,0 +1,185 @@
+#ifndef TH_GENERIC_FILE
+#define TH_GENERIC_FILE "generic/MultiLabelMarginCriterion.c"
+#else
+
+static int nn_(MultiLabelMarginCriterion_updateOutput)(lua_State *L)
+{
+ THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id));
+ int sizeAverage = luaT_getfieldcheckboolean(L, 1, "sizeAverage");
+ real *input_data, *target_data;
+ long nframe, dim;
+ long t, d, dt, ddt;
+ THTensor *target;
+ real sum;
+
+ THArgCheck((input->nDimension == 1) || (input->nDimension == 2), 2, "vector or matrix expected");
+
+ if(input->nDimension == 1)
+ {
+ nframe = 1;
+ dim = input->size[0];
+ target = luaT_checkudata(L, 3, torch_(Tensor_id));
+ THArgCheck((target->nDimension == 1) && (target->size[0] == dim), 3, "inconsistent target size");
+ }
+ else
+ {
+ nframe = input->size[0];
+ dim = input->size[1];
+ target = luaT_checkudata(L, 3, torch_(Tensor_id));
+ THArgCheck((target->nDimension == 2) && (target->size[0] == nframe) && (target->size[1] == dim), 3, "inconsistent target size");
+ }
+
+ THArgCheck(THTensor_(minall)(target) >= 0, 3, "target out of range");
+ THArgCheck(THTensor_(maxall)(target) <= dim, 3, "target out of range");
+
+ target = THTensor_(newContiguous)(target);
+ input = THTensor_(newContiguous)(input);
+ input_data = THTensor_(data)(input);
+ target_data = THTensor_(data)(target);
+
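+  /* targets hold 1-based class indices; a 0 entry (target_idx < 0 after the -1
+     shift below) marks the end of the label list for the current frame */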
+ sum = 0;
+ for(t = 0; t < nframe; t++)
+ {
+ for(dt = 0; dt < dim; dt++)
+ {
+ long target_idx = (long)target_data[dt]-1;
+ real input_target;
+ if(target_idx < 0)
+ break;
+
+ input_target = input_data[target_idx];
+ for(d = 0; d < dim; d++)
+ {
+ int istarget = 0;
+ for(ddt = 0; ddt < dim; ddt++)
+ {
+ if(!target_data[ddt])
+ break;
+ if(((long)target_data[ddt])-1 == d)
+ istarget = 1;
+ }
+
+ if(!istarget)
+ {
+ real z = 1 - input_target + input_data[d];
+ if(z > 0)
+ sum += z;
+ }
+ }
+ }
+ input_data += dim;
+ target_data += dim;
+ }
+
+ if(sizeAverage)
+ sum /= dim;
+
+ lua_pushnumber(L, sum);
+ lua_setfield(L, 1, "output");
+
+ THTensor_(free)(input);
+ THTensor_(free)(target);
+ lua_pushnumber(L, sum);
+ return 1;
+}
+
+static int nn_(MultiLabelMarginCriterion_updateGradInput)(lua_State *L)
+{
+ THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id));
+ int sizeAverage = luaT_getfieldcheckboolean(L, 1, "sizeAverage");
+ THTensor *gradInput = luaT_getfieldcheckudata(L, 1, "gradInput", torch_(Tensor_id));
+ real *input_data;
+ real *gradInput_data;
+ real *target_data;
+ long nframe, dim;
+ long t, d, dt, ddt;
+ THTensor *target;
+ real g;
+
+ THArgCheck((input->nDimension == 1) || (input->nDimension == 2), 2, "vector or matrix expected");
+
+ if(input->nDimension == 1)
+ {
+ nframe = 1;
+ dim = input->size[0];
+ target = luaT_checkudata(L, 3, torch_(Tensor_id));
+ THArgCheck((target->nDimension == 1) && (target->size[0] == dim), 3, "inconsistent target size");
+ }
+ else
+ {
+ nframe = input->size[0];
+ dim = input->size[1];
+ target = luaT_checkudata(L, 3, torch_(Tensor_id));
+ THArgCheck((target->nDimension == 2) && (target->size[0] == nframe) && (target->size[1] == dim), 3, "inconsistent target size");
+ }
+
+ THArgCheck(THTensor_(minall)(target) >= 0, 3, "target out of range");
+ THArgCheck(THTensor_(maxall)(target) <= dim, 3, "target out of range");
+
+ target = THTensor_(newContiguous)(target);
+ input = THTensor_(newContiguous)(input);
+ input_data = THTensor_(data)(input);
+ target_data = THTensor_(data)(target);
+
+ g = (sizeAverage ? 1./((real)dim) : 1.);
+
+ THTensor_(resizeAs)(gradInput, input);
+ THTensor_(zero)(gradInput);
+ gradInput_data = THTensor_(data)(gradInput);
+
+ for(t = 0; t < nframe; t++)
+ {
+ for(dt = 0; dt < dim; dt++)
+ {
+ long target_idx = (long)target_data[dt]-1;
+ real input_target;
+ if(target_idx < 0)
+ break;
+
+ input_target = input_data[target_idx];
+ for(d = 0; d < dim; d++)
+ {
+ int istarget = 0;
+ for(ddt = 0; ddt < dim; ddt++)
+ {
+ if(!target_data[ddt])
+ break;
+ if(((long)target_data[ddt])-1 == d)
+ istarget = 1;
+ }
+
+ if(!istarget)
+ {
+ real z = 1 - input_target + input_data[d];
+ if(z > 0)
+ {
+ gradInput_data[target_idx] -= g;
+ gradInput_data[d] += g;
+ }
+ }
+ }
+ }
+ input_data += dim;
+ target_data += dim;
+ gradInput_data += dim;
+ }
+
+ THTensor_(free)(input);
+ THTensor_(free)(target);
+ return 1;
+}
+
+static const struct luaL_Reg nn_(MultiLabelMarginCriterion__) [] = {
+ {"MultiLabelMarginCriterion_updateOutput", nn_(MultiLabelMarginCriterion_updateOutput)},
+ {"MultiLabelMarginCriterion_updateGradInput", nn_(MultiLabelMarginCriterion_updateGradInput)},
+ {NULL, NULL}
+};
+
+static void nn_(MultiLabelMarginCriterion_init)(lua_State *L)
+{
+ luaT_pushmetaclass(L, torch_(Tensor_id));
+ luaT_registeratname(L, nn_(MultiLabelMarginCriterion__), "nn");
+ lua_pop(L,1);
+}
+
+#endif
diff --git a/generic/MultiMarginCriterion.c b/generic/MultiMarginCriterion.c
new file mode 100644
index 0000000..ca73bc9
--- /dev/null
+++ b/generic/MultiMarginCriterion.c
@@ -0,0 +1,162 @@
+#ifndef TH_GENERIC_FILE
+#define TH_GENERIC_FILE "generic/MultiMarginCriterion.c"
+#else
+
+static int nn_(MultiMarginCriterion_updateOutput)(lua_State *L)
+{
+ THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id));
+ int sizeAverage = luaT_getfieldcheckboolean(L, 1, "sizeAverage");
+ real *input_data, *target_data;
+ long nframe, dim;
+ long t, d;
+ real target_;
+ THTensor *target;
+ real sum;
+
+ THArgCheck((input->nDimension == 1) || (input->nDimension == 2), 2, "vector or matrix expected");
+
+ if(input->nDimension == 1)
+ {
+ nframe = 1;
+ dim = input->size[0];
+ target_ = luaL_checknumber(L, 3);
+ target = THTensor_(newWithSize1d)(1);
+ THTensor_(fill)(target, target_);
+ }
+ else
+ {
+ nframe = input->size[0];
+ dim = input->size[1];
+ target = luaT_checkudata(L, 3, torch_(Tensor_id));
+ THArgCheck((target->nDimension == 1) && (target->size[0] == nframe), 3, "inconsistent target size");
+ target = THTensor_(newContiguous)(target);
+ }
+
+ for(t = 0; t < nframe; t++)
+ {
+ real idx = THTensor_(get1d)(target, t);
+ THArgCheck((idx >= 1) && (idx <= dim), 3, "target out of range");
+ }
+
+ input = THTensor_(newContiguous)(input);
+ input_data = THTensor_(data)(input);
+ target_data = THTensor_(data)(target);
+
+ sum = 0;
+ for(t = 0; t < nframe; t++)
+ {
+ long target_idx = (long)(target_data[t]-1);
+ real input_target = input_data[target_idx];
+ for(d = 0; d < dim; d++)
+ {
+ real z = 1 - input_target + input_data[d];
+ if(d == target_idx)
+ continue;
+
+ if(z > 0)
+ sum += z;
+ }
+ input_data += dim;
+ }
+
+ if(sizeAverage)
+ sum /= dim;
+
+ lua_pushnumber(L, sum);
+ lua_setfield(L, 1, "output");
+
+ THTensor_(free)(input);
+ THTensor_(free)(target);
+ lua_pushnumber(L, sum);
+ return 1;
+}
+
+static int nn_(MultiMarginCriterion_updateGradInput)(lua_State *L)
+{
+ THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id));
+ int sizeAverage = luaT_getfieldcheckboolean(L, 1, "sizeAverage");
+ THTensor *gradInput = luaT_getfieldcheckudata(L, 1, "gradInput", torch_(Tensor_id));
+ real *input_data;
+ real *gradInput_data;
+ real *target_data;
+ THTensor *target;
+ long nframe, dim;
+ long t, d;
+ real target_;
+ real g;
+ real sum;
+
+ THArgCheck((input->nDimension == 1) || (input->nDimension == 2), 2, "vector or matrix expected");
+
+ if(input->nDimension == 1)
+ {
+ nframe = 1;
+ dim = input->size[0];
+ target_ = luaL_checknumber(L, 3);
+ target = THTensor_(newWithSize1d)(1);
+ THTensor_(fill)(target, target_);
+ }
+ else
+ {
+ nframe = input->size[0];
+ dim = input->size[1];
+ target = luaT_checkudata(L, 3, torch_(Tensor_id));
+ THArgCheck((target->nDimension == 1) && (target->size[0] == nframe), 3, "inconsistent target size");
+ target = THTensor_(newContiguous)(target);
+ }
+
+ g = (sizeAverage ? 1./((real)dim) : 1.);
+
+ input = THTensor_(newContiguous)(input);
+ input_data = THTensor_(data)(input);
+
+ THTensor_(resizeAs)(gradInput, input);
+ gradInput_data = THTensor_(data)(gradInput);
+
+ target_data = THTensor_(data)(target);
+
+ for(t = 0; t < nframe; t++)
+ {
+ long target_idx = (long)(target_data[t])-1;
+ real input_target = input_data[target_idx];
+ real gradInput_target = 0;
+ for(d = 0; d < dim; d++)
+ {
+ real z = 1 - input_target + input_data[d];
+ if(d == target_idx)
+ continue;
+
+ if(z > 0)
+ {
+ gradInput_target -= g;
+ gradInput_data[d] = g;
+ }
+ else
+ gradInput_data[d] = 0;
+ }
+ gradInput_data[target_idx] = gradInput_target;
+
+ input_data += dim;
+ gradInput_data += dim;
+ }
+
+
+ THTensor_(free)(input);
+ THTensor_(free)(target);
+ return 1;
+}
+
+static const struct luaL_Reg nn_(MultiMarginCriterion__) [] = {
+ {"MultiMarginCriterion_updateOutput", nn_(MultiMarginCriterion_updateOutput)},
+ {"MultiMarginCriterion_updateGradInput", nn_(MultiMarginCriterion_updateGradInput)},
+ {NULL, NULL}
+};
+
+static void nn_(MultiMarginCriterion_init)(lua_State *L)
+{
+ luaT_pushmetaclass(L, torch_(Tensor_id));
+ luaT_registeratname(L, nn_(MultiMarginCriterion__), "nn");
+ lua_pop(L,1);
+}
+
+#endif
diff --git a/generic/Sigmoid.c b/generic/Sigmoid.c
new file mode 100644
index 0000000..20348b9
--- /dev/null
+++ b/generic/Sigmoid.c
@@ -0,0 +1,44 @@
+#ifndef TH_GENERIC_FILE
+#define TH_GENERIC_FILE "generic/Sigmoid.c"
+#else
+
+static int nn_(Sigmoid_updateOutput)(lua_State *L)
+{
+ THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id));
+ THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_(Tensor_id));
+
+ THTensor_(resizeAs)(output, input);
+
+ TH_TENSOR_APPLY2(real, output, real, input, \
+ *output_data = 1./(1.+ exp(- *input_data));)
+
+ return 1;
+}
+
+static int nn_(Sigmoid_updateGradInput)(lua_State *L)
+{
+ THTensor *gradOutput = luaT_checkudata(L, 3, torch_(Tensor_id));
+ THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_(Tensor_id));
+ THTensor *gradInput = luaT_getfieldcheckudata(L, 1, "gradInput", torch_(Tensor_id));
+
+ THTensor_(resizeAs)(gradInput, output);
+ TH_TENSOR_APPLY3(real, gradInput, real, gradOutput, real, output, \
+ real z = *output_data; \
+ *gradInput_data = *gradOutput_data * (1. - z) * z;)
+ return 1;
+}
+
+static const struct luaL_Reg nn_(Sigmoid__) [] = {
+ {"Sigmoid_updateOutput", nn_(Sigmoid_updateOutput)},
+ {"Sigmoid_updateGradInput", nn_(Sigmoid_updateGradInput)},
+ {NULL, NULL}
+};
+
+static void nn_(Sigmoid_init)(lua_State *L)
+{
+ luaT_pushmetaclass(L, torch_(Tensor_id));
+ luaT_registeratname(L, nn_(Sigmoid__), "nn");
+ lua_pop(L,1);
+}
+
+#endif
diff --git a/generic/SoftMax.c b/generic/SoftMax.c
new file mode 100644
index 0000000..3aaae65
--- /dev/null
+++ b/generic/SoftMax.c
@@ -0,0 +1,114 @@
+#ifndef TH_GENERIC_FILE
+#define TH_GENERIC_FILE "generic/SoftMax.c"
+#else
+
+static int nn_(SoftMax_updateOutput)(lua_State *L)
+{
+ THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id));
+ THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_(Tensor_id));
+ real *input_data, *output_data;
+ long nframe = 0, dim = 0;
+ long t, d;
+
+ if(input->nDimension == 1)
+ {
+ nframe = 1;
+ dim = input->size[0];
+ }
+ else if(input->nDimension == 2)
+ {
+ nframe = input->size[0];
+ dim = input->size[1];
+ }
+ else
+ THArgCheck(0, 2, "vector or matrix expected");
+
+ input = THTensor_(newContiguous)(input);
+ THTensor_(resizeAs)(output, input);
+
+ input_data = THTensor_(data)(input);
+ output_data = THTensor_(data)(output);
+ for(t = 0; t < nframe; t++)
+ {
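+    /* shift by the per-frame maximum before exponentiating so that exp() never
+       overflows; the shift cancels out after the normalization by `sum` */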
+ real inputMax = -THInf;
+ for(d = 0; d < dim; d++) {
+ if (input_data[d] >= inputMax) inputMax = input_data[d];
+ }
+
+ accreal sum = 0;
+ for(d = 0; d < dim; d++) {
+ real z = THExpMinusApprox(inputMax - input_data[d]);
+ output_data[d] = z;
+ sum += z;
+ }
+
+ for(d = 0; d < dim; d++) {
+ output_data[d] *= 1/sum;
+ }
+
+ input_data += dim;
+ output_data += dim;
+ }
+
+ THTensor_(free)(input);
+
+ return 1;
+}
+
+static int nn_(SoftMax_updateGradInput)(lua_State *L)
+{
+ THTensor *gradOutput = luaT_checkudata(L, 3, torch_(Tensor_id));
+ THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_(Tensor_id));
+ THTensor *gradInput = luaT_getfieldcheckudata(L, 1, "gradInput", torch_(Tensor_id));
+ real *gradInput_data, *gradOutput_data, *output_data;
+ long nframe = 0, dim = 0;
+ long t, d;
+
+ if(output->nDimension == 1)
+ {
+ nframe = 1;
+ dim = output->size[0];
+ }
+ else if(output->nDimension == 2)
+ {
+ nframe = output->size[0];
+ dim = output->size[1];
+ }
+ else
+ THError("vector or matrix expected");
+
+ THTensor_(resizeAs)(gradInput, output);
+ gradInput_data = THTensor_(data)(gradInput);
+ output_data = THTensor_(data)(output);
+ gradOutput_data = THTensor_(data)(gradOutput);
+ for(t = 0; t < nframe; t++)
+ {
+ accreal sum = 0;
+ for(d = 0; d < dim; d++)
+ sum += (accreal)gradOutput_data[d] * output_data[d];
+
+ for(d = 0; d < dim; d++)
+ gradInput_data[d] = output_data[d] * (gradOutput_data[d] - sum);
+
+ gradInput_data += dim;
+ output_data += dim;
+ gradOutput_data += dim;
+ }
+
+ return 1;
+}
+
+static const struct luaL_Reg nn_(SoftMax__) [] = {
+ {"SoftMax_updateOutput", nn_(SoftMax_updateOutput)},
+ {"SoftMax_updateGradInput", nn_(SoftMax_updateGradInput)},
+ {NULL, NULL}
+};
+
+static void nn_(SoftMax_init)(lua_State *L)
+{
+ luaT_pushmetaclass(L, torch_(Tensor_id));
+ luaT_registeratname(L, nn_(SoftMax__), "nn");
+ lua_pop(L,1);
+}
+
+#endif
diff --git a/generic/SoftPlus.c b/generic/SoftPlus.c
new file mode 100644
index 0000000..7a097fb
--- /dev/null
+++ b/generic/SoftPlus.c
@@ -0,0 +1,44 @@
+#ifndef TH_GENERIC_FILE
+#define TH_GENERIC_FILE "generic/SoftPlus.c"
+#else
+
+static int nn_(SoftPlus_updateOutput)(lua_State *L)
+{
+ THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id));
+ THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_(Tensor_id));
+
+ THTensor_(resizeAs)(output, input);
+
+ TH_TENSOR_APPLY2(real, output, real, input, \
+ *output_data = log1p(exp(*input_data));)
+
+ return 1;
+}
+
+static int nn_(SoftPlus_updateGradInput)(lua_State *L)
+{
+ THTensor *gradOutput = luaT_checkudata(L, 3, torch_(Tensor_id));
+ THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_(Tensor_id));
+ THTensor *gradInput = luaT_getfieldcheckudata(L, 1, "gradInput", torch_(Tensor_id));
+
+ THTensor_(resizeAs)(gradInput, output);
+ TH_TENSOR_APPLY3(real, gradInput, real, gradOutput, real, output, \
+ real z = exp(*output_data); \
+ *gradInput_data = *gradOutput_data * (z - 1.)/z;)
+ return 1;
+}
+
+static const struct luaL_Reg nn_(SoftPlus__) [] = {
+ {"SoftPlus_updateOutput", nn_(SoftPlus_updateOutput)},
+ {"SoftPlus_updateGradInput", nn_(SoftPlus_updateGradInput)},
+ {NULL, NULL}
+};
+
+static void nn_(SoftPlus_init)(lua_State *L)
+{
+ luaT_pushmetaclass(L, torch_(Tensor_id));
+ luaT_registeratname(L, nn_(SoftPlus__), "nn");
+ lua_pop(L,1);
+}
+
+#endif
diff --git a/generic/SoftShrink.c b/generic/SoftShrink.c
new file mode 100644
index 0000000..0bc4075
--- /dev/null
+++ b/generic/SoftShrink.c
@@ -0,0 +1,50 @@
+#ifndef TH_GENERIC_FILE
+#define TH_GENERIC_FILE "generic/SoftShrink.c"
+#else
+
+static int nn_(SoftShrink_updateOutput)(lua_State *L)
+{
+ THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id));
+ real lambda = luaT_getfieldchecknumber(L, 1, "lambda");
+ THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_(Tensor_id));
+
+ THTensor_(resizeAs)(output, input);
+
+ TH_TENSOR_APPLY2(real, output, real, input, \
+ if ((*input_data) > lambda) *output_data = *input_data - lambda; \
+ else if ((*input_data) < -lambda) *output_data = *input_data + lambda; \
+ else *output_data = 0;);
+ return 1;
+}
+
+static int nn_(SoftShrink_updateGradInput)(lua_State *L)
+{
+ THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id));
+ real lambda = luaT_getfieldchecknumber(L, 1, "lambda");
+ THTensor *gradOutput = luaT_checkudata(L, 3, torch_(Tensor_id));
+ THTensor *gradInput = luaT_getfieldcheckudata(L, 1, "gradInput", torch_(Tensor_id));
+
+ THTensor_(resizeAs)(gradInput, input);
+ TH_TENSOR_APPLY3(real, gradInput, real, gradOutput, real, input, \
+ if ((*input_data) > lambda || (*input_data) < -lambda) \
+ *gradInput_data = (*gradOutput_data); \
+ else \
+ *gradInput_data = 0; \
+ );
+ return 1;
+}
+
+static const struct luaL_Reg nn_(SoftShrink__) [] = {
+ {"SoftShrink_updateOutput", nn_(SoftShrink_updateOutput)},
+ {"SoftShrink_updateGradInput", nn_(SoftShrink_updateGradInput)},
+ {NULL, NULL}
+};
+
+static void nn_(SoftShrink_init)(lua_State *L)
+{
+ luaT_pushmetaclass(L, torch_(Tensor_id));
+ luaT_registeratname(L, nn_(SoftShrink__), "nn");
+ lua_pop(L,1);
+}
+
+#endif
diff --git a/generic/SparseLinear.c b/generic/SparseLinear.c
new file mode 100644
index 0000000..d29a1aa
--- /dev/null
+++ b/generic/SparseLinear.c
@@ -0,0 +1,130 @@
+#ifndef TH_GENERIC_FILE
+#define TH_GENERIC_FILE "generic/SparseLinear.c"
+#else
+
+static int nn_(SparseLinear_updateOutput)(lua_State *L)
+{
+ long i;
+ THTensor * input = luaT_checkudata(L, 2, torch_(Tensor_id));
+ THTensor * weight = luaT_getfieldcheckudata(L, 1, "weight", torch_(Tensor_id));
+ THTensor * bias = luaT_getfieldcheckudata(L, 1, "bias", torch_(Tensor_id));
+ THTensor * output = luaT_getfieldcheckudata(L, 1, "output", torch_(Tensor_id));
+ long dim = weight->size[0]; /* number of weights.. */
+
+ THTensor_(copy)(output, bias);
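+ /* input is a 2 x nnz tensor: row 0 holds 1-based feature indices, row 1 the values; */
+ /* each active feature adds val * weight[index] to the output */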
+ for(i = 0; i < input->size[1]; i++)
+ {
+ long offset = (long)(THTensor_(get2d)(input, 0, i))-1;
+
+ if(offset >= 0 && offset < dim) /* make sure indices are in bounds.. */
+ {
+ real val = THTensor_(get2d)(input, 1, i);
+ THBlas_(axpy)(output->size[0],
+ val,
+ THTensor_(data)(weight)+offset*weight->stride[0],
+ weight->stride[1],
+ THTensor_(data)(output),
+ output->stride[0]);
+ }
+ else
+ luaL_error(L, "index out of bound");
+ }
+ return 1;
+}
+
+static int nn_(SparseLinear_accGradParameters)(lua_State *L)
+{
+ long i;
+ THTensor * input = luaT_checkudata(L, 2, torch_(Tensor_id));
+ THTensor * gradOutput = luaT_checkudata(L, 3, torch_(Tensor_id));
+ real scale = luaL_optnumber(L, 4, 1);
+ THTensor * weight = luaT_getfieldcheckudata(L, 1, "weight", torch_(Tensor_id));
+ THTensor * output = luaT_getfieldcheckudata(L, 1, "output", torch_(Tensor_id));
+ THTensor * gradBias = luaT_getfieldcheckudata(L, 1, "gradBias", torch_(Tensor_id));
+ THTensor * gradWeight = luaT_getfieldcheckudata(L, 1, "gradWeight", torch_(Tensor_id));
+ THTensor * lastInput = luaT_getfieldcheckudata(L, 1, "lastInput", torch_(Tensor_id));
+ real weightDecay = luaT_getfieldchecknumber(L, 1, "weightDecay");
+ long dim = gradWeight->size[0]; /* number of weights.. */
+
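+ /* for each active feature: reset its gradWeight row (scal by 0), then fill it with */
+ /* scale * val * gradOutput; only rows touched by the sparse input are written */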
+ for(i = 0; i < input->size[1]; i++)
+ {
+ long offset = (long)(THTensor_(get2d)(input, 0, i))-1;
+
+ if(offset >= 0 && offset < dim) /* make sure indices are in bounds.. */
+ {
+ real val = scale*THTensor_(get2d)(input, 1, i);
+ THBlas_(scal)(gradOutput->size[0],
+ 0,
+ THTensor_(data)(gradWeight)+offset*gradWeight->stride[0],
+ gradWeight->stride[1]); /* zero */
+
+ THBlas_(axpy)(gradOutput->size[0],
+ val,
+ THTensor_(data)(gradOutput),
+ gradOutput->stride[0],
+ THTensor_(data)(gradWeight)+offset*gradWeight->stride[0],
+ gradWeight->stride[1]);
+ }
+ else
+ luaL_error(L, "index out of bound");
+ }
+
+ THTensor_(cadd)(gradBias, gradBias, 1, gradOutput);
+
+ if(weightDecay != 0)
+ THTensor_(cadd)(gradWeight, gradWeight, weightDecay, weight);
+
+ THTensor_(resizeAs)(lastInput, input);
+ THTensor_(copy)(lastInput, input);
+
+ return 0;
+}
+
+int nn_(SparseLinear_updateParameters)(lua_State *L)
+{
+ long i;
+ real learningRate = luaL_checknumber(L, 2);
+ THTensor * weight = luaT_getfieldcheckudata(L, 1, "weight", torch_(Tensor_id));
+ THTensor * output = luaT_getfieldcheckudata(L, 1, "output", torch_(Tensor_id));
+ THTensor * bias = luaT_getfieldcheckudata(L, 1, "bias", torch_(Tensor_id));
+ THTensor * gradBias = luaT_getfieldcheckudata(L, 1, "gradBias", torch_(Tensor_id));
+ THTensor * gradWeight = luaT_getfieldcheckudata(L, 1, "gradWeight", torch_(Tensor_id));
+ THTensor * lastInput = luaT_getfieldcheckudata(L, 1, "lastInput", torch_(Tensor_id));
+ real weightDecay = luaT_getfieldchecknumber(L, 1, "weightDecay");
+
+ long dim = weight->size[0]; /* number of weights.. */
+ THTensor_(cadd)(bias, bias, -learningRate, gradBias);
+
+ for(i = 0; i < lastInput->size[1]; i++)
+ {
+ long offset = (long)(THTensor_(get2d)(lastInput, 0, i))-1;
+
+ if(offset >= 0 && offset < dim) /* make sure indices are in bounds.. */
+ {
+ THBlas_(axpy)(bias->size[0],
+ -learningRate,
+ THTensor_(data)(gradWeight)+offset*gradWeight->stride[0],
+ gradWeight->stride[1],
+ THTensor_(data)(weight)+offset*weight->stride[0],
+ weight->stride[1]);
+ }
+ else
+ luaL_error(L, "index out of bound");
+ }
+ return 0;
+}
+
+static const struct luaL_Reg nn_(SparseLinear__) [] = {
+ {"SparseLinear_updateOutput", nn_(SparseLinear_updateOutput)},
+ {"SparseLinear_updateParameters", nn_(SparseLinear_updateParameters)},
+ {NULL, NULL}
+};
+
+void nn_(SparseLinear_init)(lua_State *L)
+{
+ luaT_pushmetaclass(L, torch_(Tensor_id));
+ luaT_registeratname(L, nn_(SparseLinear__), "nn");
+ lua_pop(L,1);
+}
+
+#endif
diff --git a/generic/SpatialConvolution.c b/generic/SpatialConvolution.c
new file mode 100644
index 0000000..de0de1d
--- /dev/null
+++ b/generic/SpatialConvolution.c
@@ -0,0 +1,201 @@
+#ifndef TH_GENERIC_FILE
+#define TH_GENERIC_FILE "generic/SpatialConvolution.c"
+#else
+
+static void nn_(convolution_updateOutput_)(THTensor *input, THTensor *output, THTensor *weight, THTensor *bias, int dH, int dW)
+{
+ /* add bias */
+ long i;
+ THTensor *outn = THTensor_(new)();
+ for (i=0; i<bias->size[0]; i++) {
+ THTensor_(select)(outn,output,0,i);
+ THTensor_(fill)(outn, THTensor_(get1d)(bias, i));
+ }
+ THTensor_(free)(outn);
+
+ /* do convolutions */
+ THTensor_(conv2Dmv)(output, 1.0, 1.0, input, weight, dH, dW, "V","X");
+}
+
+static int nn_(SpatialConvolution_updateOutput)(lua_State *L)
+{
+ THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id));
+ int dW = luaT_getfieldcheckint(L, 1, "dW");
+ int dH = luaT_getfieldcheckint(L, 1, "dH");
+
+ THTensor *weight = luaT_getfieldcheckudata(L, 1, "weight", torch_(Tensor_id));
+ THTensor *bias = luaT_getfieldcheckudata(L, 1, "bias", torch_(Tensor_id));
+ THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_(Tensor_id));
+
+ luaL_argcheck(L, input->nDimension == 3 || input->nDimension == 4, 2, "3D or 4D(batch mode) tensor expected");
+
+ int dimw = 2;
+ int dimh = 1;
+ if (input->nDimension == 4) {
+ dimw++;
+ dimh++;
+ }
+
+ long nOutputPlane = weight->size[0];
+ long nInputPlane = weight->size[1];
+ long kW = weight->size[3];
+ long kH = weight->size[2];
+ long inputWidth = input->size[dimw];
+ long inputHeight = input->size[dimh];
+ long outputWidth = (inputWidth - kW) / dW + 1;
+ long outputHeight = (inputHeight - kH) / dH + 1;
+
+ if (input->nDimension == 3)
+ {
+ THTensor_(resize3d)(output, nOutputPlane, outputHeight, outputWidth);
+ nn_(convolution_updateOutput_)(input,output,weight,bias,dH,dW);
+ }
+ else
+ {
+ THTensor_(resize4d)(output, input->size[0], nOutputPlane, outputHeight, outputWidth);
+ THTensor *outn = THTensor_(new)();
+ THTensor *inpn = THTensor_(new)();
+ long i;
+ for (i=0; i<input->size[0]; i++)
+ {
+ THTensor_(select)(outn,output,0,i);
+ THTensor_(select)(inpn,input,0,i);
+ nn_(convolution_updateOutput_)(inpn,outn,weight,bias,dH,dW);
+ }
+ THTensor_(free)(outn);
+ THTensor_(free)(inpn);
+ }
+
+ return 1;
+}
+
+
+static int nn_(SpatialConvolution_updateGradInput)(lua_State *L)
+{
+ THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id));
+ THTensor *gradOutput = luaT_checkudata(L, 3, torch_(Tensor_id));
+ int dW = luaT_getfieldcheckint(L, 1, "dW");
+ int dH = luaT_getfieldcheckint(L, 1, "dH");
+ int nOutputPlane = luaT_getfieldcheckint(L, 1, "nOutputPlane");
+
+ THTensor *weight = luaT_getfieldcheckudata(L, 1, "weight", torch_(Tensor_id));
+ THTensor *gradInput = luaT_getfieldcheckudata(L, 1, "gradInput", torch_(Tensor_id));
+
+ THArgCheck( nOutputPlane == gradOutput->size[input->nDimension == 4 ? 1 : 0], 1, "Number of output features is not equal to nOutputPlane" );
+
+ long k;
+
+ /* gradient to input */
+ THTensor *tweight = THTensor_(newTranspose)(weight,0,1);
+
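+ /* gradInput is a full convolution of gradOutput with the transposed weights */
+ /* (the "F","C" flags passed to conv2Dmv) */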
+ if(input->nDimension == 3)
+ {
+ THTensor_(conv2Dmv)(gradInput, 0.0, 1.0, gradOutput, tweight, dH, dW, "F", "C");
+ }
+ else
+ {
+
+ THTensor_(resizeAs)(gradInput,input);
+ THTensor *outn = THTensor_(new)();
+ THTensor *inpn = THTensor_(new)();
+ long i;
+ for (i=0; i<input->size[0]; i++)
+ {
+ THTensor_(select)(outn,gradOutput,0,i);
+ THTensor_(select)(inpn,gradInput,0,i);
+ THTensor_(conv2Dmv)(inpn, 0.0, 1.0, outn, tweight, dH, dW, "F", "C");
+ }
+ THTensor_(free)(outn);
+ THTensor_(free)(inpn);
+ }
+ THTensor_(free)(tweight);
+
+ return 1;
+}
+
+static void nn_(convolution_accGradParameters_)(THTensor *input, THTensor *gradOutput, THTensor *gradWeight, THTensor *gradBias, real scale, int dH, int dW)
+{
+ long k;
+
+ /* gradient to bias */
+ real *gradBias_data = THTensor_(data)(gradBias);
+ THTensor* gradOutSlice = THTensor_(new)();
+ for(k = 0; k < gradOutput->size[0]; k++)
+ {
+ THTensor_(select)(gradOutSlice, gradOutput, 0, k);
+ gradBias_data[k] += scale*THTensor_(sumall)(gradOutSlice);
+ }
+ THTensor_(free)(gradOutSlice);
+
+ /* gradient to kernels */
+ THTensor_(conv2DRevger)(gradWeight, 1.0, scale, input, gradOutput, dH, dW);
+}
+
+static int nn_(SpatialConvolution_accGradParameters)(lua_State *L)
+{
+ THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id));
+ THTensor *gradOutput = luaT_checkudata(L, 3, torch_(Tensor_id));
+ real scale = luaL_optnumber(L, 4, 1);
+ int dW = luaT_getfieldcheckint(L, 1, "dW");
+ int dH = luaT_getfieldcheckint(L, 1, "dH");
+ int nOutputPlane = luaT_getfieldcheckint(L, 1, "nOutputPlane");
+
+ THTensor *gradWeight = luaT_getfieldcheckudata(L, 1, "gradWeight", torch_(Tensor_id));
+ THTensor *gradBias = luaT_getfieldcheckudata(L, 1, "gradBias", torch_(Tensor_id));
+
+ THArgCheck( nOutputPlane == gradOutput->size[input->nDimension == 4 ? 1 : 0], 1, "Number of output features is not equal to nOutputPlane" );
+
+ if(input->nDimension == 3)
+ {
+ nn_(convolution_accGradParameters_)(input,gradOutput,gradWeight,gradBias,scale,dH,dW);
+ }
+ else
+ {
+ THTensor *outn = THTensor_(new)();
+ THTensor *inpn = THTensor_(new)();
+ long i;
+ for (i=0; i<input->size[0]; i++)
+ {
+ THTensor_(select)(outn,gradOutput,0,i);
+ THTensor_(select)(inpn,input,0,i);
+ nn_(convolution_accGradParameters_)(inpn,outn,gradWeight,gradBias,scale,dH,dW);
+ }
+ THTensor_(free)(outn);
+ THTensor_(free)(inpn);
+ }
+
+ return 0;
+}
+
+static const struct luaL_Reg nn_(SpatialConvolution__) [] = {
+ {"SpatialConvolution_updateOutput", nn_(SpatialConvolution_updateOutput)},
+ {"SpatialConvolution_updateGradInput", nn_(SpatialConvolution_updateGradInput)},
+ {"SpatialConvolution_accGradParameters", nn_(SpatialConvolution_accGradParameters)},
+ {NULL, NULL}
+};
+
+static void nn_(SpatialConvolution_init)(lua_State *L)
+{
+ luaT_pushmetaclass(L, torch_(Tensor_id));
+ luaT_registeratname(L, nn_(SpatialConvolution__), "nn");
+ lua_pop(L,1);
+}
+
+#endif
diff --git a/generic/SpatialConvolutionMap.c b/generic/SpatialConvolutionMap.c
new file mode 100644
index 0000000..ff7d8ca
--- /dev/null
+++ b/generic/SpatialConvolutionMap.c
@@ -0,0 +1,229 @@
+#ifndef TH_GENERIC_FILE
+#define TH_GENERIC_FILE "generic/SpatialConvolutionMap.c"
+#else
+
+static int nn_(SpatialConvolutionMap_updateOutput)(lua_State *L)
+{
+ THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id));
+ int kW = luaT_getfieldcheckint(L, 1, "kW");
+ int kH = luaT_getfieldcheckint(L, 1, "kH");
+ int dW = luaT_getfieldcheckint(L, 1, "dW");
+ int dH = luaT_getfieldcheckint(L, 1, "dH");
+ int nInputPlane = luaT_getfieldcheckint(L, 1, "nInputPlane");
+ int nOutputPlane = luaT_getfieldcheckint(L, 1, "nOutputPlane");
+
+ THTensor *connTable = luaT_getfieldcheckudata(L, 1, "connTable", torch_(Tensor_id));
+ THTensor *weight = luaT_getfieldcheckudata(L, 1, "weight", torch_(Tensor_id));
+ THTensor *bias = luaT_getfieldcheckudata(L, 1, "bias", torch_(Tensor_id));
+ THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_(Tensor_id));
+
+ luaL_argcheck(L, input->nDimension == 3, 2, "3D tensor expected");
+ luaL_argcheck(L, input->size[0] == nInputPlane, 2, "invalid number of input planes");
+ luaL_argcheck(L, input->size[2] >= kW && input->size[1] >= kH, 2, "input image smaller than kernel size");
+
+ THTensor_(resize3d)(output, nOutputPlane,
+ (input->size[1] - kH) / dH + 1,
+ (input->size[2] - kW) / dW + 1);
+
+ // contiguous
+ input = THTensor_(newContiguous)(input);
+ output = THTensor_(newContiguous)(output);
+
+ // get raw pointers
+ real *input_data = THTensor_(data)(input);
+ real *output_data = THTensor_(data)(output);
+ real *weight_data = THTensor_(data)(weight);
+
+ // and dims
+ long input_n = input->size[0];
+ long input_h = input->size[1];
+ long input_w = input->size[2];
+ long output_n = output->size[0];
+ long output_h = output->size[1];
+ long output_w = output->size[2];
+ long weight_n = weight->size[0];
+ long weight_h = weight->size[1];
+ long weight_w = weight->size[2];
+
+ // add bias
+ THTensor *outputPlane = THTensor_(new)();
+ int k;
+ for (k = 0; k < nOutputPlane; k++) {
+ THTensor_(select)(outputPlane,output,0,k);
+ THTensor_(fill)(outputPlane, THTensor_(get1d)(bias, k));
+ }
+ THTensor_(free)(outputPlane);
+
+ // convolve all maps
+ int i,o;
+ int nweight = connTable->size[0];
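+ /* each connTable row k is a (input plane, output plane) pair with 1-based indices; */
+ /* kernel k cross-correlates input plane i into output plane o */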
+ for (k = 0; k < nweight; k++) {
+ // get offsets for input/output
+ o = (int)THTensor_(get2d)(connTable,k,1)-1;
+ i = (int)THTensor_(get2d)(connTable,k,0)-1;
+
+ // convolve each map
+ THTensor_(validXCorr2Dptr)(output_data + o*output_w*output_h,
+ 1.0,
+ input_data + i*input_w*input_h, input_h, input_w,
+ weight_data + k*weight_w*weight_h, weight_h, weight_w,
+ dH, dW);
+ }
+
+ // clean up
+ THTensor_(free)(input);
+ THTensor_(free)(output);
+
+ return 1;
+}
+
+static int nn_(SpatialConvolutionMap_updateGradInput)(lua_State *L)
+{
+ THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id));
+ THTensor *gradOutput = luaT_checkudata(L, 3, torch_(Tensor_id));
+ int kW = luaT_getfieldcheckint(L, 1, "kW");
+ int kH = luaT_getfieldcheckint(L, 1, "kH");
+ int dW = luaT_getfieldcheckint(L, 1, "dW");
+ int dH = luaT_getfieldcheckint(L, 1, "dH");
+ int nInputPlane = luaT_getfieldcheckint(L, 1, "nInputPlane");
+ int nOutputPlane = luaT_getfieldcheckint(L, 1, "nOutputPlane");
+
+ THTensor *connTable = luaT_getfieldcheckudata(L, 1, "connTable", torch_(Tensor_id));
+ THTensor *weight = luaT_getfieldcheckudata(L, 1, "weight", torch_(Tensor_id));
+ THTensor *gradWeight = luaT_getfieldcheckudata(L, 1, "gradWeight", torch_(Tensor_id));
+ THTensor *gradBias = luaT_getfieldcheckudata(L, 1, "gradBias", torch_(Tensor_id));
+ THTensor *gradInput = luaT_getfieldcheckudata(L, 1, "gradInput", torch_(Tensor_id));
+
+ // contiguous
+ gradInput = THTensor_(newContiguous)(gradInput);
+ gradOutput = THTensor_(newContiguous)(gradOutput);
+
+ // Resize/Zero
+ THTensor_(resizeAs)(gradInput, input);
+ THTensor_(zero)(gradInput);
+
+ // get raw pointers
+ real *gradInput_data = THTensor_(data)(gradInput);
+ real *gradOutput_data = THTensor_(data)(gradOutput);
+ real *weight_data = THTensor_(data)(weight);
+ real *gradWeight_data = THTensor_(data)(gradWeight);
+
+ // and dims
+ long input_n = input->size[0];
+ long input_h = input->size[1];
+ long input_w = input->size[2];
+ long output_n = gradOutput->size[0];
+ long output_h = gradOutput->size[1];
+ long output_w = gradOutput->size[2];
+ long weight_n = weight->size[0];
+ long weight_h = weight->size[1];
+ long weight_w = weight->size[2];
+
+ // updateGradInput all
+ int k;
+ int nkernel = connTable->size[0];
+ for(k = 0; k < nkernel; k++)
+ {
+ int o = (int)THTensor_(get2d)(connTable,k,1)-1;
+ int i = (int)THTensor_(get2d)(connTable,k,0)-1;
+
+ // gradient to input
+ THTensor_(fullConv2Dptr)(gradInput_data + i*input_w*input_h,
+ 1.0,
+ gradOutput_data + o*output_w*output_h, output_h, output_w,
+ weight_data + k*weight_w*weight_h, weight_h, weight_w,
+ dH, dW);
+ }
+
+ // clean up
+ THTensor_(free)(gradInput);
+ THTensor_(free)(gradOutput);
+
+ return 1;
+}
+
+static int nn_(SpatialConvolutionMap_accGradParameters)(lua_State *L)
+{
+ THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id));
+ THTensor *gradOutput = luaT_checkudata(L, 3, torch_(Tensor_id));
+ int kW = luaT_getfieldcheckint(L, 1, "kW");
+ int kH = luaT_getfieldcheckint(L, 1, "kH");
+ int dW = luaT_getfieldcheckint(L, 1, "dW");
+ int dH = luaT_getfieldcheckint(L, 1, "dH");
+ int nInputPlane = luaT_getfieldcheckint(L, 1, "nInputPlane");
+ int nOutputPlane = luaT_getfieldcheckint(L, 1, "nOutputPlane");
+ real scale = luaL_optnumber(L, 4, 1);
+
+ THTensor *connTable = luaT_getfieldcheckudata(L, 1, "connTable", torch_(Tensor_id));
+ THTensor *weight = luaT_getfieldcheckudata(L, 1, "weight", torch_(Tensor_id));
+ THTensor *gradWeight = luaT_getfieldcheckudata(L, 1, "gradWeight", torch_(Tensor_id));
+ THTensor *gradBias = luaT_getfieldcheckudata(L, 1, "gradBias", torch_(Tensor_id));
+ THTensor *gradInput = luaT_getfieldcheckudata(L, 1, "gradInput", torch_(Tensor_id));
+
+ // contiguous
+ input = THTensor_(newContiguous)(input);
+ gradOutput = THTensor_(newContiguous)(gradOutput);
+
+ // get raw pointers
+ real *input_data = THTensor_(data)(input);
+ real *gradOutput_data = THTensor_(data)(gradOutput);
+ real *weight_data = THTensor_(data)(weight);
+ real *gradWeight_data = THTensor_(data)(gradWeight);
+
+ // and dims
+ long input_n = input->size[0];
+ long input_h = input->size[1];
+ long input_w = input->size[2];
+ long output_n = gradOutput->size[0];
+ long output_h = gradOutput->size[1];
+ long output_w = gradOutput->size[2];
+ long weight_n = weight->size[0];
+ long weight_h = weight->size[1];
+ long weight_w = weight->size[2];
+
+ // gradients wrt bias
+ int k;
+ THTensor *gradOutputPlane = THTensor_(new)();
+ real *gradBias_data = THTensor_(data)(gradBias);
+ for(k = 0; k < nOutputPlane; k++) {
+ THTensor_(select)(gradOutputPlane, gradOutput, 0, k);
+ gradBias_data[k] += scale * THTensor_(sumall)(gradOutputPlane);
+ }
+ THTensor_(free)(gradOutputPlane);
+
+ // gradients wrt weight
+ int nkernel = connTable->size[0];
+ for(k = 0; k < nkernel; k++)
+ {
+ int o = (int)THTensor_(get2d)(connTable,k,1)-1;
+ int i = (int)THTensor_(get2d)(connTable,k,0)-1;
+
+ // gradient to kernel
+ THTensor_(validXCorr2DRevptr)(gradWeight_data + k*weight_w*weight_h,
+ scale,
+ input_data + i*input_w*input_h, input_h, input_w,
+ gradOutput_data + o*output_w*output_h, output_h, output_w,
+ dH, dW);
+ }
+
+ // clean up
+ THTensor_(free)(input);
+ THTensor_(free)(gradOutput);
+ return 0;
+}
+
+static const struct luaL_Reg nn_(SpatialConvolutionMap__) [] = {
+ {"SpatialConvolutionMap_updateOutput", nn_(SpatialConvolutionMap_updateOutput)},
+ {"SpatialConvolutionMap_updateGradInput", nn_(SpatialConvolutionMap_updateGradInput)},
+ {"SpatialConvolutionMap_accGradParameters", nn_(SpatialConvolutionMap_accGradParameters)},
+ {NULL, NULL}
+};
+
+static void nn_(SpatialConvolutionMap_init)(lua_State *L)
+{
+ luaT_pushmetaclass(L, torch_(Tensor_id));
+ luaT_registeratname(L, nn_(SpatialConvolutionMap__), "nn");
+ lua_pop(L,1);
+}
+
+#endif
diff --git a/generic/SpatialMaxPooling.c b/generic/SpatialMaxPooling.c
new file mode 100644
index 0000000..b9fab3b
--- /dev/null
+++ b/generic/SpatialMaxPooling.c
@@ -0,0 +1,163 @@
+#ifndef TH_GENERIC_FILE
+#define TH_GENERIC_FILE "generic/SpatialMaxPooling.c"
+#else
+
+static int nn_(SpatialMaxPooling_updateOutput)(lua_State *L)
+{
+ THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id));
+ int kW = luaT_getfieldcheckint(L, 1, "kW");
+ int kH = luaT_getfieldcheckint(L, 1, "kH");
+ int dW = luaT_getfieldcheckint(L, 1, "dW");
+ int dH = luaT_getfieldcheckint(L, 1, "dH");
+ THTensor *indices = luaT_getfieldcheckudata(L, 1, "indices", torch_(Tensor_id));
+ THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_(Tensor_id));
+
+ luaL_argcheck(L, input->nDimension == 3, 2, "3D tensor expected");
+ luaL_argcheck(L, input->size[2] >= kW && input->size[1] >= kH, 2, "input image smaller than kernel size");
+
+ // sizes
+ long nslices = input->size[0];
+ long iheight = input->size[1];
+ long iwidth = input->size[2];
+ long oheight = (iheight - kH) / dH + 1;
+ long owidth = (iwidth - kW) / dW + 1;
+
+ // get contiguous input
+ input = THTensor_(newContiguous)(input);
+
+ // resize output
+ THTensor_(resize3d)(output, nslices, oheight, owidth);
+
+ // indices will contain the (y,x) locations, within each pooling window, of every output point
+ THTensor_(resize4d)(indices, 2, nslices, oheight, owidth);
+
+ // get raw pointers
+ real *input_data = THTensor_(data)(input);
+ real *output_data = THTensor_(data)(output);
+ real *indices_data = THTensor_(data)(indices);
+
+ // compute max pooling for each input slice
+ long k;
+ for (k = 0; k < nslices; k++) {
+ // pointers to slices
+ real *input_p = input_data + k*iwidth*iheight;
+ real *output_p = output_data + k*owidth*oheight;
+ real *indy_p = indices_data + k*owidth*oheight;
+ real *indx_p = indices_data + (k+nslices)*owidth*oheight;
+
+ // loop over output
+ int i,j;
+ for(i = 0; i < oheight; i++) {
+ for(j = 0; j < owidth; j++) {
+ // local pointers
+ real *ip = input_p + i*iwidth*dH + j*dW;
+ real *op = output_p + i*owidth + j;
+ real *indyp = indy_p + i*owidth + j;
+ real *indxp = indx_p + i*owidth + j;
+
+ // compute local max:
+ long maxindex = -1;
+ real maxval = -THInf;
+ long tcntr = 0;
+ int x,y;
+ for(y = 0; y < kH; y++) {
+ for(x = 0; x < kW; x++) {
+ real val = *(ip + y*iwidth + x);
+ if (val > maxval) {
+ maxval = val;
+ maxindex = tcntr;
+ }
+ tcntr++;
+ }
+ }
+
+ // set output to local max
+ *op = maxval;
+
+ // store location of max (x,y)
+ // decompose the flat window index with the kernel width kW (the window was scanned row by row)
+ *indyp = (int)(maxindex / kW)+1;
+ *indxp = (maxindex % kW) +1;
+ }
+ }
+ }
+
+ // cleanup
+ THTensor_(free)(input);
+
+ return 1;
+}
+
+static int nn_(SpatialMaxPooling_updateGradInput)(lua_State *L)
+{
+ THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id));
+ THTensor *gradOutput = luaT_checkudata(L, 3, torch_(Tensor_id));
+ int kW = luaT_getfieldcheckint(L, 1, "kW");
+ int kH = luaT_getfieldcheckint(L, 1, "kH");
+ int dW = luaT_getfieldcheckint(L, 1, "dW");
+ int dH = luaT_getfieldcheckint(L, 1, "dH");
+ THTensor *indices = luaT_getfieldcheckudata(L, 1, "indices", torch_(Tensor_id));
+ THTensor *gradInput = luaT_getfieldcheckudata(L, 1, "gradInput", torch_(Tensor_id));
+
+ // get contiguous gradOutput
+ gradOutput = THTensor_(newContiguous)(gradOutput);
+
+ // resize
+ THTensor_(resizeAs)(gradInput, input);
+ THTensor_(zero)(gradInput);
+
+ // sizes
+ int ichannels = input->size[0];
+ int iheight = input->size[1];
+ int iwidth = input->size[2];
+ int ochannels = ichannels;
+ int oheight = gradOutput->size[1];
+ int owidth = gradOutput->size[2];
+
+ // get raw pointers
+ real *gradInput_data = THTensor_(data)(gradInput);
+ real *gradOutput_data = THTensor_(data)(gradOutput);
+ real *indices_data = THTensor_(data)(indices);
+
+ // backprop
+ long k;
+ for (k = 0; k < input->size[0]; k++) {
+ // pointers to slices
+ real *gradOutput_p = gradOutput_data + k*owidth*oheight;
+ real *gradInput_p = gradInput_data + k*iwidth*iheight;
+ real *indy_p = indices_data + k*owidth*oheight;
+ real *indx_p = indices_data + (k+ochannels)*owidth*oheight;
+
+ // calculate max points
+ int i,j;
+ for(i = 0; i < oheight; i++) {
+ for(j = 0; j < owidth; j++) {
+ // retrieve position of max
+ long maxi = *(indy_p + i*owidth + j) - 1 + i*dH;
+ long maxj = *(indx_p + i*owidth + j) - 1 + j*dW;
+
+ // update gradient
+ *(gradInput_p + maxi*iwidth + maxj) += *(gradOutput_p + i*owidth + j);
+ }
+ }
+ }
+
+ // cleanup
+ THTensor_(free)(gradOutput);
+
+ return 1;
+}
+
+static const struct luaL_Reg nn_(SpatialMaxPooling__) [] = {
+ {"SpatialMaxPooling_updateOutput", nn_(SpatialMaxPooling_updateOutput)},
+ {"SpatialMaxPooling_updateGradInput", nn_(SpatialMaxPooling_updateGradInput)},
+ {NULL, NULL}
+};
+
+static void nn_(SpatialMaxPooling_init)(lua_State *L)
+{
+ luaT_pushmetaclass(L, torch_(Tensor_id));
+ luaT_registeratname(L, nn_(SpatialMaxPooling__), "nn");
+ lua_pop(L,1);
+}
+
+#endif
diff --git a/generic/SpatialSubSampling.c b/generic/SpatialSubSampling.c
new file mode 100644
index 0000000..705253f
--- /dev/null
+++ b/generic/SpatialSubSampling.c
@@ -0,0 +1,278 @@
+#ifndef TH_GENERIC_FILE
+#define TH_GENERIC_FILE "generic/SpatialSubSampling.c"
+#else
+
+static int nn_(SpatialSubSampling_updateOutput)(lua_State *L)
+{
+ THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id));
+ int kW = luaT_getfieldcheckint(L, 1, "kW");
+ int kH = luaT_getfieldcheckint(L, 1, "kH");
+ int dW = luaT_getfieldcheckint(L, 1, "dW");
+ int dH = luaT_getfieldcheckint(L, 1, "dH");
+ int nInputPlane = luaT_getfieldcheckint(L, 1, "nInputPlane");
+
+ THTensor *weight = luaT_getfieldcheckudata(L, 1, "weight", torch_(Tensor_id));
+ THTensor *bias = luaT_getfieldcheckudata(L, 1, "bias", torch_(Tensor_id));
+ THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_(Tensor_id));
+
+ real *weight_data = THTensor_(data)(weight);
+ real *bias_data = THTensor_(data)(bias);
+ real *output_data;
+ real *input_data;
+
+ luaL_argcheck(L, input->nDimension == 3 || input->nDimension == 4, 2, "3D or 4D(batch mode) tensor expected");
+
+ int dimw = 2;
+ int dimh = 1;
+ if (input->nDimension == 4) {
+ dimw++;
+ dimh++;
+ }
+
+ long inputWidth = input->size[dimw];
+ long inputHeight = input->size[dimh];
+ long outputWidth = (inputWidth - kW) / dW + 1;
+ long outputHeight = (inputHeight - kH) / dH + 1;
+
+
+ luaL_argcheck(L, input->size[dimh-1] == nInputPlane, 2, "invalid number of input planes");
+ luaL_argcheck(L, inputWidth >= kW && inputHeight >= kH, 2, "input image smaller than kernel size");
+
+ input = THTensor_(newContiguous)(input);
+ input_data = THTensor_(data)(input);
+
+ long nbatch = 1;
+ if (input->nDimension == 3)
+ {
+ THTensor_(resize3d)(output, nInputPlane, outputHeight, outputWidth);
+ }
+ else
+ {
+ nbatch = input->size[0];
+ THTensor_(resize4d)(output, nbatch, nInputPlane, outputHeight, outputWidth);
+ }
+
+ output_data = THTensor_(data)(output);
+
+ long i, k, p;
+
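+ /* for every plane k: output = bias[k] + weight[k] * (sum over each kW x kH input window), */
+ /* i.e. subsampling with one learned gain and one bias per plane */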
+ for(p = 0; p < nbatch; p++)
+ {
+ //input_data += p*nInputPlane*inputWidth*inputHeight;
+ //output_data += p*nInputPlane*outputHeight*outputWidth;
+ for(k = 0; k < nInputPlane; k++)
+ {
+ real *ptr_output;
+ long xx, yy;
+
+ /* Get the scalar weight for plane k (one learned gain per plane) */
+ real the_weight = weight_data[k];
+
+ /* Initialize to the bias */
+ real z = bias_data[k];
+ for(i = 0; i < outputWidth*outputHeight; i++)
+ output_data[i] = z;
+
+ /* For all output pixels... */
+ ptr_output = output_data;
+ for(yy = 0; yy < outputHeight; yy++)
+ {
+ for(xx = 0; xx < outputWidth; xx++)
+ {
+ // Sum the input values over the kW x kH window...
+ real *ptr_input = input_data+yy*dH*inputWidth+xx*dW;
+ real sum = 0;
+ long kx, ky;
+
+ for(ky = 0; ky < kH; ky++)
+ {
+ for(kx = 0; kx < kW; kx++)
+ sum += ptr_input[kx];
+ ptr_input += inputWidth; // next input line
+ }
+
+ // Update output
+ *ptr_output++ += the_weight*sum;
+ }
+ }
+
+ // Next input/output plane
+ output_data += outputWidth*outputHeight;
+ input_data += inputWidth*inputHeight;
+ }
+ }
+
+ THTensor_(free)(input);
+
+ return 1;
+}
+
+static int nn_(SpatialSubSampling_updateGradInput)(lua_State *L)
+{
+ THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id));
+ THTensor *gradOutput = luaT_checkudata(L, 3, torch_(Tensor_id));
+ int kW = luaT_getfieldcheckint(L, 1, "kW");
+ int kH = luaT_getfieldcheckint(L, 1, "kH");
+ int dW = luaT_getfieldcheckint(L, 1, "dW");
+ int dH = luaT_getfieldcheckint(L, 1, "dH");
+ int nInputPlane = luaT_getfieldcheckint(L, 1, "nInputPlane");
+
+ THTensor *weight = luaT_getfieldcheckudata(L, 1, "weight", torch_(Tensor_id));
+ THTensor *gradInput = luaT_getfieldcheckudata(L, 1, "gradInput", torch_(Tensor_id));
+
+ int dimw = 2;
+ int dimh = 1;
+ long nbatch = 1;
+ if (input->nDimension == 4) {
+ dimw++;
+ dimh++;
+ nbatch = input->size[0];
+ }
+
+ long inputWidth = input->size[dimw];
+ long inputHeight = input->size[dimh];
+ long outputWidth = (inputWidth - kW) / dW + 1;
+ long outputHeight = (inputHeight - kH) / dH + 1;
+
+ real *weight_data = THTensor_(data)(weight);
+ real *gradOutput_data = THTensor_(data)(gradOutput);
+ real *gradInput_data;
+
+ THTensor_(resizeAs)(gradInput, input);
+ THTensor_(zero)(gradInput);
+ gradInput_data = THTensor_(data)(gradInput);
+ gradOutput_data = THTensor_(data)(gradOutput);
+
+ long i, k, p;
+
+ for(p = 0; p < nbatch; p++)
+ {
+ //gradInput_data += p*nInputPlane*inputWidth*inputHeight;
+ //gradOutput_data += p*nInputPlane*outputWidth*outputHeight;
+ for(k = 0; k < nInputPlane; k++)
+ {
+ real the_weight = weight_data[k];
+ real *ptr_gradOutput = gradOutput_data;
+ long xx, yy;
+
+ for(yy = 0; yy < outputHeight; yy++)
+ {
+ for(xx = 0; xx < outputWidth; xx++)
+ {
+ real *ptr_gradInput = gradInput_data+yy*dH*inputWidth+xx*dW;
+ real z = *ptr_gradOutput++ * the_weight;
+ long kx, ky;
+
+ for(ky = 0; ky < kH; ky++)
+ {
+ for(kx = 0; kx < kW; kx++)
+ ptr_gradInput[kx] += z;
+ ptr_gradInput += inputWidth;
+ }
+ }
+ }
+ gradOutput_data += outputWidth*outputHeight;
+ gradInput_data += inputWidth*inputHeight;
+ }
+ }
+
+ return 1;
+}
+
+static int nn_(SpatialSubSampling_accGradParameters)(lua_State *L)
+{
+ THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id));
+ THTensor *gradOutput = luaT_checkudata(L, 3, torch_(Tensor_id));
+ real scale = luaL_optnumber(L, 4, 1);
+ int kW = luaT_getfieldcheckint(L, 1, "kW");
+ int kH = luaT_getfieldcheckint(L, 1, "kH");
+ int dW = luaT_getfieldcheckint(L, 1, "dW");
+ int dH = luaT_getfieldcheckint(L, 1, "dH");
+ int nInputPlane = luaT_getfieldcheckint(L, 1, "nInputPlane");
+
+ THTensor *gradWeight = luaT_getfieldcheckudata(L, 1, "gradWeight", torch_(Tensor_id));
+ THTensor *gradBias = luaT_getfieldcheckudata(L, 1, "gradBias", torch_(Tensor_id));
+
+ int dimw = 2;
+ int dimh = 1;
+ long nbatch = 1;
+ if (input->nDimension == 4) {
+ dimw++;
+ dimh++;
+ nbatch = input->size[0];
+ }
+
+ long inputWidth = input->size[dimw];
+ long inputHeight = input->size[dimh];
+ long outputWidth = (inputWidth - kW) / dW + 1;
+ long outputHeight = (inputHeight - kH) / dH + 1;
+
+ real *gradWeight_data = THTensor_(data)(gradWeight);
+ real *gradBias_data = THTensor_(data)(gradBias);
+ real *gradOutput_data = THTensor_(data)(gradOutput);
+ real *input_data;
+
+ input = THTensor_(newContiguous)(input);
+ input_data = THTensor_(data)(input);
+
+ long i, k, p;
+ for(p = 0; p < nbatch; p++)
+ {
+ //input_data += p*nInputPlane*inputWidth*inputHeight;
+ //gradOutput_data += p*nInputPlane*inputWidth*inputHeight;
+ for(k = 0; k < nInputPlane; k++)
+ {
+ real *ptr_gradOutput = gradOutput_data;
+ real sum;
+ long xx, yy;
+
+ sum = 0;
+ for(i = 0; i < outputWidth*outputHeight; i++)
+ sum += gradOutput_data[i];
+ gradBias_data[k] += scale*sum;
+
+ sum = 0;
+ for(yy = 0; yy < outputHeight; yy++)
+ {
+ for(xx = 0; xx < outputWidth; xx++)
+ {
+ real *ptr_input = input_data+yy*dH*inputWidth+xx*dW;
+ real z = *ptr_gradOutput++;
+ long kx, ky;
+
+ for(ky = 0; ky < kH; ky++)
+ {
+ for(kx = 0; kx < kW; kx++)
+ sum += z * ptr_input[kx];
+ ptr_input += inputWidth;
+ }
+ }
+ }
+ gradWeight_data[k] += scale*sum;
+ gradOutput_data += outputWidth*outputHeight;
+ input_data += inputWidth*inputHeight;
+ }
+ }
+
+
+ THTensor_(free)(input);
+
+ return 0;
+}
+
+static const struct luaL_Reg nn_(SpatialSubSampling__) [] = {
+ {"SpatialSubSampling_updateOutput", nn_(SpatialSubSampling_updateOutput)},
+ {"SpatialSubSampling_updateGradInput", nn_(SpatialSubSampling_updateGradInput)},
+ {"SpatialSubSampling_accGradParameters", nn_(SpatialSubSampling_accGradParameters)},
+ {NULL, NULL}
+};
+
+static void nn_(SpatialSubSampling_init)(lua_State *L)
+{
+ luaT_pushmetaclass(L, torch_(Tensor_id));
+ luaT_registeratname(L, nn_(SpatialSubSampling__), "nn");
+ lua_pop(L,1);
+}
+
+#endif
diff --git a/generic/Sqrt.c b/generic/Sqrt.c
new file mode 100644
index 0000000..a739e96
--- /dev/null
+++ b/generic/Sqrt.c
@@ -0,0 +1,46 @@
+#ifndef TH_GENERIC_FILE
+#define TH_GENERIC_FILE "generic/Sqrt.c"
+#else
+
+static int nn_(Sqrt_updateOutput)(lua_State *L)
+{
+ THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id));
+ THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_(Tensor_id));
+
+ THTensor_(resizeAs)(output, input);
+
+ TH_TENSOR_APPLY2(real, output, real, input, \
+ *output_data = sqrt(*input_data););
+
+ return 1;
+}
+
+static int nn_(Sqrt_updateGradInput)(lua_State *L)
+{
+ THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id));
+ THTensor *gradOutput = luaT_checkudata(L, 3, torch_(Tensor_id));
+ THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_(Tensor_id));
+ THTensor *gradInput = luaT_getfieldcheckudata(L, 1, "gradInput", torch_(Tensor_id));
+
+ THTensor_(resizeAs)(gradInput, input);
+
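+ /* d/dx sqrt(x) = 1/(2*sqrt(x)), computed from the saved output */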
+ TH_TENSOR_APPLY3(real, gradInput, real, gradOutput, real, output, \
+ *gradInput_data = 0.5 * (*gradOutput_data / *output_data););
+
+ return 1;
+}
+
+static const struct luaL_Reg nn_(Sqrt__) [] = {
+ {"Sqrt_updateOutput", nn_(Sqrt_updateOutput)},
+ {"Sqrt_updateGradInput", nn_(Sqrt_updateGradInput)},
+ {NULL, NULL}
+};
+
+static void nn_(Sqrt_init)(lua_State *L)
+{
+ luaT_pushmetaclass(L, torch_(Tensor_id));
+ luaT_registeratname(L, nn_(Sqrt__), "nn");
+ lua_pop(L,1);
+}
+
+#endif
diff --git a/generic/Square.c b/generic/Square.c
new file mode 100644
index 0000000..409055d
--- /dev/null
+++ b/generic/Square.c
@@ -0,0 +1,45 @@
+#ifndef TH_GENERIC_FILE
+#define TH_GENERIC_FILE "generic/Square.c"
+#else
+
+static int nn_(Square_updateOutput)(lua_State *L)
+{
+ THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id));
+ THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_(Tensor_id));
+
+ THTensor_(resizeAs)(output, input);
+
+ TH_TENSOR_APPLY2(real, output, real, input, \
+ *output_data = *input_data * *input_data;);
+
+ return 1;
+}
+
+static int nn_(Square_updateGradInput)(lua_State *L)
+{
+ THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id));
+ THTensor *gradOutput = luaT_checkudata(L, 3, torch_(Tensor_id));
+ THTensor *gradInput = luaT_getfieldcheckudata(L, 1, "gradInput", torch_(Tensor_id));
+
+ THTensor_(resizeAs)(gradInput, input);
+
+ TH_TENSOR_APPLY3(real, gradInput, real, gradOutput, real, input, \
+ *gradInput_data = 2.0 * (*gradOutput_data) * (*input_data););
+
+ return 1;
+}
+
+static const struct luaL_Reg nn_(Square__) [] = {
+ {"Square_updateOutput", nn_(Square_updateOutput)},
+ {"Square_updateGradInput", nn_(Square_updateGradInput)},
+ {NULL, NULL}
+};
+
+static void nn_(Square_init)(lua_State *L)
+{
+ luaT_pushmetaclass(L, torch_(Tensor_id));
+ luaT_registeratname(L, nn_(Square__), "nn");
+ lua_pop(L,1);
+}
+
+#endif
diff --git a/generic/Tanh.c b/generic/Tanh.c
new file mode 100644
index 0000000..5c24d15
--- /dev/null
+++ b/generic/Tanh.c
@@ -0,0 +1,45 @@
+#ifndef TH_GENERIC_FILE
+#define TH_GENERIC_FILE "generic/Tanh.c"
+#else
+
+static int nn_(Tanh_updateOutput)(lua_State *L)
+{
+ THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id));
+ THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_(Tensor_id));
+
+ THTensor_(resizeAs)(output, input);
+
+ TH_TENSOR_APPLY2(real, output, real, input, \
+ *output_data = tanh(*input_data);)
+
+ return 1;
+}
+
+static int nn_(Tanh_updateGradInput)(lua_State *L)
+{
+ THTensor *gradOutput = luaT_checkudata(L, 3, torch_(Tensor_id));
+ THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_(Tensor_id));
+ THTensor *gradInput = luaT_getfieldcheckudata(L, 1, "gradInput", torch_(Tensor_id));
+
+ THTensor_(resizeAs)(gradInput, output);
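+ /* d/dx tanh(x) = 1 - tanh(x)^2, computed from the saved output */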
+ TH_TENSOR_APPLY3(real, gradInput, real, gradOutput, real, output, \
+ real z = *output_data; \
+ *gradInput_data = *gradOutput_data * (1. - z*z););
+ return 1;
+}
+
+static const struct luaL_Reg nn_(Tanh__) [] = {
+ {"Tanh_updateOutput", nn_(Tanh_updateOutput)},
+ {"Tanh_updateGradInput", nn_(Tanh_updateGradInput)},
+ {NULL, NULL}
+};
+
+static void nn_(Tanh_init)(lua_State *L)
+{
+ luaT_pushmetaclass(L, torch_(Tensor_id));
+ luaT_registeratname(L, nn_(Tanh__), "nn");
+ lua_pop(L,1);
+
+}
+
+#endif
diff --git a/generic/TemporalConvolution.c b/generic/TemporalConvolution.c
new file mode 100644
index 0000000..fa14a22
--- /dev/null
+++ b/generic/TemporalConvolution.c
@@ -0,0 +1,194 @@
+#ifndef TH_GENERIC_FILE
+#define TH_GENERIC_FILE "generic/TemporalConvolution.c"
+#else
+
+static int nn_(TemporalConvolution_updateOutput)(lua_State *L)
+{
+ THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id));
+ int kW = luaT_getfieldcheckint(L, 1, "kW");
+ int dW = luaT_getfieldcheckint(L, 1, "dW");
+ int inputFrameSize = luaT_getfieldcheckint(L, 1, "inputFrameSize");
+ int outputFrameSize = luaT_getfieldcheckint(L, 1, "outputFrameSize");
+
+ THTensor *weight = luaT_getfieldcheckudata(L, 1, "weight", torch_(Tensor_id));
+ THTensor *bias = luaT_getfieldcheckudata(L, 1, "bias", torch_(Tensor_id));
+ THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_(Tensor_id));
+
+ THTensor *outputWindow, *inputWindow;
+ int nInputFrame, nOutputFrame;
+ long k;
+
+ luaL_argcheck(L, input->nDimension == 2, 2, "2D tensor expected");
+ luaL_argcheck(L, input->size[1] == inputFrameSize, 2, "invalid input frame size");
+ luaL_argcheck(L, input->size[0] >= kW, 2, "input sequence smaller than kernel size");
+
+ input = THTensor_(newContiguous)(input);
+ outputWindow = THTensor_(new)();
+ inputWindow = THTensor_(new)();
+
+ nInputFrame = input->size[0];
+ nOutputFrame = (nInputFrame - kW) / dW + 1;
+
+ THTensor_(resize2d)(output,
+ nOutputFrame,
+ outputFrameSize);
+
+ /* bias first */
+ for(k = 0; k < nOutputFrame; k++)
+ {
+ THTensor_(select)(outputWindow, output, 0, k);
+ THTensor_(copy)(outputWindow, bias);
+ }
+
+ /* group output frames whose input windows do not overlap and compute each group with a single addmm over an unfolded (setStorage2d) view of the input */
+ for(k = 0; nOutputFrame > 0; k++)
+ {
+ long outputFrameStride = (kW-1)/dW+1;
+ long inputFrameStride = outputFrameStride*dW;
+ long nFrame = (nInputFrame-k*dW-kW)/inputFrameStride + 1;
+ nOutputFrame -= nFrame;
+
+ THTensor_(setStorage2d)(inputWindow, input->storage,
+ input->storageOffset+k*dW*input->size[1],
+ nFrame, inputFrameStride*input->size[1],
+ kW*input->size[1], 1);
+
+ THTensor_(setStorage2d)(outputWindow, output->storage,
+ output->storageOffset + k*output->size[1],
+ nFrame, outputFrameStride*output->size[1],
+ output->size[1], 1);
+
+ THTensor_(transpose)(weight, NULL, 0, 1);
+ THTensor_(addmm)(outputWindow, 1, outputWindow, 1, inputWindow, weight);
+ THTensor_(transpose)(weight, NULL, 0, 1);
+ }
+
+ THTensor_(free)(outputWindow);
+ THTensor_(free)(inputWindow);
+ THTensor_(free)(input);
+
+ return 1;
+}
+
+static int nn_(TemporalConvolution_updateGradInput)(lua_State *L)
+{
+ THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id));
+ THTensor *gradOutput = luaT_checkudata(L, 3, torch_(Tensor_id));
+ int kW = luaT_getfieldcheckint(L, 1, "kW");
+ int dW = luaT_getfieldcheckint(L, 1, "dW");
+ long nInputFrame = input->size[0];
+ long nOutputFrame = gradOutput->size[0];
+
+ THTensor *weight = luaT_getfieldcheckudata(L, 1, "weight", torch_(Tensor_id));
+ THTensor *gradInput = luaT_getfieldcheckudata(L, 1, "gradInput", torch_(Tensor_id));
+
+ THTensor *gradOutputWindow;
+ THTensor *gradInputWindow;
+ long k;
+
+ gradOutputWindow = THTensor_(new)();
+ gradInputWindow = THTensor_(new)();
+
+ THTensor_(resizeAs)(gradInput, input);
+ THTensor_(zero)(gradInput);
+
+ /* same non-overlapping-window grouping as in updateOutput, applied to the gradients */
+ for(k = 0; nOutputFrame > 0; k++)
+ {
+ long outputFrameStride = (kW-1)/dW+1;
+ long inputFrameStride = outputFrameStride*dW;
+ long nFrame = (nInputFrame-k*dW-kW)/inputFrameStride + 1;
+ nOutputFrame -= nFrame;
+
+ THTensor_(setStorage2d)(gradOutputWindow, gradOutput->storage,
+ gradOutput->storageOffset + k*gradOutput->size[1],
+ nFrame, outputFrameStride*gradOutput->size[1],
+ gradOutput->size[1], 1);
+
+ THTensor_(setStorage2d)(gradInputWindow, gradInput->storage,
+ gradInput->storageOffset+k*dW*gradInput->size[1],
+ nFrame, inputFrameStride*gradInput->size[1],
+ kW*gradInput->size[1], 1);
+
+ THTensor_(addmm)(gradInputWindow, 1, gradInputWindow, 1, gradOutputWindow, weight);
+ }
+
+ THTensor_(free)(gradOutputWindow);
+ THTensor_(free)(gradInputWindow);
+
+ return 1;
+}
+
+static int nn_(TemporalConvolution_accGradParameters)(lua_State *L)
+{
+ THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id));
+ THTensor *gradOutput = luaT_checkudata(L, 3, torch_(Tensor_id));
+ real scale = luaL_optnumber(L, 4, 1);
+ int kW = luaT_getfieldcheckint(L, 1, "kW");
+ int dW = luaT_getfieldcheckint(L, 1, "dW");
+ long nInputFrame = input->size[0];
+ long nOutputFrame = gradOutput->size[0];
+
+ THTensor *gradWeight = luaT_getfieldcheckudata(L, 1, "gradWeight", torch_(Tensor_id));
+ THTensor *gradBias = luaT_getfieldcheckudata(L, 1, "gradBias", torch_(Tensor_id));
+
+ THTensor *gradOutputWindow;
+ THTensor *inputWindow;
+ long k;
+
+ input = THTensor_(newContiguous)(input);
+ gradOutputWindow = THTensor_(new)();
+ inputWindow = THTensor_(new)();
+
+ /* bias first */
+ for(k = 0; k < nOutputFrame; k++)
+ {
+ THTensor_(select)(gradOutputWindow, gradOutput, 0, k);
+ THTensor_(cadd)(gradBias, gradBias, scale, gradOutputWindow);
+ }
+
+ /* same grouping, this time accumulating the weight gradient with addmm */
+ for(k = 0; nOutputFrame > 0; k++)
+ {
+ long outputFrameStride = (kW-1)/dW+1;
+ long inputFrameStride = outputFrameStride*dW;
+ long nFrame = (nInputFrame-k*dW-kW)/inputFrameStride + 1;
+ nOutputFrame -= nFrame;
+
+ THTensor_(setStorage2d)(inputWindow, input->storage,
+ input->storageOffset+k*dW*input->size[1],
+ nFrame, inputFrameStride*input->size[1],
+ kW*input->size[1], 1);
+
+ THTensor_(setStorage2d)(gradOutputWindow, gradOutput->storage,
+ gradOutput->storageOffset + k*gradOutput->size[1],
+ nFrame, outputFrameStride*gradOutput->size[1],
+ gradOutput->size[1], 1);
+
+ THTensor_(transpose)(gradOutputWindow, NULL, 0, 1);
+ THTensor_(addmm)(gradWeight, 1, gradWeight, scale, gradOutputWindow, inputWindow);
+ THTensor_(transpose)(gradOutputWindow, NULL, 0, 1);
+ }
+
+ THTensor_(free)(gradOutputWindow);
+ THTensor_(free)(inputWindow);
+ THTensor_(free)(input);
+
+ return 0;
+}
+
+static const struct luaL_Reg nn_(TemporalConvolution__) [] = {
+ {"TemporalConvolution_updateOutput", nn_(TemporalConvolution_updateOutput)},
+ {"TemporalConvolution_updateGradInput", nn_(TemporalConvolution_updateGradInput)},
+ {"TemporalConvolution_accGradParameters", nn_(TemporalConvolution_accGradParameters)},
+ {NULL, NULL}
+};
+
+static void nn_(TemporalConvolution_init)(lua_State *L)
+{
+ luaT_pushmetaclass(L, torch_(Tensor_id));
+ luaT_registeratname(L, nn_(TemporalConvolution__), "nn");
+ lua_pop(L,1);
+}
+
+#endif
diff --git a/generic/TemporalSubSampling.c b/generic/TemporalSubSampling.c
new file mode 100644
index 0000000..39e7f3b
--- /dev/null
+++ b/generic/TemporalSubSampling.c
@@ -0,0 +1,139 @@
+#ifndef TH_GENERIC_FILE
+#define TH_GENERIC_FILE "generic/TemporalSubSampling.c"
+#else
+
+static int nn_(TemporalSubSampling_updateOutput)(lua_State *L)
+{
+ THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id));
+ int kW = luaT_getfieldcheckint(L, 1, "kW");
+ int dW = luaT_getfieldcheckint(L, 1, "dW");
+ int inputFrameSize = luaT_getfieldcheckint(L, 1, "inputFrameSize");
+
+ THTensor *weight = luaT_getfieldcheckudata(L, 1, "weight", torch_(Tensor_id));
+ THTensor *bias = luaT_getfieldcheckudata(L, 1, "bias", torch_(Tensor_id));
+ THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_(Tensor_id));
+
+ THTensor *outputFrame, *inputWindow;
+ int nInputFrame, nOutputFrame;
+ long k;
+
+ luaL_argcheck(L, input->nDimension == 2, 2, "2D tensor expected");
+ luaL_argcheck(L, input->size[1] == inputFrameSize, 2, "invalid input frame size");
+ luaL_argcheck(L, input->size[0] >= kW, 2, "input sequence smaller than kernel size");
+
+ outputFrame = THTensor_(new)();
+ inputWindow = THTensor_(new)();
+
+ nInputFrame = input->size[0];
+ nOutputFrame = (nInputFrame - kW) / dW + 1;
+
+ THTensor_(resize2d)(output,
+ nOutputFrame,
+ inputFrameSize);
+
+ for(k = 0; k < nOutputFrame; k++)
+ {
+ THTensor_(narrow)(inputWindow, input, 0, k*dW, kW);
+ THTensor_(select)(outputFrame, output, 0, k);
+ THTensor_(sum)(outputFrame, inputWindow, 0);
+ THTensor_(cmul)(outputFrame, outputFrame, weight);
+ THTensor_(cadd)(outputFrame, outputFrame, 1, bias);
+ }
+
+ THTensor_(free)(outputFrame);
+ THTensor_(free)(inputWindow);
+
+ return 1;
+}
+
+static int nn_(TemporalSubSampling_updateGradInput)(lua_State *L)
+{
+ THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id));
+ THTensor *gradOutput = luaT_checkudata(L, 3, torch_(Tensor_id));
+ int kW = luaT_getfieldcheckint(L, 1, "kW");
+ int dW = luaT_getfieldcheckint(L, 1, "dW");
+
+ THTensor *weight = luaT_getfieldcheckudata(L, 1, "weight", torch_(Tensor_id));
+ THTensor *gradInput = luaT_getfieldcheckudata(L, 1, "gradInput", torch_(Tensor_id));
+
+ THTensor *gradOutputFrame;
+ THTensor *gradInputWindow, *buffer, *kwunit;
+ long k;
+
+ gradOutputFrame = THTensor_(new)();
+ gradInputWindow = THTensor_(new)();
+ buffer = THTensor_(new)();
+ kwunit = THTensor_(newWithSize1d)(kW);
+
+ THTensor_(fill)(kwunit, 1);
+ THTensor_(resizeAs)(gradInput, input);
+ THTensor_(zero)(gradInput);
+
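+ /* spread each output-frame gradient over its kW input frames: addr adds the outer */
+ /* product of a length-kW vector of ones with (weight .* gradOutputFrame) to the window */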
+ for(k = 0; k < gradOutput->size[0]; k++)
+ {
+ THTensor_(narrow)(gradInputWindow, gradInput, 0, k*dW, kW);
+ THTensor_(select)(gradOutputFrame, gradOutput, 0, k);
+ THTensor_(cmul)(buffer, weight, gradOutputFrame);
+ THTensor_(addr)(gradInputWindow, 1, gradInputWindow, 1, kwunit, buffer);
+ }
+
+ THTensor_(free)(gradOutputFrame);
+ THTensor_(free)(gradInputWindow);
+ THTensor_(free)(buffer);
+ THTensor_(free)(kwunit);
+
+ return 1;
+}
+
+static int nn_(TemporalSubSampling_accGradParameters)(lua_State *L)
+{
+ THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id));
+ THTensor *gradOutput = luaT_checkudata(L, 3, torch_(Tensor_id));
+ real scale = luaL_optnumber(L, 4, 1);
+
+ int kW = luaT_getfieldcheckint(L, 1, "kW");
+ int dW = luaT_getfieldcheckint(L, 1, "dW");
+
+ THTensor *gradWeight = luaT_getfieldcheckudata(L, 1, "gradWeight", torch_(Tensor_id));
+ THTensor *gradBias = luaT_getfieldcheckudata(L, 1, "gradBias", torch_(Tensor_id));
+
+ THTensor *gradOutputFrame;
+ THTensor *inputWindow, *buffer;
+ long k;
+
+ gradOutputFrame = THTensor_(new)();
+ inputWindow = THTensor_(new)();
+ buffer = THTensor_(new)();
+
+ for(k = 0; k < gradOutput->size[0]; k++)
+ {
+ THTensor_(narrow)(inputWindow, input, 0, k*dW, kW);
+ THTensor_(select)(gradOutputFrame, gradOutput, 0, k);
+ THTensor_(sum)(buffer, inputWindow, 0);
+ THTensor_(addcmul)(gradWeight, gradWeight, scale, buffer, gradOutputFrame);
+ THTensor_(cadd)(gradBias, gradBias, scale, gradOutputFrame);
+ }
+
+ THTensor_(free)(gradOutputFrame);
+ THTensor_(free)(inputWindow);
+ THTensor_(free)(buffer);
+
+ return 0;
+}
+
+static const struct luaL_Reg nn_(TemporalSubSampling__) [] = {
+ {"TemporalSubSampling_updateOutput", nn_(TemporalSubSampling_updateOutput)},
+ {"TemporalSubSampling_updateGradInput", nn_(TemporalSubSampling_updateGradInput)},
+ {"TemporalSubSampling_accGradParameters", nn_(TemporalSubSampling_accGradParameters)},
+ {NULL, NULL}
+};
+
+static void nn_(TemporalSubSampling_init)(lua_State *L)
+{
+ luaT_pushmetaclass(L, torch_(Tensor_id));
+ luaT_registeratname(L, nn_(TemporalSubSampling__), "nn");
+ lua_pop(L,1);
+}
+
+#endif
diff --git a/generic/Threshold.c b/generic/Threshold.c
new file mode 100644
index 0000000..760e842
--- /dev/null
+++ b/generic/Threshold.c
@@ -0,0 +1,47 @@
+#ifndef TH_GENERIC_FILE
+#define TH_GENERIC_FILE "generic/Threshold.c"
+#else
+
+static int nn_(Threshold_updateOutput)(lua_State *L)
+{
+ THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id));
+ real val = luaT_getfieldchecknumber(L, 1, "val");
+ real threshold = luaT_getfieldchecknumber(L, 1, "threshold");
+ THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_(Tensor_id));
+
+ THTensor_(resizeAs)(output, input);
+ TH_TENSOR_APPLY2(real, output, real, input, \
+ *output_data = (*input_data > threshold) ? *input_data : val;);
+
+ return 1;
+}
+
+static int nn_(Threshold_updateGradInput)(lua_State *L)
+{
+ THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id));
+ THTensor *gradOutput = luaT_checkudata(L, 3, torch_(Tensor_id));
+ real threshold = luaT_getfieldchecknumber(L, 1, "threshold");
+ THTensor *gradInput = luaT_getfieldcheckudata(L, 1, "gradInput", torch_(Tensor_id));
+
+ THTensor_(resizeAs)(gradInput, input);
+ TH_TENSOR_APPLY3(real, gradInput, real, gradOutput, real, input, \
+ if ((*input_data) > threshold) *gradInput_data = 1; \
+ else *gradInput_data = 0; \
+ *gradInput_data = (*gradOutput_data) * (*gradInput_data););
+ return 1;
+}
+
+static const struct luaL_Reg nn_(Threshold__) [] = {
+ {"Threshold_updateOutput", nn_(Threshold_updateOutput)},
+ {"Threshold_updateGradInput", nn_(Threshold_updateGradInput)},
+ {NULL, NULL}
+};
+
+static void nn_(Threshold_init)(lua_State *L)
+{
+ luaT_pushmetaclass(L, torch_(Tensor_id));
+ luaT_registeratname(L, nn_(Threshold__), "nn");
+ lua_pop(L,1);
+}
+
+#endif
diff --git a/generic/VolumetricConvolution.c b/generic/VolumetricConvolution.c
new file mode 100644
index 0000000..0ec2247
--- /dev/null
+++ b/generic/VolumetricConvolution.c
@@ -0,0 +1,118 @@
+#ifndef TH_GENERIC_FILE
+#define TH_GENERIC_FILE "generic/VolumetricConvolution.c"
+#else
+
+static int nn_(VolumetricConvolution_updateOutput)(lua_State *L)
+{
+ THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id));
+ int dT = luaT_getfieldcheckint(L, 1, "dT");
+ int dW = luaT_getfieldcheckint(L, 1, "dW");
+ int dH = luaT_getfieldcheckint(L, 1, "dH");
+
+ THTensor *weight = luaT_getfieldcheckudata(L, 1, "weight", torch_(Tensor_id));
+ THTensor *bias = luaT_getfieldcheckudata(L, 1, "bias", torch_(Tensor_id));
+ THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_(Tensor_id));
+
+ luaL_argcheck(L, input->nDimension == 4, 2, "4D tensor expected");
+
+ long nOutputPlane = weight->size[0];
+ long nInputPlane = weight->size[1];
+ long kT = weight->size[2];
+ long kH = weight->size[3];
+ long kW = weight->size[4];
+ long inputDepth = input->size[1];
+ long inputHeight = input->size[2];
+ long inputWidth = input->size[3];
+ long outputDepth = (inputDepth - kT) / dT + 1;
+ long outputWidth = (inputWidth - kW) / dW + 1;
+ long outputHeight = (inputHeight - kH) / dH + 1;
+
+ THTensor_(resize4d)(output, nOutputPlane, outputDepth, outputHeight, outputWidth);
+
+ /* add bias */
+ long i;
+ THTensor *outn = THTensor_(new)();
+ for (i=0; i<bias->size[0]; i++) {
+ THTensor_(select)(outn,output,0,i);
+ THTensor_(fill)(outn, THTensor_(get1d)(bias, i));
+ }
+ THTensor_(free)(outn);
+
+ /* do convolutions */
+ THTensor_(conv3Dmv)(output, 1.0, 1.0, input, weight, dT, dH, dW, "V", "X");
+
+ return 1;
+}
+
+
+static int nn_(VolumetricConvolution_updateGradInput)(lua_State *L)
+{
+ THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id));
+ THTensor *gradOutput = luaT_checkudata(L, 3, torch_(Tensor_id));
+ int dT = luaT_getfieldcheckint(L, 1, "dT");
+ int dW = luaT_getfieldcheckint(L, 1, "dW");
+ int dH = luaT_getfieldcheckint(L, 1, "dH");
+ int nOutputPlane = luaT_getfieldcheckint(L, 1, "nOutputPlane");
+
+ THTensor *weight = luaT_getfieldcheckudata(L, 1, "weight", torch_(Tensor_id));
+ THTensor *gradInput = luaT_getfieldcheckudata(L, 1, "gradInput", torch_(Tensor_id));
+
+ THArgCheck( nOutputPlane == gradOutput->size[0], 1, "Number of output features is not equal to nOutputPlane" );
+
+ /* gradient to input */
+ THTensor *tweight = THTensor_(newTranspose)(weight,0,1);
+ THTensor_(conv3Dmv)(gradInput, 0.0, 1.0, gradOutput, tweight, dT, dH, dW, "F", "C");
+ THTensor_(free)(tweight);
+
+ return 1;
+}
+
+static int nn_(VolumetricConvolution_accGradParameters)(lua_State *L)
+{
+ THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id));
+ THTensor *gradOutput = luaT_checkudata(L, 3, torch_(Tensor_id));
+ real scale = luaL_optnumber(L, 4, 1);
+ int dT = luaT_getfieldcheckint(L, 1, "dT");
+ int dW = luaT_getfieldcheckint(L, 1, "dW");
+ int dH = luaT_getfieldcheckint(L, 1, "dH");
+ int nOutputPlane = luaT_getfieldcheckint(L, 1, "nOutputPlane");
+
+ THTensor *weight = luaT_getfieldcheckudata(L, 1, "weight", torch_(Tensor_id));
+ THTensor *gradWeight = luaT_getfieldcheckudata(L, 1, "gradWeight", torch_(Tensor_id));
+ THTensor *gradBias = luaT_getfieldcheckudata(L, 1, "gradBias", torch_(Tensor_id));
+
+ THArgCheck( nOutputPlane == gradOutput->size[0], 1, "Number of output features is not equal to nOutputPlane" );
+
+ long k;
+
+ /* gradient to bias */
+ real *gradBias_data = THTensor_(data)(gradBias);
+ THTensor* gradOutSlice = THTensor_(new)();
+ for(k = 0; k < nOutputPlane; k++)
+ {
+ THTensor_(select)(gradOutSlice, gradOutput, 0, k);
+ gradBias_data[k] += scale*THTensor_(sumall)(gradOutSlice);
+ }
+ THTensor_(free)(gradOutSlice);
+
+ /* gradient to kernels */
+ THTensor_(conv3DRevger)(gradWeight, 1.0, scale, input, gradOutput, dT, dH, dW);
+
+ return 0;
+}
+
+static const struct luaL_Reg nn_(VolumetricConvolution__) [] = {
+ {"VolumetricConvolution_updateOutput", nn_(VolumetricConvolution_updateOutput)},
+ {"VolumetricConvolution_updateGradInput", nn_(VolumetricConvolution_updateGradInput)},
+ {"VolumetricConvolution_accGradParameters", nn_(VolumetricConvolution_accGradParameters)},
+ {NULL, NULL}
+};
+
+static void nn_(VolumetricConvolution_init)(lua_State *L)
+{
+ luaT_pushmetaclass(L, torch_(Tensor_id));
+ luaT_registeratname(L, nn_(VolumetricConvolution__), "nn");
+ lua_pop(L,1);
+}
+
+#endif
diff --git a/init.c b/init.c
new file mode 100644
index 0000000..b2a528e
--- /dev/null
+++ b/init.c
@@ -0,0 +1,163 @@
+#include "TH.h"
+#include "luaT.h"
+
+#define torch_(NAME) TH_CONCAT_3(torch_, Real, NAME)
+#define torch_string_(NAME) TH_CONCAT_STRING_3(torch., Real, NAME)
+#define nn_(NAME) TH_CONCAT_3(nn_, Real, NAME)
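+/* each file under generic/ is included once per floating point type via THGenerateFloatTypes.h, */
+/* so the nn_() / torch_() macros expand to Float- and Double-prefixed functions and tensor ids */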
+
+static const void* torch_FloatTensor_id = NULL;
+static const void* torch_DoubleTensor_id = NULL;
+
+#include "generic/Square.c"
+#include "THGenerateFloatTypes.h"
+
+#include "generic/Sqrt.c"
+#include "THGenerateFloatTypes.h"
+
+#include "generic/HardTanh.c"
+#include "THGenerateFloatTypes.h"
+
+#include "generic/Exp.c"
+#include "THGenerateFloatTypes.h"
+
+#include "generic/LogSigmoid.c"
+#include "THGenerateFloatTypes.h"
+
+#include "generic/LogSoftMax.c"
+#include "THGenerateFloatTypes.h"
+
+#include "generic/Sigmoid.c"
+#include "THGenerateFloatTypes.h"
+
+#include "generic/SoftPlus.c"
+#include "THGenerateFloatTypes.h"
+
+#include "generic/Tanh.c"
+#include "THGenerateFloatTypes.h"
+
+#include "generic/Abs.c"
+#include "THGenerateFloatTypes.h"
+
+#include "generic/HardShrink.c"
+#include "THGenerateFloatTypes.h"
+
+#include "generic/SoftShrink.c"
+#include "THGenerateFloatTypes.h"
+
+#include "generic/Threshold.c"
+#include "THGenerateFloatTypes.h"
+
+#include "generic/SoftMax.c"
+#include "THGenerateFloatTypes.h"
+
+#include "generic/Max.c"
+#include "THGenerateFloatTypes.h"
+
+#include "generic/Min.c"
+#include "THGenerateFloatTypes.h"
+
+#include "generic/MSECriterion.c"
+#include "THGenerateFloatTypes.h"
+
+#include "generic/AbsCriterion.c"
+#include "THGenerateFloatTypes.h"
+
+#include "generic/SparseLinear.c"
+#include "THGenerateFloatTypes.h"
+
+#include "generic/TemporalConvolution.c"
+#include "THGenerateFloatTypes.h"
+
+#include "generic/TemporalSubSampling.c"
+#include "THGenerateFloatTypes.h"
+
+#include "generic/SpatialConvolution.c"
+#include "THGenerateFloatTypes.h"
+
+#include "generic/SpatialConvolutionMap.c"
+#include "THGenerateFloatTypes.h"
+
+#include "generic/SpatialSubSampling.c"
+#include "THGenerateFloatTypes.h"
+
+#include "generic/SpatialMaxPooling.c"
+#include "THGenerateFloatTypes.h"
+
+#include "generic/VolumetricConvolution.c"
+#include "THGenerateFloatTypes.h"
+
+#include "generic/MultiMarginCriterion.c"
+#include "THGenerateFloatTypes.h"
+
+#include "generic/MultiLabelMarginCriterion.c"
+#include "THGenerateFloatTypes.h"
+
+DLL_EXPORT int luaopen_libnn(lua_State *L)
+{
+ torch_FloatTensor_id = luaT_checktypename2id(L, "torch.FloatTensor");
+ torch_DoubleTensor_id = luaT_checktypename2id(L, "torch.DoubleTensor");
+
+ lua_newtable(L);
+ lua_pushvalue(L, -1);
+ lua_setfield(L, LUA_GLOBALSINDEX, "nn");
+
+ nn_FloatMin_init(L);
+ nn_FloatMax_init(L);
+ nn_FloatExp_init(L);
+ nn_FloatSqrt_init(L);
+ nn_FloatSquare_init(L);
+ nn_FloatHardTanh_init(L);
+ nn_FloatLogSoftMax_init(L);
+ nn_FloatMSECriterion_init(L);
+ nn_FloatAbsCriterion_init(L);
+ nn_FloatLogSigmoid_init(L);
+ nn_FloatSigmoid_init(L);
+ nn_FloatSoftMax_init(L);
+ nn_FloatSoftPlus_init(L);
+ nn_FloatTanh_init(L);
+ nn_FloatAbs_init(L);
+ nn_FloatHardShrink_init(L);
+ nn_FloatSoftShrink_init(L);
+ nn_FloatThreshold_init(L);
+ nn_FloatSparseLinear_init(L);
+ nn_FloatTemporalConvolution_init(L);
+ nn_FloatTemporalSubSampling_init(L);
+ nn_FloatSpatialConvolution_init(L);
+ nn_FloatSpatialConvolutionMap_init(L);
+ nn_FloatSpatialSubSampling_init(L);
+ nn_FloatSpatialMaxPooling_init(L);
+ nn_FloatVolumetricConvolution_init(L);
+ nn_FloatMultiMarginCriterion_init(L);
+ nn_FloatMultiLabelMarginCriterion_init(L);
+
+ nn_DoubleMin_init(L);
+ nn_DoubleMax_init(L);
+ nn_DoubleExp_init(L);
+ nn_DoubleSqrt_init(L);
+ nn_DoubleSquare_init(L);
+ nn_DoubleHardTanh_init(L);
+ nn_DoubleLogSoftMax_init(L);
+ nn_DoubleMSECriterion_init(L);
+ nn_DoubleAbsCriterion_init(L);
+ nn_DoubleLogSigmoid_init(L);
+ nn_DoubleSigmoid_init(L);
+ nn_DoubleSoftMax_init(L);
+ nn_DoubleSoftPlus_init(L);
+ nn_DoubleTanh_init(L);
+ nn_DoubleAbs_init(L);
+ nn_DoubleHardShrink_init(L);
+ nn_DoubleSoftShrink_init(L);
+ nn_DoubleThreshold_init(L);
+ nn_DoubleSparseLinear_init(L);
+ nn_DoubleTemporalConvolution_init(L);
+ nn_DoubleTemporalSubSampling_init(L);
+ nn_DoubleSpatialConvolution_init(L);
+ nn_DoubleSpatialConvolutionMap_init(L);
+ nn_DoubleSpatialSubSampling_init(L);
+ nn_DoubleSpatialMaxPooling_init(L);
+ nn_DoubleVolumetricConvolution_init(L);
+ nn_DoubleMultiMarginCriterion_init(L);
+ nn_DoubleMultiLabelMarginCriterion_init(L);
+
+ return 1;
+}
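luaopen_libnn creates the global nn table, then calls one *_init function per module and per floating-point tensor type; each of those, like nn_(VolumetricConvolution_init) above, registers its luaL_Reg table on the corresponding Tensor metatable under the name "nn". A rough sketch of how the Lua wrappers reach those C functions (the self table and values here are illustrative only):

    require 'libnn'
    local x    = torch.Tensor(4):fill(0.5)
    local self = { output = torch.Tensor() }
    -- each module's Lua code calls its C counterpart through the tensor's "nn" field,
    -- e.g. Tanh.lua does roughly:
    x.nn.Tanh_updateOutput(self, x)    -- fills self.output with tanh(x)
    print(self.output)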
diff --git a/init.lua b/init.lua
new file mode 100644
index 0000000..c6e7df0
--- /dev/null
+++ b/init.lua
@@ -0,0 +1,91 @@
+require('torch')
+require('libnn')
+
+torch.include('nn', 'Module.lua')
+
+torch.include('nn', 'Concat.lua')
+torch.include('nn', 'Parallel.lua')
+torch.include('nn', 'Sequential.lua')
+
+torch.include('nn', 'Linear.lua')
+torch.include('nn', 'SparseLinear.lua')
+torch.include('nn', 'Reshape.lua')
+torch.include('nn', 'Select.lua')
+torch.include('nn', 'Narrow.lua')
+torch.include('nn', 'Replicate.lua')
+
+torch.include('nn', 'Copy.lua')
+torch.include('nn', 'Min.lua')
+torch.include('nn', 'Max.lua')
+torch.include('nn', 'Mean.lua')
+torch.include('nn', 'Sum.lua')
+torch.include('nn', 'CMul.lua')
+torch.include('nn', 'Mul.lua')
+torch.include('nn', 'Add.lua')
+
+torch.include('nn', 'CAddTable.lua')
+torch.include('nn', 'CDivTable.lua')
+torch.include('nn', 'CMulTable.lua')
+torch.include('nn', 'CSubTable.lua')
+
+torch.include('nn', 'Euclidean.lua')
+torch.include('nn', 'WeightedEuclidean.lua')
+torch.include('nn', 'PairwiseDistance.lua')
+torch.include('nn', 'CosineDistance.lua')
+torch.include('nn', 'DotProduct.lua')
+
+torch.include('nn', 'Exp.lua')
+torch.include('nn', 'HardTanh.lua')
+torch.include('nn', 'LogSigmoid.lua')
+torch.include('nn', 'LogSoftMax.lua')
+torch.include('nn', 'Sigmoid.lua')
+torch.include('nn', 'SoftMax.lua')
+torch.include('nn', 'SoftMin.lua')
+torch.include('nn', 'SoftPlus.lua')
+torch.include('nn', 'SoftSign.lua')
+torch.include('nn', 'Tanh.lua')
+torch.include('nn', 'Abs.lua')
+torch.include('nn', 'Power.lua')
+torch.include('nn', 'Square.lua')
+torch.include('nn', 'Sqrt.lua')
+torch.include('nn', 'HardShrink.lua')
+torch.include('nn', 'SoftShrink.lua')
+torch.include('nn', 'Threshold.lua')
+
+torch.include('nn', 'LookupTable.lua')
+torch.include('nn', 'SpatialConvolution.lua')
+torch.include('nn', 'SpatialConvolutionMap.lua')
+torch.include('nn', 'SpatialSubSampling.lua')
+torch.include('nn', 'SpatialMaxPooling.lua')
+torch.include('nn', 'SpatialLPPooling.lua')
+torch.include('nn', 'TemporalConvolution.lua')
+torch.include('nn', 'TemporalSubSampling.lua')
+torch.include('nn', 'SpatialSubtractiveNormalization.lua')
+torch.include('nn', 'SpatialZeroPadding.lua')
+
+torch.include('nn', 'VolumetricConvolution.lua')
+
+torch.include('nn', 'ParallelTable.lua')
+torch.include('nn', 'ConcatTable.lua')
+torch.include('nn', 'SplitTable.lua')
+torch.include('nn', 'JoinTable.lua')
+torch.include('nn', 'CriterionTable.lua')
+torch.include('nn', 'Identity.lua')
+
+torch.include('nn', 'Criterion.lua')
+torch.include('nn', 'MSECriterion.lua')
+torch.include('nn', 'MarginCriterion.lua')
+torch.include('nn', 'AbsCriterion.lua')
+torch.include('nn', 'ClassNLLCriterion.lua')
+torch.include('nn', 'MultiCriterion.lua')
+torch.include('nn', 'L1HingeEmbeddingCriterion.lua')
+torch.include('nn', 'HingeEmbeddingCriterion.lua')
+torch.include('nn', 'CosineEmbeddingCriterion.lua')
+torch.include('nn', 'MarginRankingCriterion.lua')
+torch.include('nn', 'MultiMarginCriterion.lua')
+torch.include('nn', 'MultiLabelMarginCriterion.lua')
+
+torch.include('nn', 'StochasticGradient.lua')
+
+torch.include('nn', 'Jacobian.lua')
+torch.include('nn', 'test.lua')
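init.lua only loads the compiled backend and pulls in the Lua definitions; everything is then used through the nn table. A minimal sketch of a network assembled from the modules included above (layer sizes, target values and the learning rate are arbitrary illustration values):

    require 'nn'
    local mlp = nn.Sequential()
    mlp:add(nn.Linear(10, 25))
    mlp:add(nn.Tanh())
    mlp:add(nn.Linear(25, 1))

    local criterion = nn.MSECriterion()
    local x, y = torch.rand(10), torch.Tensor(1):fill(0.5)

    local pred = mlp:forward(x)
    local err  = criterion:forward(pred, y)
    mlp:zeroGradParameters()
    mlp:backward(x, criterion:backward(pred, y))
    mlp:updateParameters(0.01)             -- one plain gradient step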
diff --git a/test/test.lua b/test/test.lua
new file mode 100644
index 0000000..c18d3a2
--- /dev/null
+++ b/test/test.lua
@@ -0,0 +1,1029 @@
+require 'torch'
+require 'random'
+
+local mytester = torch.Tester()
+local jac
+
+local precision = 1e-5
+
+local nntest = {}
+local nntestx = {}
+
+function nntest.Add()
+ local ini = math.random(10,20)
+ local inj = math.random(10,20)
+ local ink = math.random(10,20)
+ local input = torch.Tensor(ini,inj,ink):zero()
+ local module = nn.Add(ini*inj*ink)
+
+ local err = jac.testJacobian(module,input)
+ mytester:assertlt(err,precision, 'error on state ')
+ local err = jac.testJacobianParameters(module, input, module.bias, module.gradBias)
+ mytester:assertlt(err,precision, 'error on bias ')
+
+ local err = jac.testJacobianUpdateParameters(module, input, module.bias)
+ mytester:assertlt(err,precision, 'error on bias [direct update]')
+
+ for t,err in pairs(jac.testAllUpdate(module, input, 'bias', 'gradBias')) do
+ mytester:assertlt(err, precision, string.format(
+ 'error on bias [%s]', t))
+ end
+
+ local ferr,berr = jac.testIO(module,input)
+ mytester:asserteq(ferr, 0, torch.typename(module) .. ' - i/o forward err ')
+ mytester:asserteq(berr, 0, torch.typename(module) .. ' - i/o backward err ')
+end
+
+function nntest.CMul()
+ local ini = math.random(5,15)
+ local inj = math.random(5,15)
+ local ink = math.random(5,15)
+ local input = torch.Tensor(ini,inj,ink):zero()
+ local module = nn.CMul(ini*inj*ink)
+
+ local err = jac.testJacobian(module,input)
+ mytester:assertlt(err,precision, 'error on state ')
+
+ local err = jac.testJacobianParameters(module, input, module.weight, module.gradWeight)
+ mytester:assertlt(err,precision, 'error on weight ')
+
+ local err = jac.testJacobianUpdateParameters(module, input, module.weight)
+ mytester:assertlt(err,precision, 'error on weight [direct update] ')
+
+ for t,err in pairs(jac.testAllUpdate(module, input, 'weight', 'gradWeight')) do
+ mytester:assertlt(err, precision, string.format(
+ 'error on weight [%s]', t))
+ end
+
+ local ferr,berr = jac.testIO(module,input)
+ mytester:asserteq(ferr, 0, torch.typename(module) .. ' - i/o forward err ')
+ mytester:asserteq(berr, 0, torch.typename(module) .. ' - i/o backward err ')
+end
+
+function nntest.Exp()
+ local ini = math.random(10,20)
+ local inj = math.random(10,20)
+ local ink = math.random(10,20)
+ local input = torch.Tensor(ini,inj,ink):zero()
+ local module = nn.Exp()
+
+ local err = jac.testJacobian(module,input)
+ mytester:assertlt(err,precision, 'error on state ')
+
+ local ferr,berr = jac.testIO(module,input)
+ mytester:asserteq(ferr, 0, torch.typename(module) .. ' - i/o forward err ')
+ mytester:asserteq(berr, 0, torch.typename(module) .. ' - i/o backward err ')
+end
+
+function nntest.HardTanh()
+ local ini = math.random(5,10)
+ local inj = math.random(5,10)
+ local ink = math.random(5,10)
+ local input = torch.Tensor(ink, inj, ini):zero()
+
+ local module = nn.HardTanh()
+
+ local err = jac.testJacobian(module, input)
+ mytester:assertlt(err, precision , 'error on state ')
+
+ local ferr, berr = jac.testIO(module, input)
+ mytester:asserteq(ferr, 0, torch.typename(module) .. ' - i/o forward err ')
+ mytester:asserteq(berr, 0, torch.typename(module) .. ' - i/o backward err ')
+end
+
+function nntest.Abs()
+ local ini = math.random(5,10)
+ local inj = math.random(5,10)
+ local ink = math.random(5,10)
+ local input = torch.Tensor(ink, inj, ini):zero()
+
+ local module = nn.Abs()
+
+ local err = jac.testJacobian(module, input)
+ mytester:assertlt(err, precision , 'error on state ')
+
+ local ferr, berr = jac.testIO(module, input)
+ mytester:asserteq(ferr, 0, torch.typename(module) .. ' - i/o forward err ')
+ mytester:asserteq(berr, 0, torch.typename(module) .. ' - i/o backward err ')
+end
+
+function nntest.Threshold()
+ local ini = math.random(5,10)
+ local inj = math.random(5,10)
+ local ink = math.random(5,10)
+ local input = torch.Tensor(ink, inj, ini):zero()
+
+ local module = nn.Threshold(random.uniform(-2,2),random.uniform(-2,2))
+
+ local err = nn.Jacobian.testJacobian(module, input)
+ mytester:assertlt(err, precision, 'error on state ')
+
+ local ferr, berr = nn.Jacobian.testIO(module, input)
+ mytester:asserteq(ferr, 0, torch.typename(module) .. ' - i/o forward err ')
+ mytester:asserteq(berr, 0, torch.typename(module) .. ' - i/o backward err ')
+end
+
+function nntest.HardShrink()
+ local ini = math.random(5,10)
+ local inj = math.random(5,10)
+ local ink = math.random(5,10)
+ local input = torch.Tensor(ink, inj, ini):zero()
+
+ local module = nn.HardShrink(math.random()/2)
+
+ local err = nn.Jacobian.testJacobian(module, input)
+ mytester:assertlt(err, precision, 'error on state ')
+
+ local ferr, berr = nn.Jacobian.testIO(module, input)
+ mytester:asserteq(ferr, 0, torch.typename(module) .. ' - i/o forward err ')
+ mytester:asserteq(berr, 0, torch.typename(module) .. ' - i/o backward err ')
+end
+
+function nntest.SoftShrink()
+ local ini = math.random(5,10)
+ local inj = math.random(5,10)
+ local ink = math.random(5,10)
+ local input = torch.Tensor(ink, inj, ini):zero()
+
+ local module = nn.SoftShrink(math.random()/2)
+
+ local err = nn.Jacobian.testJacobian(module, input)
+ mytester:assertlt(err, precision, 'error on state ')
+
+ local ferr, berr = nn.Jacobian.testIO(module, input)
+ mytester:asserteq(ferr, 0, torch.typename(module) .. ' - i/o forward err ')
+ mytester:asserteq(berr, 0, torch.typename(module) .. ' - i/o backward err ')
+end
+
+function nntest.Power()
+ local in1 = torch.rand(10,20)
+ local module = nn.Power(2)
+ local out = module:forward(in1)
+ local err = out:dist(in1:cmul(in1))
+ mytester:asserteq(err, 0, torch.typename(module) .. ' - forward err ')
+
+ local ini = math.random(5,10)
+ local inj = math.random(5,10)
+ local ink = math.random(5,10)
+ local pw = random.uniform()*math.random(1,10)
+ local input = torch.Tensor(ink, inj, ini):zero()
+
+ local module = nn.Power(pw)
+
+ local err = nn.Jacobian.testJacobian(module, input, 0.1, 2)
+ mytester:assertlt(err, precision, 'error on state ')
+
+ local ferr, berr = nn.Jacobian.testIO(module,input, 0.1, 2)
+ mytester:asserteq(ferr, 0, torch.typename(module) .. ' - i/o forward err ')
+ mytester:asserteq(berr, 0, torch.typename(module) .. ' - i/o backward err ')
+end
+
+function nntest.Square()
+ local in1 = torch.rand(10,20)
+ local module = nn.Square()
+ local out = module:forward(in1)
+ local err = out:dist(in1:cmul(in1))
+ mytester:asserteq(err, 0, torch.typename(module) .. ' - forward err ')
+
+ local ini = math.random(5,10)
+ local inj = math.random(5,10)
+ local ink = math.random(5,10)
+ local input = torch.Tensor(ink, inj, ini):zero()
+
+ local module = nn.Square()
+
+ local err = nn.Jacobian.testJacobian(module, input)
+ mytester:assertlt(err, precision, 'error on state ')
+
+ local ferr, berr = nn.Jacobian.testIO(module, input)
+ mytester:asserteq(ferr, 0, torch.typename(module) .. ' - i/o forward err ')
+ mytester:asserteq(berr, 0, torch.typename(module) .. ' - i/o backward err ')
+end
+
+function nntest.Sqrt()
+ local in1 = torch.rand(10,20)
+ local module = nn.Sqrt()
+ local out = module:forward(in1)
+ local err = out:dist(in1:sqrt())
+ mytester:asserteq(err, 0, torch.typename(module) .. ' - forward err ')
+
+ local ini = math.random(5,10)
+ local inj = math.random(5,10)
+ local ink = math.random(5,10)
+ local input = torch.Tensor(ink, inj, ini):zero()
+
+ local module = nn.Sqrt()
+
+ local err = nn.Jacobian.testJacobian(module, input, 0.1, 2)
+ mytester:assertlt(err, precision, 'error on state ')
+
+ local ferr, berr = nn.Jacobian.testIO(module, input, 0, 2)
+ mytester:asserteq(ferr, 0, torch.typename(module) .. ' - i/o forward err ')
+ mytester:asserteq(berr, 0, torch.typename(module) .. ' - i/o backward err ')
+end
+
+function nntest.Linear()
+ local ini = math.random(50,70)
+ local inj = math.random(50,70)
+ local input = torch.Tensor(ini):zero()
+ local module = nn.Linear(ini,inj)
+
+ -- 1D
+ local err = jac.testJacobian(module,input)
+ mytester:assertlt(err,precision, 'error on state ')
+
+ local err = jac.testJacobianParameters(module, input, module.weight, module.gradWeight)
+ mytester:assertlt(err,precision, 'error on weight ')
+
+ local err = jac.testJacobianParameters(module, input, module.bias, module.gradBias)
+ mytester:assertlt(err,precision, 'error on bias ')
+
+ local err = jac.testJacobianUpdateParameters(module, input, module.weight)
+ mytester:assertlt(err,precision, 'error on weight [direct update] ')
+
+ local err = jac.testJacobianUpdateParameters(module, input, module.bias)
+ mytester:assertlt(err,precision, 'error on bias [direct update] ')
+
+ for t,err in pairs(jac.testAllUpdate(module, input, 'weight', 'gradWeight')) do
+ mytester:assertlt(err, precision, string.format(
+ 'error on weight [%s]', t))
+ end
+
+ for t,err in pairs(jac.testAllUpdate(module, input, 'bias', 'gradBias')) do
+ mytester:assertlt(err, precision, string.format(
+ 'error on bias [%s]', t))
+ end
+
+ -- 2D
+ local nframe = math.random(50,70)
+ local input = torch.Tensor(nframe, ini):zero()
+
+ local err = jac.testJacobian(module,input)
+ mytester:assertlt(err,precision, 'error on state ')
+
+ local err = jac.testJacobianParameters(module, input, module.weight, module.gradWeight)
+ mytester:assertlt(err,precision, 'error on weight ')
+
+ local err = jac.testJacobianParameters(module, input, module.bias, module.gradBias)
+   mytester:assertlt(err,precision, 'error on bias ')
+
+ local err = jac.testJacobianUpdateParameters(module, input, module.weight)
+ mytester:assertlt(err,precision, 'error on weight [direct update] ')
+
+ local err = jac.testJacobianUpdateParameters(module, input, module.bias)
+ mytester:assertlt(err,precision, 'error on bias [direct update] ')
+
+ for t,err in pairs(jac.testAllUpdate(module, input, 'weight', 'gradWeight')) do
+ mytester:assertlt(err, precision, string.format(
+ 'error on weight [%s]', t))
+ end
+
+ for t,err in pairs(jac.testAllUpdate(module, input, 'bias', 'gradBias')) do
+ mytester:assertlt(err, precision, string.format(
+ 'error on bias [%s]', t))
+ end
+
+ -- IO
+ local ferr,berr = jac.testIO(module,input)
+ mytester:asserteq(ferr, 0, torch.typename(module) .. ' - i/o forward err ')
+ mytester:asserteq(berr, 0, torch.typename(module) .. ' - i/o backward err ')
+end
+
+function nntest.Euclidean()
+ local ini = math.random(50,70)
+ local inj = math.random(50,70)
+ local input = torch.Tensor(ini):zero()
+ local module = nn.Euclidean(ini,inj)
+
+ local err = jac.testJacobian(module,input)
+ mytester:assertlt(err,precision, 'error on state ')
+
+ local err = jac.testJacobianParameters(module, input, module.weight, module.gradWeight)
+ mytester:assertlt(err,precision, 'error on weight ')
+
+ local ferr,berr = jac.testIO(module,input)
+ mytester:asserteq(ferr, 0, torch.typename(module) .. ' - i/o forward err ')
+ mytester:asserteq(berr, 0, torch.typename(module) .. ' - i/o backward err ')
+end
+
+function nntest.WeightedEuclidean()
+ local ini = math.random(10,20)
+ local inj = math.random(10,20)
+ local input = torch.Tensor(ini):zero()
+ local module = nn.WeightedEuclidean(ini,inj)
+
+ local err = jac.testJacobian(module,input)
+ mytester:assertlt(err,precision, 'error on state ')
+
+ local err = jac.testJacobianParameters(module, input, module.weight, module.gradWeight)
+ mytester:assertlt(err,precision, 'error on weight ')
+
+ local err = jac.testJacobianParameters(module, input, module.bias, module.gradBias)
+ mytester:assertlt(err,precision, 'error on bias ')
+
+ local ferr,berr = jac.testIO(module,input)
+ mytester:asserteq(ferr, 0, torch.typename(module) .. ' - i/o forward err ')
+ mytester:asserteq(berr, 0, torch.typename(module) .. ' - i/o backward err ')
+end
+
+function nntest.LogSigmoid()
+ local ini = math.random(10,20)
+ local inj = math.random(10,20)
+ local ink = math.random(10,20)
+ local input = torch.Tensor(ini,inj,ink):zero()
+ local module = nn.LogSigmoid()
+
+ local err = jac.testJacobian(module,input)
+ mytester:assertlt(err,precision, 'error on state ')
+
+ local ferr,berr = jac.testIO(module,input)
+ mytester:asserteq(ferr, 0, torch.typename(module) .. ' - i/o forward err ')
+ mytester:asserteq(berr, 0, torch.typename(module) .. ' - i/o backward err ')
+end
+
+function nntest.LogSoftmax()
+ local ini = math.random(10,20)
+ local inj = math.random(10,20)
+ local input = torch.Tensor(ini,inj):zero()
+ local module = nn.LogSoftMax()
+
+ local err = jac.testJacobian(module,input)
+ mytester:assertlt(err,precision, 'error on state ')
+
+ local ferr,berr = jac.testIO(module,input)
+ mytester:asserteq(ferr, 0, torch.typename(module) .. ' - i/o forward err ')
+ mytester:asserteq(berr, 0, torch.typename(module) .. ' - i/o backward err ')
+end
+
+-- function nntest.TemporalLogSoftmax()
+-- local ini = math.random(10,20)
+-- local inj = math.random(10,20)
+-- local input = torch.Tensor(ini,inj):zero()
+-- local module = nn.TemporalLogSoftMax()
+
+-- local err = jac.testJacobian(module,input)
+-- mytester:assertlt(err,precision, 'error on state ')
+
+-- local ferr,berr = jac.testIO(module,input)
+-- mytester:asserteq(ferr, 0, torch.typename(module) .. ' - i/o forward err ')
+-- mytester:asserteq(berr, 0, torch.typename(module) .. ' - i/o backward err ')
+-- end
+
+function nntest.Max()
+ local ini = math.random(10,20)
+ local inj = math.random(10,20)
+ local ink = math.random(10,20)
+ local input = torch.Tensor(ini,inj*ink):zero()
+ local module = nn.Max(1)
+
+ local err = jac.testJacobian(module,input)
+ mytester:assertlt(err,precision, 'error on state ')
+
+ local ferr,berr = jac.testIO(module,input)
+ mytester:asserteq(ferr, 0, torch.typename(module) .. ' - i/o forward err ')
+ mytester:asserteq(berr, 0, torch.typename(module) .. ' - i/o backward err ')
+end
+
+function nntest.Min()
+ local ini = math.random(10,20)
+ local inj = math.random(10,20)
+ local ink = math.random(10,20)
+ local input = torch.Tensor(ini,inj*ink):zero()
+ local module = nn.Min(1)
+
+ local err = jac.testJacobian(module,input)
+ mytester:assertlt(err,precision, 'error on state ')
+
+ local ferr,berr = jac.testIO(module,input)
+ mytester:asserteq(ferr, 0, torch.typename(module) .. ' - i/o forward err ')
+ mytester:asserteq(berr, 0, torch.typename(module) .. ' - i/o backward err ')
+end
+
+function nntest.Mean()
+ local ini = math.random(10,20)
+ local inj = math.random(10,20)
+ local ink = math.random(10,20)
+ local input = torch.Tensor(ini,inj,ink):zero()
+ local module = nn.Mean(random.random(1,3))
+
+ local err = jac.testJacobian(module,input)
+ mytester:assertlt(err,precision, 'error on state ')
+
+ local ferr,berr = jac.testIO(module,input)
+ mytester:asserteq(ferr, 0, torch.typename(module) .. ' - i/o forward err ')
+ mytester:asserteq(berr, 0, torch.typename(module) .. ' - i/o backward err ')
+end
+
+function nntest.Mul()
+ local ini = math.random(10,20)
+ local inj = math.random(10,20)
+ local ink = math.random(10,20)
+ local input = torch.Tensor(ini,inj,ink):zero()
+ local module = nn.Mul(ini*inj*ink)
+
+ local err = jac.testJacobian(module,input)
+ mytester:assertlt(err,precision, 'error on state ')
+ local err = jac.testJacobianParameters(module, input, module.weight, module.gradWeight)
+ mytester:assertlt(err,precision, 'error on weight ')
+ local err = jac.testJacobianUpdateParameters(module, input, module.weight)
+ mytester:assertlt(err,precision, 'error on weight [direct update] ')
+
+ for t,err in pairs(jac.testAllUpdate(module, input, 'weight', 'gradWeight')) do
+ mytester:assertlt(err, precision, string.format(
+ 'error on weight [%s]', t))
+ end
+
+ local ferr,berr = jac.testIO(module,input)
+ mytester:asserteq(ferr, 0, torch.typename(module) .. ' - i/o forward err ')
+ mytester:asserteq(berr, 0, torch.typename(module) .. ' - i/o backward err ')
+end
+
+function nntest.Sigmoid()
+ local ini = math.random(10,20)
+ local inj = math.random(10,20)
+ local ink = math.random(10,20)
+ local input = torch.Tensor(ini,inj,ink):zero()
+ local module = nn.Sigmoid()
+
+ local err = jac.testJacobian(module,input)
+ mytester:assertlt(err,precision, 'error on state ')
+
+ local ferr,berr = jac.testIO(module,input)
+ mytester:asserteq(ferr, 0, torch.typename(module) .. ' - i/o forward err ')
+ mytester:asserteq(berr, 0, torch.typename(module) .. ' - i/o backward err ')
+end
+
+function nntest.Softmax()
+ local ini = math.random(10,20)
+ local ink = math.random(10,20)
+ local input = torch.Tensor(ink, ini):zero()
+ local module = nn.SoftMax()
+
+ local err = jac.testJacobian(module,input)
+ mytester:assertlt(err,precision, 'error on state ')
+
+ local ferr,berr = jac.testIO(module,input)
+ mytester:asserteq(ferr, 0, torch.typename(module) .. ' - i/o forward err ')
+ mytester:asserteq(berr, 0, torch.typename(module) .. ' - i/o backward err ')
+end
+
+function nntest.Softmin()
+ local ini = math.random(10,20)
+ local ink = math.random(10,20)
+ local input = torch.Tensor(ink, ini):zero()
+ local module = nn.SoftMin()
+
+ local err = jac.testJacobian(module,input)
+ mytester:assertlt(err,precision, 'error on state ')
+
+ local ferr,berr = jac.testIO(module,input)
+ mytester:asserteq(ferr, 0, torch.typename(module) .. ' - i/o forward err ')
+ mytester:asserteq(berr, 0, torch.typename(module) .. ' - i/o backward err ')
+end
+
+function nntest.Softsign()
+ local ini = math.random(10,20)
+ local ink = math.random(10,20)
+ local input = torch.Tensor(ink, ini):zero()
+ local module = nn.SoftSign()
+
+ local err = jac.testJacobian(module,input)
+ mytester:assertlt(err,precision, 'error on state ')
+
+ local ferr,berr = jac.testIO(module,input)
+ mytester:asserteq(ferr, 0, torch.typename(module) .. ' - i/o forward err ')
+ mytester:asserteq(berr, 0, torch.typename(module) .. ' - i/o backward err ')
+end
+
+function nntest.SoftPlus()
+ local ini = math.random(10,20)
+ local inj = math.random(10,20)
+ local ink = math.random(10,20)
+ local input = torch.Tensor(ini,inj,ink):zero()
+ local module = nn.SoftPlus()
+
+ local err = jac.testJacobian(module,input)
+ mytester:assertlt(err,precision, 'error on state ')
+
+ local ferr,berr = jac.testIO(module,input)
+ mytester:asserteq(ferr, 0, torch.typename(module) .. ' - i/o forward err ')
+ mytester:asserteq(berr, 0, torch.typename(module) .. ' - i/o backward err ')
+end
+
+function nntest.SpatialSubtractiveNormalization_2dkernel()
+ local inputSize = math.random(11,20)
+ local kersize = 9
+ local nbfeatures = math.random(5,10)
+ local kernel = torch.Tensor(kersize,kersize):fill(1)
+ local module = nn.SpatialSubtractiveNormalization(nbfeatures,kernel)
+ local input = torch.rand(nbfeatures,inputSize,inputSize)
+
+ local err = jac.testJacobian(module,input)
+ mytester:assertlt(err,precision, 'error on state ')
+
+ local ferr,berr = jac.testIO(module,input)
+ mytester:asserteq(ferr, 0, torch.typename(module) .. ' - i/o forward err ')
+ mytester:asserteq(berr, 0, torch.typename(module) .. ' - i/o backward err ')
+end
+
+function nntest.SpatialSubtractiveNormalization_1dkernel()
+ local inputSize = math.random(11,20)
+ local kersize = 9
+ local nbfeatures = math.random(5,10)
+ local kernel = torch.Tensor(kersize):fill(1)
+ local module = nn.SpatialSubtractiveNormalization(nbfeatures,kernel)
+ local input = torch.rand(nbfeatures,inputSize,inputSize)
+
+ local err = jac.testJacobian(module,input)
+ mytester:assertlt(err,precision, 'error on state ')
+
+ local ferr,berr = jac.testIO(module,input)
+ mytester:asserteq(ferr, 0, torch.typename(module) .. ' - i/o forward err ')
+ mytester:asserteq(berr, 0, torch.typename(module) .. ' - i/o backward err ')
+end
+
+function nntest.SpatialConvolution()
+ local from = math.random(1,10)
+ local to = math.random(1,10)
+ local ki = math.random(1,10)
+ local kj = math.random(1,10)
+ local si = math.random(1,4)
+ local sj = math.random(1,4)
+ local outi = math.random(10,20)
+ local outj = math.random(10,20)
+ local ini = (outi-1)*si+ki
+ local inj = (outj-1)*sj+kj
+ local module = nn.SpatialConvolution(from, to, ki, kj, si, sj)
+ local input = torch.Tensor(from, inj, ini):zero()
+
+ -- stochastic
+
+ local err = jac.testJacobian(module, input)
+ mytester:assertlt(err, precision, 'error on state ')
+
+ local err = jac.testJacobianParameters(module, input, module.weight, module.gradWeight)
+ mytester:assertlt(err , precision, 'error on weight ')
+
+ local err = jac.testJacobianParameters(module, input, module.bias, module.gradBias)
+ mytester:assertlt(err , precision, 'error on bias ')
+
+ local err = jac.testJacobianUpdateParameters(module, input, module.weight)
+ mytester:assertlt(err , precision, 'error on weight [direct update] ')
+
+ local err = jac.testJacobianUpdateParameters(module, input, module.bias)
+ mytester:assertlt(err , precision, 'error on bias [direct update] ')
+
+ for t,err in pairs(jac.testAllUpdate(module, input, 'weight', 'gradWeight')) do
+ mytester:assertlt(err, precision, string.format(
+ 'error on weight [%s]', t))
+ end
+
+ for t,err in pairs(jac.testAllUpdate(module, input, 'bias', 'gradBias')) do
+ mytester:assertlt(err, precision, string.format(
+ 'error on bias [%s]', t))
+ end
+
+ -- batch
+
+ --verbose = true
+ local batch = math.random(2,5)
+ outi = math.random(4,8)
+ outj = math.random(4,8)
+ ini = (outi-1)*si+ki
+ inj = (outj-1)*sj+kj
+ module = nn.SpatialConvolution(from, to, ki, kj, si, sj)
+ input = torch.Tensor(batch,from,inj,ini):zero()
+
+-- print(from, to, ki, kj, si, sj, batch, ini, inj)
+-- print(module.weight:size())
+-- print(module.gradWeight:size())
+
+ local err = jac.testJacobian(module, input)
+ mytester:assertlt(err, precision, 'batch error on state ')
+
+ local err = jac.testJacobianParameters(module, input, module.weight, module.gradWeight)
+ mytester:assertlt(err , precision, 'batch error on weight ')
+
+ local err = jac.testJacobianParameters(module, input, module.bias, module.gradBias)
+ mytester:assertlt(err , precision, 'batch error on bias ')
+
+ local err = jac.testJacobianUpdateParameters(module, input, module.weight)
+ mytester:assertlt(err , precision, 'batch error on weight [direct update] ')
+
+ local err = jac.testJacobianUpdateParameters(module, input, module.bias)
+ mytester:assertlt(err , precision, 'batch error on bias [direct update] ')
+
+ for t,err in pairs(jac.testAllUpdate(module, input, 'weight', 'gradWeight')) do
+ mytester:assertlt(err, precision, string.format(
+      'batch error on weight [%s]', t))
+ end
+
+ for t,err in pairs(jac.testAllUpdate(module, input, 'bias', 'gradBias')) do
+ mytester:assertlt(err, precision, string.format(
+ 'batch error on bias [%s]', t))
+ end
+
+ local ferr, berr = jac.testIO(module, input)
+ mytester:asserteq(0, ferr, torch.typename(module) .. ' - i/o forward err ')
+ mytester:asserteq(0, berr, torch.typename(module) .. ' - i/o backward err ')
+end
+
+function nntest.SpatialConvolutionMap()
+ local from = math.random(1,10)
+ local fanin = math.random(1, from)
+ local to = math.random(1,10)
+ local ki = math.random(1,10)
+ local kj = math.random(1,10)
+ local si = math.random(1,4)
+ local sj = math.random(1,4)
+ local outi = math.random(10,20)
+ local outj = math.random(10,20)
+ local ini = (outi-1)*si+ki
+ local inj = (outj-1)*sj+kj
+
+ local module = nn.SpatialConvolutionMap(nn.tables.random(from, to, fanin), ki, kj, si, sj)
+ local input = torch.Tensor(from, inj, ini):zero()
+
+ local err = jac.testJacobian(module, input)
+ mytester:assertlt(err, precision, 'error on state ')
+
+ local err = jac.testJacobianParameters(module, input, module.weight, module.gradWeight)
+ mytester:assertlt(err , precision, 'error on weight ')
+
+ local err = jac.testJacobianParameters(module, input, module.bias, module.gradBias)
+ mytester:assertlt(err , precision, 'error on bias ')
+
+ for t,err in pairs(jac.testAllUpdate(module, input, 'weight', 'gradWeight')) do
+ mytester:assertlt(err, precision, string.format(
+ 'error on weight [%s]', t))
+ end
+
+ for t,err in pairs(jac.testAllUpdate(module, input, 'bias', 'gradBias')) do
+ mytester:assertlt(err, precision, string.format(
+ 'error on bias [%s]', t))
+ end
+
+ local ferr, berr = jac.testIO(module, input)
+ mytester:asserteq(0, ferr, torch.typename(module) .. ' - i/o forward err ')
+ mytester:asserteq(0, berr, torch.typename(module) .. ' - i/o backward err ')
+end
+
+function batchcompare(smod, sin, plist)
+ local bs = torch.LongStorage(sin:size():size()+1)
+ bs[1] = 1
+ for i=1,sin:size():size() do bs[i+1] = sin:size()[i] end
+ local bin = torch.Tensor(bs):copy(sin)
+ local bmod = smod:clone()
+
+ local sout = smod:forward(sin):clone()
+ local bout = bmod:forward(bin):clone()
+
+ local sgout = torch.randn(sout:size())
+ local bgout = torch.Tensor(bout:size())
+ bgout:copy(sgout)
+
+ local sgin = smod:backward(sin, sgout)
+ local bgin = bmod:backward(bin, bgout)
+
+ smod:accGradParameters(sin, sgout, 1)
+ bmod:accGradParameters(bin, bgout, 1)
+
+ mytester:assertTensorEq(sout,bout:select(1,1), 1e-8, 'batchcompare error on output')
+ mytester:assertTensorEq(sgin,bgin:select(1,1), 1e-8, 'batchcompare error on gradInput')
+
+ for i,v in pairs(plist) do
+ mytester:assertTensorEq(smod[v],bmod[v], 1e-8, 'batchcompare error on ' .. v)
+ end
+end
+
+function nntest.SpatialConvolutionBatchCompare()
+ local from = math.random(1,10)
+ local to = math.random(1,10)
+ local ki = math.random(1,10)
+ local kj = math.random(1,10)
+ local si = math.random(1,4)
+ local sj = math.random(1,4)
+ local outi = math.random(10,20)
+ local outj = math.random(10,20)
+ local ini = (outi-1)*si+ki
+ local inj = (outj-1)*sj+kj
+
+ local module = nn.SpatialConvolution(from, to, ki, kj, si, sj)
+ local input = torch.randn(from,inj,ini)
+
+ batchcompare(module,input, {'weight','bias','gradWeight','gradBias'})
+end
+
+function nntest.SpatialSubSamplingBatchCompare()
+ local from = math.random(1,10)
+ local ki = math.random(1,10)
+ local kj = math.random(1,10)
+ local si = math.random(1,4)
+ local sj = math.random(1,4)
+ local outi = math.random(10,20)
+ local outj = math.random(10,20)
+ local ini = (outi-1)*si+ki
+ local inj = (outj-1)*sj+kj
+ local module = nn.SpatialSubSampling(from, ki, kj, si, sj)
+ local input = torch.randn(from,inj,ini)--torch.Tensor(from, inj, ini):zero()
+
+ batchcompare(module,input, {'weight','bias','gradWeight','gradBias'})
+end
+
+function nntest.SpatialSubSampling()
+ local from = math.random(1,10)
+ local ki = math.random(1,10)
+ local kj = math.random(1,10)
+ local si = math.random(1,4)
+ local sj = math.random(1,4)
+ local outi = math.random(10,20)
+ local outj = math.random(10,20)
+ local ini = (outi-1)*si+ki
+ local inj = (outj-1)*sj+kj
+ local module = nn.SpatialSubSampling(from, ki, kj, si, sj)
+ local input = torch.Tensor(from, inj, ini):zero()
+
+ local err = jac.testJacobian(module, input)
+ mytester:assertlt(err, precision, 'error on state ')
+
+ local err = jac.testJacobianParameters(module, input, module.weight, module.gradWeight)
+ mytester:assertlt(err , precision, 'error on weight ')
+
+ local err = jac.testJacobianParameters(module, input, module.bias, module.gradBias)
+ mytester:assertlt(err , precision, 'error on bias ')
+
+ local err = jac.testJacobianUpdateParameters(module, input, module.weight)
+ mytester:assertlt(err , precision, 'error on weight [direct update] ')
+
+ local err = jac.testJacobianUpdateParameters(module, input, module.bias)
+ mytester:assertlt(err , precision, 'error on bias [direct update] ')
+
+ for t,err in pairs(jac.testAllUpdate(module, input, 'weight', 'gradWeight')) do
+ mytester:assertlt(err, precision, string.format(
+ 'error on weight [%s]', t))
+ end
+
+ for t,err in pairs(jac.testAllUpdate(module, input, 'bias', 'gradBias')) do
+ mytester:assertlt(err, precision, string.format(
+ 'error on bias [%s]', t))
+ end
+
+ --verbose = true
+ local batch = math.random(2,5)
+ outi = math.random(4,8)
+ outj = math.random(4,8)
+ ini = (outi-1)*si+ki
+ inj = (outj-1)*sj+kj
+ module = nn.SpatialSubSampling(from, ki, kj, si, sj)
+ input = torch.Tensor(batch,from,inj,ini):zero()
+
+-- print(from, to, ki, kj, si, sj, batch, ini, inj)
+-- print(module.weight:size())
+-- print(module.gradWeight:size())
+
+ local err = jac.testJacobian(module, input)
+ mytester:assertlt(err, precision, 'batch error on state ')
+
+ local err = jac.testJacobianParameters(module, input, module.weight, module.gradWeight)
+ mytester:assertlt(err , precision, 'batch error on weight ')
+
+ local err = jac.testJacobianParameters(module, input, module.bias, module.gradBias)
+ mytester:assertlt(err , precision, 'batch error on bias ')
+
+ local err = jac.testJacobianUpdateParameters(module, input, module.weight)
+ mytester:assertlt(err , precision, 'batch error on weight [direct update] ')
+
+ local err = jac.testJacobianUpdateParameters(module, input, module.bias)
+ mytester:assertlt(err , precision, 'batch error on bias [direct update] ')
+
+ for t,err in pairs(jac.testAllUpdate(module, input, 'weight', 'gradWeight')) do
+ mytester:assertlt(err, precision, string.format(
+ 'batch error on weight [%s]', t))
+ end
+
+ for t,err in pairs(jac.testAllUpdate(module, input, 'bias', 'gradBias')) do
+ mytester:assertlt(err, precision, string.format(
+ 'batch error on bias [%s]', t))
+ end
+
+ local ferr, berr = jac.testIO(module, input)
+ mytester:asserteq(0, ferr, torch.typename(module) .. ' - i/o forward err ')
+ mytester:asserteq(0, berr, torch.typename(module) .. ' - i/o backward err ')
+end
+
+function nntest.SpatialMaxPooling()
+ local fanin = math.random(1,4)
+ local osizex = math.random(1,20)
+ local osizey = math.random(1,20)
+ local mx = math.random(2,4)
+ local my = math.random(2,4)
+ local sizex = osizex*mx
+ local sizey = osizey*my
+ local module = nn.SpatialMaxPooling(mx,my,mx,my)
+ local input = torch.rand(fanin,sizey,sizex)
+
+ local err = jac.testJacobian(module, input)
+ mytester:assertlt(err, precision, 'error on state ')
+
+ local ferr, berr = jac.testIO(module, input)
+ mytester:asserteq(ferr, 0, torch.typename(module) .. ' - i/o forward err ')
+ mytester:asserteq(berr, 0, torch.typename(module) .. ' - i/o backward err ')
+end
+
+function nntest.SpatialLPPooling()
+ local fanin = math.random(1,4)
+ local osizex = math.random(1,4)
+ local osizey = math.random(1,4)
+ local p = math.random(1,4)
+ local mx = math.random(2,8)
+ local my = math.random(2,8)
+ local dx = math.random(2,mx)
+ local dy = math.random(2,my)
+ local sizex = osizex*mx
+ local sizey = osizey*my
+ local module = nn.SpatialLPPooling(fanin,p,mx,my,dx,dy)
+ local input = torch.rand(fanin,sizey,sizex)
+
+ local err = jac.testJacobian(module, input)
+ mytester:assertlt(err, precision, 'error on state ')
+
+ local ferr, berr = jac.testIO(module, input)
+ mytester:asserteq(ferr, 0, torch.typename(module) .. ' - i/o forward err ')
+ mytester:asserteq(berr, 0, torch.typename(module) .. ' - i/o backward err ')
+end
+
+function nntest.Sum()
+ local ini = math.random(10,20)
+ local inj = math.random(10,20)
+ local ink = math.random(10,20)
+ local input = torch.Tensor(ini,inj,ink):zero()
+ local module = nn.Sum(random.random(1,3))
+
+ local err = jac.testJacobian(module,input)
+ mytester:assertlt(err,precision, 'error on state ')
+
+ local ferr,berr = jac.testIO(module,input)
+ mytester:asserteq(ferr, 0, torch.typename(module) .. ' - i/o forward err ')
+ mytester:asserteq(berr, 0, torch.typename(module) .. ' - i/o backward err ')
+end
+
+function nntest.Tanh()
+ local ini = math.random(5,10)
+ local inj = math.random(5,10)
+ local ink = math.random(5,10)
+ local input = torch.Tensor(ink, inj, ini):zero()
+
+ local module = nn.Tanh()
+
+ local err = jac.testJacobian(module, input)
+ mytester:assertlt(err, precision , 'error on state ')
+
+ local ferr, berr = jac.testIO(module, input)
+ mytester:asserteq(ferr, 0, torch.typename(module) .. ' - i/o forward err ')
+ mytester:asserteq(berr, 0, torch.typename(module) .. ' - i/o backward err ')
+end
+
+function nntest.TemporalConvolution()
+ local from = math.random(1,10)
+ local to = math.random(1,10)
+ local ki = math.random(1,10)
+ local si = math.random(1,4)
+ local outi = math.random(10,20)
+ local ini = (outi-1)*si+ki
+ local module = nn.TemporalConvolution(from, to, ki,si)
+ local input = torch.Tensor(ini, from):zero()
+
+ local err = jac.testJacobian(module, input)
+ mytester:assertlt(err, precision, 'error on state ')
+
+ local err = jac.testJacobianParameters(module, input, module.weight, module.gradWeight)
+ mytester:assertlt(err , precision, 'error on weight ')
+
+ local err = jac.testJacobianParameters(module, input, module.bias, module.gradBias)
+ mytester:assertlt(err , precision, 'error on bias ')
+
+ local err = jac.testJacobianUpdateParameters(module, input, module.weight)
+ mytester:assertlt(err , precision, 'error on weight [direct update]')
+
+ local err = jac.testJacobianUpdateParameters(module, input, module.bias)
+ mytester:assertlt(err , precision, 'error on bias [direct update]')
+
+ for t,err in pairs(jac.testAllUpdate(module, input, 'weight', 'gradWeight')) do
+ mytester:assertlt(err, precision, string.format(
+ 'error on weight [%s]', t))
+ end
+
+ for t,err in pairs(jac.testAllUpdate(module, input, 'bias', 'gradBias')) do
+ mytester:assertlt(err, precision, string.format(
+ 'error on bias [%s]', t))
+ end
+
+ local ferr, berr = jac.testIO(module, input)
+ mytester:asserteq(0, ferr, torch.typename(module) .. ' - i/o forward err ')
+ mytester:asserteq(0, berr, torch.typename(module) .. ' - i/o backward err ')
+end
+
+function nntest.TemporalSubSampling()
+ local from = math.random(1,10)
+ local ki = math.random(1,10)
+ local si = math.random(1,4)
+ local outi = math.random(10,20)
+ local ini = (outi-1)*si+ki
+ local module = nn.TemporalSubSampling(from, ki, si)
+ local input = torch.Tensor(ini, from):zero()
+
+ local err = jac.testJacobian(module, input)
+ mytester:assertlt(err, precision, 'error on state ')
+
+ local err = jac.testJacobianParameters(module, input, module.weight, module.gradWeight)
+ mytester:assertlt(err , precision, 'error on weight ')
+
+ local err = jac.testJacobianParameters(module, input, module.bias, module.gradBias)
+ mytester:assertlt(err , precision, 'error on bias ')
+
+ local err = jac.testJacobianUpdateParameters(module, input, module.weight)
+ mytester:assertlt(err , precision, 'error on weight [direct update] ')
+
+ local err = jac.testJacobianUpdateParameters(module, input, module.bias)
+ mytester:assertlt(err , precision, 'error on bias [direct update] ')
+
+ for t,err in pairs(jac.testAllUpdate(module, input, 'weight', 'gradWeight')) do
+ mytester:assertlt(err, precision, string.format(
+ 'error on weight [%s]', t))
+ end
+
+ for t,err in pairs(jac.testAllUpdate(module, input, 'bias', 'gradBias')) do
+ mytester:assertlt(err, precision, string.format(
+ 'error on bias [%s]', t))
+ end
+
+ local ferr, berr = jac.testIO(module, input)
+ mytester:asserteq(0, ferr, torch.typename(module) .. ' - i/o forward err ')
+ mytester:asserteq(0, berr, torch.typename(module) .. ' - i/o backward err ')
+end
+
+function nntest.VolumetricConvolution()
+ local from = math.random(2,5)
+ local to = math.random(2,5)
+ local kt = math.random(3,7)
+ local ki = math.random(3,7)
+ local kj = math.random(3,7)
+ local st = math.random(2,4)
+ local si = math.random(2,4)
+ local sj = math.random(2,4)
+ local outt = math.random(3,7)
+ local outi = math.random(3,7)
+ local outj = math.random(3,7)
+ local int = (outt-1)*st+kt
+ local ini = (outi-1)*si+ki
+ local inj = (outj-1)*sj+kj
+ local module = nn.VolumetricConvolution(from, to, kt, ki, kj, st, si, sj)
+ local input = torch.Tensor(from, int, inj, ini):zero()
+
+ local err = jac.testJacobian(module, input)
+ mytester:assertlt(err, precision, 'error on state ')
+
+ local err = jac.testJacobianParameters(module, input, module.weight, module.gradWeight)
+ mytester:assertlt(err , precision, 'error on weight ')
+
+ local err = jac.testJacobianParameters(module, input, module.bias, module.gradBias)
+ mytester:assertlt(err , precision, 'error on bias ')
+
+ local err = jac.testJacobianUpdateParameters(module, input, module.weight)
+ mytester:assertlt(err , precision, 'error on weight [direct update] ')
+
+ local err = jac.testJacobianUpdateParameters(module, input, module.bias)
+ mytester:assertlt(err , precision, 'error on bias [direct update] ')
+
+ for t,err in pairs(jac.testAllUpdate(module, input, 'weight', 'gradWeight')) do
+ mytester:assertlt(err, precision, string.format(
+ 'error on weight [%s]', t))
+ end
+
+ for t,err in pairs(jac.testAllUpdate(module, input, 'bias', 'gradBias')) do
+ mytester:assertlt(err, precision, string.format(
+ 'error on bias [%s]', t))
+ end
+
+ local ferr, berr = jac.testIO(module, input)
+ mytester:asserteq(0, ferr, torch.typename(module) .. ' - i/o forward err ')
+ mytester:asserteq(0, berr, torch.typename(module) .. ' - i/o backward err ')
+end
+
+
+mytester:add(nntest)
+--mytester:add(test_SpatialConvolution)
+--mytester:add(test_AbsCriterion)
+
+if not nn then
+ require 'nn'
+ jac = nn.Jacobian
+ mytester:run()
+else
+ jac = nn.Jacobian
+ function nn.test()
+ -- randomize stuff
+ math.randomseed(os.time())
+ mytester:run()
+ end
+end
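When test.lua is loaded through the package (as init.lua does above), the nn table already exists, so the else branch only installs nn.test(); run standalone it requires 'nn' itself and launches the suite immediately. A minimal sketch of invoking the suite from an interpreter:

    require 'nn'
    nn.test()   -- seeds math.random with os.time() and runs every nntest.* case through torch.Tester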