127 files changed, 10444 insertions, 0 deletions
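
For orientation, a minimal usage sketch (not part of this commit) of how the modules introduced below fit together: a small nn.Sequential network built from nn.Linear and nn.HardTanh, trained for one step against nn.MSECriterion. It assumes the package is loaded with require 'nn' and that Sequential provides the usual zeroGradParameters/updateParameters container methods (the tail of Sequential.lua is truncated in this view).

-- minimal usage sketch (hypothetical example, not part of the diff)
require 'nn'

local mlp = nn.Sequential()
mlp:add(nn.Linear(10, 5))    -- 10 inputs -> 5 hidden units
mlp:add(nn.HardTanh())       -- pointwise nonlinearity
mlp:add(nn.Linear(5, 1))     -- 5 hidden units -> 1 output

local criterion = nn.MSECriterion()
local x, y = torch.rand(10), torch.rand(1)   -- example input and target

mlp:zeroGradParameters()
local out = mlp:forward(x)
local err = criterion:forward(out, y)
mlp:backward(x, criterion:backward(out, y))
mlp:updateParameters(0.01)   -- plain SGD step, learning rate 0.01
print(err)
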
@@ -0,0 +1,15 @@
+local Abs, parent = torch.class('nn.Abs', 'nn.Module')
+
+function Abs:__init()
+   parent.__init(self)
+end
+
+function Abs:updateOutput(input)
+   input.nn.Abs_updateOutput(self, input)
+   return self.output
+end
+
+function Abs:updateGradInput(input, gradOutput)
+   input.nn.Abs_updateGradInput(self, input, gradOutput)
+   return self.gradInput
+end
diff --git a/AbsCriterion.lua b/AbsCriterion.lua
new file mode 100644
index 0000000..be7f6cb
--- /dev/null
+++ b/AbsCriterion.lua
@@ -0,0 +1,14 @@
+local AbsCriterion, parent = torch.class('nn.AbsCriterion', 'nn.Criterion')
+
+function AbsCriterion:__init()
+   parent.__init(self)
+   self.sizeAverage = true
+end
+
+function AbsCriterion:updateOutput(input, target)
+   return input.nn.AbsCriterion_updateOutput(self, input, target)
+end
+
+function AbsCriterion:updateGradInput(input, target)
+   return input.nn.AbsCriterion_updateGradInput(self, input, target)
+end
@@ -0,0 +1,54 @@
+local Add, parent = torch.class('nn.Add', 'nn.Module')
+
+function Add:__init(inputSize,scalar)
+   parent.__init(self)
+
+   local size = inputSize
+   if scalar then size=1 end
+   self.bias = torch.Tensor(size)
+   self.gradBias = torch.Tensor(size)
+
+   -- state
+   self.gradInput:resize(inputSize)
+   self.output:resize(inputSize)
+
+   self:reset()
+end
+
+function Add:reset(stdv)
+   if stdv then
+      stdv = stdv * math.sqrt(3)
+   else
+      stdv = 1./math.sqrt(self.bias:size(1))
+   end
+
+   for i=1,self.bias:size(1) do
+      self.bias[i] = torch.uniform(-stdv, stdv)
+   end
+end
+
+function Add:updateOutput(input)
+   self.output:copy(input);
+   if self.gradBias:size(1)==1 then
+      self.output:add(self.bias[1]);
+   else
+      self.output:add(self.bias);
+   end
+   return self.output
+end
+
+function Add:updateGradInput(input, gradOutput)
+   if self.gradInput then
+      self.gradInput:copy(gradOutput)
+      return self.gradInput
+   end
+end
+
+function Add:accGradParameters(input, gradOutput, scale)
+   scale = scale or 1
+   if self.gradBias:size(1) == 1 then
+      self.gradBias[1] = self.gradBias[1] + scale*gradOutput:sumall();
+   else
+      self.gradBias:add(scale, gradOutput)
+   end
+end
diff --git a/CAddTable.lua b/CAddTable.lua
new file mode 100644
index 0000000..afe3568
--- /dev/null
+++ b/CAddTable.lua
@@ -0,0 +1,24 @@
+
+local CAddTable, parent = torch.class('nn.CAddTable', 'nn.Module')
+
+function CAddTable:__init()
+   parent.__init(self)
+   self.gradInput = {}
+end
+
+function CAddTable:updateOutput(input)
+   self.output:resizeAs(input[1]):copy(input[1])
+   for i=2,#input do
+      self.output:add(input[i])
+   end
+   return self.output
+end
+
+function CAddTable:updateGradInput(input, gradOutput)
+   for i=1,#input do
+      self.gradInput[i] = self.gradInput[i] or torch.Tensor()
+      self.gradInput[i]:resizeAs(input[i])
+      self.gradInput[i]:copy(gradOutput)
+   end
+   return self.gradInput
+end
diff --git a/CDivTable.lua b/CDivTable.lua
new file mode 100644
index 0000000..f91d024
--- /dev/null
+++ b/CDivTable.lua
@@ -0,0 +1,21 @@
+
+local CDivTable, parent = torch.class('nn.CDivTable', 'nn.Module')
+
+function CDivTable:__init()
+   parent.__init(self)
+   self.gradInput = {}
+end
+
+function CDivTable:updateOutput(input)
+   self.output:resizeAs(input[1]):copy(input[1])
+   self.output:cdiv(input[2])
+   return self.output
+end
+
+function CDivTable:updateGradInput(input, gradOutput)
+   self.gradInput[1] = self.gradInput[1] or torch.Tensor()
+   self.gradInput[2] = self.gradInput[2] or torch.Tensor()
+   self.gradInput[1]:resizeAs(input[1]):copy(gradOutput):cdiv(input[2])
self.gradInput[2]:resizeAs(input[2]):zero():addcdiv(-1,self.gradInput[1],input[2]):cmul(input[1]) + return self.gradInput +end diff --git a/CMakeLists.txt b/CMakeLists.txt new file mode 100644 index 0000000..75239ad --- /dev/null +++ b/CMakeLists.txt @@ -0,0 +1,9 @@ +SET(src init.c) + +FILE(GLOB luasrc *.lua) +SET(luasrc ${luasrc} test/test.lua) + +ADD_TORCH_PACKAGE(nn "${src}" "${luasrc}" "Machine Learning") +ADD_TORCH_DOK(dok nn "Machine Learning" "Neural Networks" 3.1) + +TARGET_LINK_LIBRARIES(nn luaT TH) diff --git a/CMul.lua b/CMul.lua new file mode 100644 index 0000000..9b59944 --- /dev/null +++ b/CMul.lua @@ -0,0 +1,36 @@ +local CMul, parent = torch.class('nn.CMul', 'nn.Module') + +function CMul:__init(inputSize) + parent.__init(self) + + self.weight = torch.Tensor(inputSize) + self.gradWeight = torch.Tensor(inputSize) + + -- state + self.gradInput:resize(inputSize) + self.output:resize(inputSize) + + self:reset() +end + +function CMul:reset() + self.weight:fill(1) +end + +function CMul:updateOutput(input) + self.output:copy(input); + self.output:cmul(self.weight); + return self.output +end + +function CMul:updateGradInput(input, gradOutput) + if self.gradInput then + self.gradInput:zero() + self.gradInput:addcmul(1, self.weight, gradOutput) + return self.gradInput + end +end + +function CMul:accGradParameters(input, gradOutput, scale) + self.gradWeight:addcmul(scale or 1, input, gradOutput) +end diff --git a/CMulTable.lua b/CMulTable.lua new file mode 100644 index 0000000..4c058b6 --- /dev/null +++ b/CMulTable.lua @@ -0,0 +1,26 @@ + +local CMulTable, parent = torch.class('nn.CMulTable', 'nn.Module') + +function CMulTable:__init() + parent.__init(self) + self.gradInput = {} +end + +function CMulTable:updateOutput(input) + self.output:resizeAs(input[1]):copy(input[1]) + for i=2,#input do + self.output:cmul(input[i]) + end + return self.output +end + +function CMulTable:updateGradInput(input, gradOutput) + local tout = torch.Tensor():resizeAs(self.output) + for i=1,#input do + self.gradInput[i] = self.gradInput[i] or torch.Tensor() + self.gradInput[i]:resizeAs(input[i]):copy(gradOutput) + tout:copy(self.output):cdiv(input[i]) + self.gradInput[i]:cmul(tout) + end + return self.gradInput +end diff --git a/CSubTable.lua b/CSubTable.lua new file mode 100644 index 0000000..ffc495b --- /dev/null +++ b/CSubTable.lua @@ -0,0 +1,21 @@ + +local CSubTable, parent = torch.class('nn.CSubTable', 'nn.Module') + +function CSubTable:__init() + parent.__init(self) + self.gradInput = {} +end + +function CSubTable:updateOutput(input) + self.output:resizeAs(input[1]):copy(input[1]) + self.output:add(-1,input[2]) + return self.output +end + +function CSubTable:updateGradInput(input, gradOutput) + self.gradInput[1] = self.gradInput[1] or torch.Tensor() + self.gradInput[2] = self.gradInput[2] or torch.Tensor() + self.gradInput[1]:resizeAs(input[1]):copy(gradOutput) + self.gradInput[2]:resizeAs(input[1]):copy(gradOutput):mul(-1) + return self.gradInput +end diff --git a/ClassNLLCriterion.lua b/ClassNLLCriterion.lua new file mode 100644 index 0000000..7ac48f4 --- /dev/null +++ b/ClassNLLCriterion.lua @@ -0,0 +1,44 @@ +local ClassNLLCriterion, parent = torch.class('nn.ClassNLLCriterion', 'nn.Criterion') + +function ClassNLLCriterion:__init() + parent.__init(self) + self.sizeAverage = true +end + +function ClassNLLCriterion:updateOutput(input, target) + if input:dim() == 1 then + self.output = -input[target] + elseif input:dim() == 2 then + local output = 0 + for i=1,target:size(1) do + output = output - 
input[i][target[i]] + end + if self.sizeAverage then + output = output / target:size(1) + end + self.output = output + else + error('matrix or vector expected') + end + return self.output +end + +function ClassNLLCriterion:updateGradInput(input, target) + self.gradInput:resizeAs(input) + self.gradInput:zero() + + if input:dim() == 1 then + self.gradInput[target] = -1 + else + local z = -1 + if self.sizeAverage then + z = z / target:size(1) + end + local gradInput = self.gradInput + for i=1,target:size(1) do + gradInput[i][target[i]] = z + end + end + + return self.gradInput +end diff --git a/Concat.lua b/Concat.lua new file mode 100644 index 0000000..616c394 --- /dev/null +++ b/Concat.lua @@ -0,0 +1,119 @@ +local Concat, parent = torch.class('nn.Concat', 'nn.Module') + +function Concat:__init(dimension) + parent.__init(self) + self.modules = {} + self.size = torch.LongStorage() + self.dimension = dimension +end + +function Concat:add(module) + table.insert(self.modules, module) + return self +end + +function Concat:get(index) + return self.modules[index] +end + +function Concat:updateOutput(input) + for i=1,#self.modules do + local currentOutput = self.modules[i]:updateOutput(input) + + if i == 1 then + self.size:resize(currentOutput:dim()):copy(currentOutput:size()) + else + self.size[self.dimension] = self.size[self.dimension] + currentOutput:size(self.dimension) + end + end + self.output:resize(self.size) + + local offset = 1 + for _,module in ipairs(self.modules) do + local currentOutput = module:updateOutput(input) + self.output:narrow(self.dimension, offset, currentOutput:size(self.dimension)):copy(currentOutput) + offset = offset + currentOutput:size(self.dimension) + end + return self.output +end + +function Concat:updateGradInput(input, gradOutput) + self.gradInput:resizeAs(input) + + local offset = 1 + for i,module in ipairs(self.modules) do + local currentOutput = module.output + local currentGradInput = module:updateGradInput(input, gradOutput:narrow(self.dimension, offset, currentOutput:size(self.dimension))) + + if i==1 then + self.gradInput:copy(currentGradInput) + else + self.gradInput:add(currentGradInput) + end + offset = offset + currentOutput:size(self.dimension) + end + return self.gradInput +end + +function Concat:accGradParameters(input, gradOutput, scale) + scale = scale or 1 + local offset = 1 + for i,module in ipairs(self.modules) do + local currentOutput = module.output + local currentGradInput = module:accGradParameters(input, + gradOutput:narrow(self.dimension, offset, currentOutput:size(self.dimension)), + scale) + offset = offset + currentOutput:size(self.dimension) + end +end + +function Concat:accUpdateGradParameters(input, gradOutput, lr) + local offset = 1 + for i,module in ipairs(self.modules) do + local currentOutput = module.output + local currentGradInput = module:accUpdateGradParameters(input, + gradOutput:narrow(self.dimension, offset, currentOutput:size(self.dimension)), + lr) + offset = offset + currentOutput:size(self.dimension) + end +end + +function Concat:zeroGradParameters() + for _,module in ipairs(self.modules) do + module:zeroGradParameters() + end +end + +function Concat:updateParameters(learningRate) + for _,module in ipairs(self.modules) do + module:updateParameters(learningRate) + end +end + +function Concat:share(mlp,...) 
+ for i=1,#self.modules do + self.modules[i]:share(mlp.modules[i],...); + end +end + +function Concat:parameters() + local function tinsert(to, from) + if type(from) == 'table' then + for i=1,#from do + tinsert(to,from[i]) + end + else + table.insert(to,from) + end + end + local w = {} + local gw = {} + for i=1,#self.modules do + local mw,mgw = self.modules[i]:parameters() + if mw then + tinsert(w,mw) + tinsert(gw,mgw) + end + end + return w,gw +end diff --git a/ConcatTable.lua b/ConcatTable.lua new file mode 100644 index 0000000..730d95e --- /dev/null +++ b/ConcatTable.lua @@ -0,0 +1,72 @@ +local ConcatTable, parent = torch.class('nn.ConcatTable', 'nn.Module') + +function ConcatTable:__init() + parent.__init(self) + self.modules = {} + self.output = {} +end + +function ConcatTable:add(module) + table.insert(self.modules, module) + return self +end + +function ConcatTable:get(index) + return self.modules[index] +end + +function ConcatTable:size() + return #self.modules +end + +function ConcatTable:updateOutput(input) + for i=1,#self.modules do + self.output[i] = self.modules[i]:updateOutput(input) + end + return self.output +end + +function ConcatTable:updateGradInput(input, gradOutput) + for i,module in ipairs(self.modules) do + local currentGradInput = module:updateGradInput(input, gradOutput[i]) + if i == 1 then + self.gradInput:resizeAs(currentGradInput):copy(currentGradInput) + else + self.gradInput:add(currentGradInput) + end + end + return self.gradInput +end + +function ConcatTable:accGradParameters(input, gradOutput, scale) + scale = scale or 1 + for i,module in ipairs(self.modules) do + module:accGradParameters(input, gradOutput[i], scale) + end +end + +function ConcatTable:accUpdateGradParameters(input, gradOutput, lr) + for i,module in ipairs(self.modules) do + module:accUpdateGradParameters(input, gradOutput[i], lr) + end +end + +function ConcatTable:zeroGradParameters() + for _,module in ipairs(self.modules) do + module:zeroGradParameters() + end +end + +function ConcatTable:updateParameters(learningRate) + for _,module in ipairs(self.modules) do + module:updateParameters(learningRate) + end +end + +function ConcatTable:share(mlp,...) 
+ for i=1,#self.modules do + self.modules[i]:share(mlp.modules[i],...); + end +end + + diff --git a/Copy.lua b/Copy.lua new file mode 100644 index 0000000..7b6eeb3 --- /dev/null +++ b/Copy.lua @@ -0,0 +1,33 @@ +local Copy, parent = torch.class('nn.Copy', 'nn.Module') + +function Copy:__init(intype, outtype) + intype = intype or torch.getmetatable(torch.Tensor.__typename) + outtype = outtype or torch.getmetatable(torch.Tensor.__typename) + + parent.__init(self) + self.gradInput = torch.getmetatable(intype).new() + self.output = torch.getmetatable(outtype).new() + + if intype == outtype then + + self.updateOutput = function(self, input) + self.output = input + return input + end + + self.updateGradInput = function(self, input, gradOutput) + self.gradInput = gradOutput + return gradOutput + end + end +end + +function Copy:updateOutput(input) + self.output:resize(input:size()):copy(input) + return self.output +end + +function Copy:updateGradInput(input, gradOutput) + self.gradInput:resize(gradOutput:size()):copy(gradOutput) + return self.gradInput +end diff --git a/CosineDistance.lua b/CosineDistance.lua new file mode 100644 index 0000000..061ff92 --- /dev/null +++ b/CosineDistance.lua @@ -0,0 +1,40 @@ +local CosineDistance, parent = torch.class('nn.CosineDistance', 'nn.Module') + +function CosineDistance:__init() + parent.__init(self) + self.gradInput = {torch.Tensor(), torch.Tensor()} + self.output=torch.Tensor(1) +end + +function CosineDistance:updateOutput(input) + local input1, input2 = input[1], input[2] + self.w1 = input1:dot(input2) + self.w22 = input1:dot(input1) + self.w2 = math.sqrt(self.w22) + self.w32 = input2:dot(input2) + self.w3 = math.sqrt(self.w32) + self.output[1] = self.w1/self.w2/self.w3 + return self.output +end + +function CosineDistance:updateGradInput(input, gradOutput) + local v1 = input[1] + local v2 = input[2] + local gw1 = input[1].new() + local gw2 = input[2].new() + gw1:resizeAs(v1) + gw2:resizeAs(v1) + + gw1:zero() + gw1:add(1/(self.w2*self.w3), v2) + gw1:add(-self.w1/(self.w22*self.w2*self.w3), v1) + + gw2:zero() + gw2:add(1/(self.w2*self.w3), v1) + gw2:add(-self.w1/(self.w32*self.w2*self.w3), v2) + + gw1:mul(gradOutput[1]) + gw2:mul(gradOutput[1]) + self.gradInput = {gw1, gw2} + return self.gradInput +end diff --git a/CosineEmbeddingCriterion.lua b/CosineEmbeddingCriterion.lua new file mode 100644 index 0000000..a9ee2e0 --- /dev/null +++ b/CosineEmbeddingCriterion.lua @@ -0,0 +1,54 @@ +local CosineEmbeddingCriterion, parent = torch.class('nn.CosineEmbeddingCriterion', 'nn.Module') + +function CosineEmbeddingCriterion:__init(margin) + parent.__init(self) + margin=margin or 0 + self.margin = margin + self.gradInput = {torch.Tensor(), torch.Tensor()} +end + +function CosineEmbeddingCriterion:updateOutput(input,y) + local input1, input2 = input[1], input[2] + self.w1 = input1:dot(input2) + self.w22 = input1:dot(input1) + self.w2 = math.sqrt(self.w22) + self.w32 = input2:dot(input2) + self.w3 = math.sqrt(self.w32) + self.output = self.w1/self.w2/self.w3 + if y==-1 then + self.output = math.max(0, self.output - self.margin); + else + self.output = 1 - self.output + end + return self.output +end + +local function mathsign(t) + if t>0 then return 1; end + if t<0 then return -1; end + return 2*torch.random(2)-3; +end + +function CosineEmbeddingCriterion:updateGradInput(input, y) + local v1 = input[1] + local v2 = input[2] + local gw1 = input[1].new() + local gw2 = input[2].new() + gw1:resizeAs(v1) + gw2:resizeAs(v1) + + gw1:zero() + gw1:add(1/(self.w2*self.w3), v2) + 
gw1:add(-self.w1/(self.w22*self.w2*self.w3), v1) + + gw2:zero() + gw2:add(1/(self.w2*self.w3), v1) + gw2:add(-self.w1/(self.w32*self.w2*self.w3), v2) + + if y == 1 then + gw1 = -gw1 + gw2 = -gw2 + end + self.gradInput = {gw1, gw2} + return self.gradInput +end diff --git a/Criterion.lua b/Criterion.lua new file mode 100644 index 0000000..6513414 --- /dev/null +++ b/Criterion.lua @@ -0,0 +1,51 @@ +local Criterion = torch.class('nn.Criterion') + +function Criterion:__init() + self.gradInput = torch.Tensor() + self.output = 0 +end + +function Criterion:updateOutput(input, target) +end + +function Criterion:forward(input, target) + return self:updateOutput(input, target) +end + +function Criterion:backward(input, target) + return self:updateGradInput(input, target) +end + +function Criterion:updateGradInput(input, target) +end + +function Criterion:clone() + local f = torch.MemoryFile("rw"):binary() + f:writeObject(self) + f:seek(1) + local clone = f:readObject() + f:close() + return clone +end + +function Criterion:type(type) + -- find all tensors and convert them + for key,param in pairs(self) do + if torch.typename(param) and torch.typename(param):find('torch%..+Tensor') then + self[key] = param:type(type) + end + end + return self +end + +function Criterion:float() + return self:type('torch.FloatTensor') +end + +function Criterion:double() + return self:type('torch.DoubleTensor') +end + +function Criterion:cuda() + return self:type('torch.CudaTensor') +end diff --git a/CriterionTable.lua b/CriterionTable.lua new file mode 100644 index 0000000..e5538f7 --- /dev/null +++ b/CriterionTable.lua @@ -0,0 +1,16 @@ +local CriterionTable, parent = torch.class('nn.CriterionTable', 'nn.Module') + +function CriterionTable:__init(criterion) + self.criterion = criterion + self.gradInput = {criterion.gradInput} +end + +function CriterionTable:updateOutput(input) + self.output = self.criterion:updateOutput(unpack(input)) + return self.output +end + +function CriterionTable:updateGradInput(input, gradOutput) + self.criterion:updateGradInput(unpack(input)) + return self.gradInput +end diff --git a/DotProduct.lua b/DotProduct.lua new file mode 100644 index 0000000..d16d295 --- /dev/null +++ b/DotProduct.lua @@ -0,0 +1,29 @@ +local DotProduct, parent = torch.class('nn.DotProduct', 'nn.Module') + +function DotProduct:__init() + parent.__init(self) + self.gradInput = {torch.Tensor(), torch.Tensor()} + self.output=torch.Tensor(1) +end + +function DotProduct:updateOutput(input,y) + self.output[1] = input[1]:dot(input[2]) + return self.output +end + +function DotProduct:updateGradInput(input, gradOutput) + local v1 = input[1] + local v2 = input[2] + local gw1=self.gradInput[1]; + local gw2=self.gradInput[2]; + gw1:resizeAs(v1) + gw2:resizeAs(v1) + + gw1:copy( v2) + gw1:mul(gradOutput[1]) + + gw2:copy( v1) + gw2:mul(gradOutput[1]) + + return self.gradInput +end diff --git a/Euclidean.lua b/Euclidean.lua new file mode 100644 index 0000000..808b7ab --- /dev/null +++ b/Euclidean.lua @@ -0,0 +1,64 @@ +local Euclidean, parent = torch.class('nn.Euclidean', 'nn.Module') + +function Euclidean:__init(inputSize,outputSize) + parent.__init(self) + + self.weight = torch.Tensor(inputSize,outputSize) + self.gradWeight = torch.Tensor(inputSize,outputSize) + + -- state + self.gradInput:resize(inputSize) + self.output:resize(outputSize) + self.temp = torch.Tensor(inputSize) + + self:reset() +end + +function Euclidean:reset(stdv) + if stdv then + stdv = stdv * math.sqrt(3) + else + stdv = 1./math.sqrt(self.weight:size(1)) + end + + 
for i=1,self.weight:size(2) do + self.weight:select(2, i):apply(function() + return torch.uniform(-stdv, stdv) + end) + end +end + +function Euclidean:updateOutput(input) + self.output:zero() + for o = 1,self.weight:size(2) do + self.output[o] = input:dist(self.weight:select(2,o)) + end + return self.output +end + +function Euclidean:updateGradInput(input, gradOutput) + self:updateOutput(input) + if self.gradInput then + self.gradInput:zero() + for o = 1,self.weight:size(2) do + if self.output[o] ~= 0 then + self.temp:copy(input):add(-1,self.weight:select(2,o)) + self.temp:mul(gradOutput[o]/self.output[o]) + self.gradInput:add(self.temp) + end + end + return self.gradInput + end +end + +function Euclidean:accGradParameters(input, gradOutput, scale) + self:updateOutput(input) + scale = scale or 1 + for o = 1,self.weight:size(2) do + if self.output[o] ~= 0 then + self.temp:copy(self.weight:select(2,o)):add(-1,input) + self.temp:mul(gradOutput[o]/self.output[o]) + self.gradWeight:select(2,o):add(self.temp) + end + end +end @@ -0,0 +1,9 @@ +local Exp = torch.class('nn.Exp', 'nn.Module') + +function Exp:updateOutput(input) + return input.nn.Exp_updateOutput(self, input) +end + +function Exp:updateGradInput(input, gradOutput) + return input.nn.Exp_updateGradInput(self, input, gradOutput) +end diff --git a/HardShrink.lua b/HardShrink.lua new file mode 100644 index 0000000..7dfeaca --- /dev/null +++ b/HardShrink.lua @@ -0,0 +1,16 @@ +local HardShrink, parent = torch.class('nn.HardShrink', 'nn.Module') + +function HardShrink:__init(lam) + parent.__init(self) + self.lambda = lam or 0.5 +end + +function HardShrink:updateOutput(input) + input.nn.HardShrink_updateOutput(self, input) + return self.output +end + +function HardShrink:updateGradInput(input, gradOutput) + input.nn.HardShrink_updateGradInput(self, input, gradOutput) + return self.gradInput +end diff --git a/HardTanh.lua b/HardTanh.lua new file mode 100644 index 0000000..3391479 --- /dev/null +++ b/HardTanh.lua @@ -0,0 +1,9 @@ +local HardTanh = torch.class('nn.HardTanh', 'nn.Module') + +function HardTanh:updateOutput(input) + return input.nn.HardTanh_updateOutput(self, input) +end + +function HardTanh:updateGradInput(input, gradOutput) + return input.nn.HardTanh_updateGradInput(self, input, gradOutput) +end diff --git a/HingeEmbeddingCriterion.lua b/HingeEmbeddingCriterion.lua new file mode 100644 index 0000000..e88ef82 --- /dev/null +++ b/HingeEmbeddingCriterion.lua @@ -0,0 +1,26 @@ +local HingeEmbeddingCriterion, parent = + torch.class('nn.HingeEmbeddingCriterion', 'nn.Module') + +function HingeEmbeddingCriterion:__init(margin) + parent.__init(self) + margin=margin or 1 + self.margin = margin + self.gradInput = torch.Tensor(1) +end + +function HingeEmbeddingCriterion:updateOutput(input,y) + self.output=input[1] + if y==-1 then + self.output = math.max(0,self.margin - self.output); + end + return self.output +end + +function HingeEmbeddingCriterion:updateGradInput(input, y) + self.gradInput[1]=y + local dist = input[1] + if y == -1 and dist > self.margin then + self.gradInput[1]=0; + end + return self.gradInput +end diff --git a/Identity.lua b/Identity.lua new file mode 100644 index 0000000..79b5c08 --- /dev/null +++ b/Identity.lua @@ -0,0 +1,12 @@ +local Identity, parent = torch.class('nn.Identity', 'nn.Module') + +function Identity:updateOutput(input) + self.output = input + return self.output +end + + +function Identity:updateGradInput(input, gradOutput) + self.gradInput = gradOutput + return self.gradInput +end diff --git a/Jacobian.lua 
b/Jacobian.lua new file mode 100644 index 0000000..04330ac --- /dev/null +++ b/Jacobian.lua @@ -0,0 +1,239 @@ +nn.Jacobian = {} + +function nn.Jacobian.backward (module, input, param, dparam) + local doparam = 0 + if param then + doparam = 1 + end + param = param or input + -- output deriv + module:forward(input) + local dout = module.output.new():resizeAs(module.output) + -- 1D view + local sdout = module.output.new(dout:storage(),1,dout:nElement()) + -- jacobian matrix to calculate + local jacobian = torch.Tensor(param:nElement(),dout:nElement()):zero() + + for i=1,sdout:nElement() do + dout:zero() + sdout[i] = 1 + module:zeroGradParameters() + local din = module:updateGradInput(input, dout) + module:accGradParameters(input, dout) + if doparam == 1 then + jacobian:select(2,i):copy(dparam) + else + jacobian:select(2,i):copy(din) + end + end + return jacobian +end + +function nn.Jacobian.backwardUpdate (module, input, param) + + -- output deriv + module:forward(input) + local dout = module.output.new():resizeAs(module.output) + -- 1D view + local sdout = module.output.new(dout:storage(),1,dout:nElement()) + -- jacobian matrix to calculate + local jacobian = torch.Tensor(param:nElement(),dout:nElement()):zero() + + -- original param + local origparam = param:clone() + + for i=1,sdout:nElement() do + param:copy(origparam) + dout:zero() + sdout[i] = 1 + local din = module:updateGradInput(input, dout) + module:accUpdateGradParameters(input, dout, 1) + jacobian:select(2,i):copy(param) + end + + param:copy(origparam) + + return jacobian +end + +function nn.Jacobian.forward(module, input, param) + param = param or input + -- perturbation amount + local small = 1e-6 + -- 1D view of input + local tst = param:storage() + local sin = param.new(tst,1,tst:size()) + -- jacobian matrix to calculate + local jacobian = torch.Tensor():resize(param:nElement(),module:forward(input):nElement()) + + local outa = torch.Tensor(jacobian:size(2)) + local outb = torch.Tensor(jacobian:size(2)) + + for i=1,sin:nElement() do + sin[i] = sin[i] - small + outa:copy(module:forward(input)) + sin[i] = sin[i] + 2*small + outb:copy(module:forward(input)) + sin[i] = sin[i] - small + + outb:add(-1,outa):div(2*small) + jacobian:select(1,i):copy(outb) + end + + return jacobian +end + +function nn.Jacobian.forwardUpdate(module, input, param) + -- perturbation amount + local small = 1e-6 + -- 1D view of input + local tst = param:storage() + local sin = param.new(tst,1,tst:size()) + -- jacobian matrix to calculate + local jacobian = torch.Tensor():resize(param:nElement(),module:forward(input):nElement()) + + local outa = torch.Tensor(jacobian:size(2)) + local outb = torch.Tensor(jacobian:size(2)) + + for i=1,sin:nElement() do + sin[i] = sin[i] - small + outa:copy(module:forward(input)) + sin[i] = sin[i] + 2*small + outb:copy(module:forward(input)) + sin[i] = sin[i] - small + + outb:add(-1,outa):div(2*small) + jacobian:select(1,i):copy(outb) + jacobian:select(1,i):mul(-1) + jacobian:select(1,i):add(sin[i]) + end + return jacobian +end + +function nn.Jacobian.testJacobian (module, input, minval, maxval) + minval = minval or -2 + maxval = maxval or 2 + local inrange = maxval - minval + input:copy(torch.rand(input:nElement()):mul(inrange):add(minval)) + local jac_fprop = nn.Jacobian.forward(module,input) + local jac_bprop = nn.Jacobian.backward(module,input) + local error = jac_fprop-jac_bprop + return error:abs():maxall() +end + +function nn.Jacobian.testJacobianParameters (module, input, param, dparam, minval, maxval) + minval = minval 
or -2 + maxval = maxval or 2 + local inrange = maxval - minval + input:copy(torch.rand(input:nElement()):mul(inrange):add(minval)) + param:copy(torch.rand(param:nElement()):mul(inrange):add(minval)) + local jac_bprop = nn.Jacobian.backward(module, input, param, dparam) + local jac_fprop = nn.Jacobian.forward(module, input, param) + local error = jac_fprop - jac_bprop + return error:abs():maxall() +end + +function nn.Jacobian.testJacobianUpdateParameters (module, input, param, minval, maxval) + minval = minval or -2 + maxval = maxval or 2 + local inrange = maxval - minval + input:copy(torch.rand(input:nElement()):mul(inrange):add(minval)) + param:copy(torch.rand(param:nElement()):mul(inrange):add(minval)) + local params_bprop = nn.Jacobian.backwardUpdate(module, input, param) + local params_fprop = nn.Jacobian.forwardUpdate(module, input, param) + + local error = params_fprop - params_bprop + return error:abs():maxall() +end + +function nn.Jacobian.testIO(module,input, minval, maxval) + minval = minval or -2 + maxval = maxval or 2 + local inrange = maxval - minval + + -- run module + module:forward(input) + local go = module.output:clone():copy(torch.rand(module.output:nElement()):mul(inrange):add(minval)) + module:updateGradInput(input,go) + module:accGradParameters(input,go) + + local fo = module.output:clone() + local bo = module.gradInput:clone() + + -- write module + local f = torch.DiskFile('tmp.bin','w'):binary() + f:writeObject(module) + f:close() + -- read module + local m = torch.DiskFile('tmp.bin'):binary():readObject() + m:forward(input) + m:updateGradInput(input,go) + m:accGradParameters(input,go) + -- cleanup + os.remove('tmp.bin') + + local fo2 = m.output:clone() + local bo2 = m.gradInput:clone() + + local errf = fo - fo2 + local errb = bo - bo2 + return errf:abs():maxall(), errb:abs():maxall() +end + +function nn.Jacobian.testAllUpdate(module, input, weight, gradWeight) + local gradOutput + local lr = torch.uniform(0.1, 1) + local errors = {} + + -- accGradParameters + local maccgp = module:clone() + local weightc = maccgp[weight]:clone() + maccgp:forward(input) + gradOutput = torch.rand(maccgp.output:size()) + maccgp:zeroGradParameters() + maccgp:updateGradInput(input, gradOutput) + maccgp:accGradParameters(input, gradOutput) + maccgp:updateParameters(lr) + errors["accGradParameters"] = (weightc-maccgp[gradWeight]*lr-maccgp[weight]):norm() + + -- accUpdateGradParameters + local maccugp = module:clone() + maccugp:forward(input) + maccugp:updateGradInput(input, gradOutput) + maccugp:accUpdateGradParameters(input, gradOutput, lr) + errors["accUpdateGradParameters"] = (maccugp[weight]-maccgp[weight]):norm() + + -- shared, accGradParameters + local macsh1 = module:clone() + local macsh2 = module:clone() + macsh2:share(macsh1, weight) + macsh1:forward(input) + macsh2:forward(input) + macsh1:zeroGradParameters() + macsh2:zeroGradParameters() + macsh1:updateGradInput(input, gradOutput) + macsh2:updateGradInput(input, gradOutput) + macsh1:accGradParameters(input, gradOutput) + macsh2:accGradParameters(input, gradOutput) + macsh1:updateParameters(lr) + macsh2:updateParameters(lr) + local err = (weightc-maccgp[gradWeight]*(lr*2)-macsh1[weight]):norm() + err = err + (weightc-maccgp[gradWeight]*(lr*2)-macsh2[weight]):norm() + errors["accGradParameters [shared]"] = err + + -- shared, accUpdateGradParameters + local macshu1 = module:clone() + local macshu2 = module:clone() + macshu2:share(macshu1, weight) + macshu1:forward(input) + macshu2:forward(input) + macshu1:updateGradInput(input, 
gradOutput) + macshu2:updateGradInput(input, gradOutput) + macshu1:accUpdateGradParameters(input, gradOutput, lr) + macshu2:accUpdateGradParameters(input, gradOutput, lr) + local err = (weightc-maccgp[gradWeight]*(lr*2)-macshu1[weight]):norm() + err = err + (weightc-maccgp[gradWeight]*(lr*2)-macshu2[weight]):norm() + errors["accUpdateGradParameters [shared]"] = err + + return errors +end diff --git a/JoinTable.lua b/JoinTable.lua new file mode 100644 index 0000000..dc20246 --- /dev/null +++ b/JoinTable.lua @@ -0,0 +1,50 @@ +local JoinTable, parent = torch.class('nn.JoinTable', 'nn.Module') + +function JoinTable:__init(dimension) + parent.__init(self) + self.size = torch.LongStorage() + self.dimension = dimension + self.gradInput = {} +end + +function JoinTable:updateOutput(input) + for i=1,#input do + local currentOutput = input[i] + if i == 1 then + self.size:resize(currentOutput:dim()):copy(currentOutput:size()) + else + self.size[self.dimension] = self.size[self.dimension] + + currentOutput:size(self.dimension) + end + end + self.output:resize(self.size) + + local offset = 1 + for i=1,#input do + local currentOutput = input[i] + self.output:narrow(self.dimension, offset, + currentOutput:size(self.dimension)):copy(currentOutput) + offset = offset + currentOutput:size(self.dimension) + end + return self.output + +end + +function JoinTable:updateGradInput(input, gradOutput) + for i=1,#input do + if self.gradInput[i] == nil then + self.gradInput[i] = input[i].new() + end + self.gradInput[i]:resizeAs(input[i]) + end + + local offset = 1 + for i=1,#input do + local currentOutput = input[i] + local currentGradInput = gradOutput:narrow(self.dimension, offset, + currentOutput:size(self.dimension)) + self.gradInput[i]:copy(currentGradInput) + offset = offset + currentOutput:size(self.dimension) + end + return self.gradInput +end diff --git a/L1HingeEmbeddingCriterion.lua b/L1HingeEmbeddingCriterion.lua new file mode 100644 index 0000000..5aa1ae7 --- /dev/null +++ b/L1HingeEmbeddingCriterion.lua @@ -0,0 +1,41 @@ +local L1HingeEmbeddingCriterion, parent = torch.class('nn.L1HingeEmbeddingCriterion', 'nn.Module') + +function L1HingeEmbeddingCriterion:__init(margin) + parent.__init(self) + margin=margin or 1 + self.margin = margin + self.gradInput = {torch.Tensor(), torch.Tensor()} +end + +function L1HingeEmbeddingCriterion:updateOutput(input,y) + self.output=input[1]:dist(input[2],1); + if y==-1 then + self.output = math.max(0,self.margin - self.output); + end + return self.output +end + + +local function mathsign(t) + if t>0 then return 1; end + if t<0 then return -1; end + return 2*torch.random(2)-3; +end + +function L1HingeEmbeddingCriterion:updateGradInput(input, y) + self.gradInput[1]:resizeAs(input[1]) + self.gradInput[2]:resizeAs(input[2]) + self.gradInput[1]:copy(input[1]) + self.gradInput[1]:add(-1, input[2]) + local dist = self.gradInput[1]:norm(1); + self.gradInput[1]:apply(mathsign) -- L1 gradient + if y == -1 then -- just to avoid a mul by 1 + if dist > self.margin then + self.gradInput[1]:zero() + else + self.gradInput[1]:mul(-1) + end + end + self.gradInput[2]:zero():add(-1, self.gradInput[1]) + return self.gradInput +end diff --git a/Linear.lua b/Linear.lua new file mode 100644 index 0000000..953af78 --- /dev/null +++ b/Linear.lua @@ -0,0 +1,82 @@ +local Linear, parent = torch.class('nn.Linear', 'nn.Module') + +function Linear:__init(inputSize, outputSize) + parent.__init(self) + + self.weight = torch.Tensor(outputSize, inputSize) + self.bias = torch.Tensor(outputSize) + 
self.gradWeight = torch.Tensor(outputSize, inputSize) + self.gradBias = torch.Tensor(outputSize) + + self:reset() +end + +function Linear:reset(stdv) + if stdv then + stdv = stdv * math.sqrt(3) + else + stdv = 1./math.sqrt(self.weight:size(2)) + end + + -- we do this so the initialization is exactly + -- the same than in previous torch versions + for i=1,self.weight:size(1) do + self.weight:select(1, i):apply(function() + return torch.uniform(-stdv, stdv) + end) + self.bias[i] = torch.uniform(-stdv, stdv) + end +end + +function Linear:updateOutput(input) + if input:dim() == 1 then + self.output:resize(self.bias:size(1)) + self.output:copy(self.bias) + self.output:addmv(1, self.weight, input) + elseif input:dim() == 2 then + local nframe = input:size(1) + local nunit = self.bias:size(1) + + self.output:resize(nframe, nunit) + self.output:zero():addr(1, input.new(nframe):fill(1), self.bias) + self.output:addmm(1, input, self.weight:t()) + else + error('input must be vector or matrix') + end + + return self.output +end + +function Linear:updateGradInput(input, gradOutput) + if self.gradInput then + + if input:dim() == 1 then + self.gradInput:resizeAs(input) + self.gradInput:addmv(0, 1, self.weight:t(), gradOutput) + elseif input:dim() == 2 then + self.gradInput:resizeAs(input) + self.gradInput:addmm(0, 1, gradOutput, self.weight) + end + + return self.gradInput + end +end + +function Linear:accGradParameters(input, gradOutput, scale) + scale = scale or 1 + + if input:dim() == 1 then + self.gradWeight:addr(scale, gradOutput, input) + self.gradBias:add(scale, gradOutput) + elseif input:dim() == 2 then + local nframe = input:size(1) + local nunit = self.bias:size(1) + + self.gradWeight:addmm(scale, gradOutput:t(), input) + self.gradBias:addmv(scale, gradOutput:t(), input.new(nframe):fill(1)) + end + +end + +-- we do not need to accumulate parameters when sharing +Linear.sharedAccUpdateGradParameters = Linear.accUpdateGradParameters diff --git a/LogSigmoid.lua b/LogSigmoid.lua new file mode 100644 index 0000000..7485ae6 --- /dev/null +++ b/LogSigmoid.lua @@ -0,0 +1,14 @@ +local LogSigmoid, parent = torch.class('nn.LogSigmoid', 'nn.Module') + +function LogSigmoid:__init() + parent.__init(self) + self.buffer = torch.Tensor() +end + +function LogSigmoid:updateOutput(input) + return input.nn.LogSigmoid_updateOutput(self, input) +end + +function LogSigmoid:updateGradInput(input, gradOutput) + return input.nn.LogSigmoid_updateGradInput(self, input, gradOutput) +end diff --git a/LogSoftMax.lua b/LogSoftMax.lua new file mode 100644 index 0000000..8d2947e --- /dev/null +++ b/LogSoftMax.lua @@ -0,0 +1,9 @@ +local LogSoftMax = torch.class('nn.LogSoftMax', 'nn.Module') + +function LogSoftMax:updateOutput(input) + return input.nn.LogSoftMax_updateOutput(self, input) +end + +function LogSoftMax:updateGradInput(input, gradOutput) + return input.nn.LogSoftMax_updateGradInput(self, input, gradOutput) +end diff --git a/LookupTable.lua b/LookupTable.lua new file mode 100644 index 0000000..115f19c --- /dev/null +++ b/LookupTable.lua @@ -0,0 +1,76 @@ +local LookupTable, parent = torch.class('nn.LookupTable', 'nn.Module') + +LookupTable.__version = 2 + +function LookupTable:__init(nIndex, ...) + parent.__init(self) + + if select('#', ...) == 1 and type(select(1, ...)) ~= "number" then + local size = select(1, ...) + self.size = torch.LongStorage(#size + 1) + for i=1,#size do + self.size[i+1] = size[i] + end + else + self.size = torch.LongStorage(select('#', ...)+1) + for i=1,select('#',...) 
do + self.size[i+1] = select(i, ...) + end + end + + self.size[1] = nIndex + self.weight = torch.Tensor(self.size) + self.gradWeight = torch.Tensor(self.size):zero() + self.inputs = {} + + self:reset() +end + +function LookupTable:reset(stdv) + stdv = stdv or 1 + self.weight:apply(function() + return torch.normal(0, stdv) + end) +end + +function LookupTable:updateOutput(input) + local nIndex = input:size(1) + self.size[1] = nIndex + self.output:resize(self.size) + + for i=1,nIndex do + self.output:select(1, i):copy(self.weight:select(1, input[i])) + end + + return self.output +end + +function LookupTable:zeroGradParameters() + for k,_ in pairs(self.inputs) do + self.gradWeight:select(1, k):zero() + end + self.inputs = {} +end + +function LookupTable:accGradParameters(input, gradOutput, scale) + for i=1,input:size(1) do + local k = input[i] + self.inputs[k] = true + self.gradWeight:select(1, k):add(scale, gradOutput:select(1, i)) + end +end + +function LookupTable:accUpdateGradParameters(input, gradOutput, lr) + for i=1,input:size(1) do + self.weight:select(1, input[i]):add(-lr, gradOutput:select(1, i)) + end +end + +function LookupTable:updateParameters(learningRate) + for k,_ in pairs(self.inputs) do + self.weight:select(1, k):add(-learningRate, self.gradWeight:select(1, k)) + end +end + +-- we do not need to accumulate parameters when sharing +LookupTable.sharedAccUpdateGradParameters = LookupTable.accUpdateGradParameters diff --git a/MSECriterion.lua b/MSECriterion.lua new file mode 100644 index 0000000..655c74f --- /dev/null +++ b/MSECriterion.lua @@ -0,0 +1,14 @@ +local MSECriterion, parent = torch.class('nn.MSECriterion', 'nn.Criterion') + +function MSECriterion:__init() + parent.__init(self) + self.sizeAverage = true +end + +function MSECriterion:updateOutput(input, target) + return input.nn.MSECriterion_updateOutput(self, input, target) +end + +function MSECriterion:updateGradInput(input, target) + return input.nn.MSECriterion_updateGradInput(self, input, target) +end diff --git a/MarginCriterion.lua b/MarginCriterion.lua new file mode 100644 index 0000000..deb903e --- /dev/null +++ b/MarginCriterion.lua @@ -0,0 +1,23 @@ +local MarginCriterion, parent = + torch.class('nn.MarginCriterion', 'nn.Module') + +function MarginCriterion:__init(margin) + parent.__init(self) + margin=margin or 1 + self.margin = margin + self.gradInput = torch.Tensor(1) +end + +function MarginCriterion:updateOutput(input,y) + self.output=math.max(0, self.margin- y* input[1]) + return self.output +end + +function MarginCriterion:updateGradInput(input, y) + if (y*input[1])<self.margin then + self.gradInput[1]=-y + else + self.gradInput[1]=0; + end + return self.gradInput +end diff --git a/MarginRankingCriterion.lua b/MarginRankingCriterion.lua new file mode 100644 index 0000000..a365ade --- /dev/null +++ b/MarginRankingCriterion.lua @@ -0,0 +1,25 @@ +local MarginRankingCriterion, parent = torch.class('nn.MarginRankingCriterion', 'nn.Module') + +function MarginRankingCriterion:__init(margin) + parent.__init(self) + margin=margin or 1 + self.margin = margin + self.gradInput = {torch.Tensor(1), torch.Tensor(1)} +end + +function MarginRankingCriterion:updateOutput(input,y) + self.output=math.max(0, -y*(input[1][1]-input[2][1]) + self.margin ) + return self.output +end + +function MarginRankingCriterion:updateGradInput(input, y) + local dist = -y*(input[1][1]-input[2][1]) + self.margin + if dist < 0 then + self.gradInput[1][1]=0; + self.gradInput[2][1]=0; + else + self.gradInput[1][1]=-y + self.gradInput[2][1]=y + 
end + return self.gradInput +end @@ -0,0 +1,16 @@ +local Max, parent = torch.class('nn.Max', 'nn.Module') + +function Max:__init(dimension) + parent.__init(self) + dimension = dimension or 1 + self.dimension = dimension + self.indices = torch.Tensor() +end + +function Max:updateOutput(input) + return input.nn.Max_updateOutput(self, input) +end + +function Max:updateGradInput(input, gradOutput) + return input.nn.Max_updateGradInput(self, input, gradOutput) +end diff --git a/Mean.lua b/Mean.lua new file mode 100644 index 0000000..55e7609 --- /dev/null +++ b/Mean.lua @@ -0,0 +1,26 @@ +local Mean, parent = torch.class('nn.Mean', 'nn.Module') + +function Mean:__init(dimension) + parent.__init(self) + dimension = dimension or 1 + self.dimension = dimension +end + +function Mean:updateOutput(input) + input.torch.mean(self.output, input, self.dimension) + self.output = self.output:select(self.dimension, 1) + return self.output +end + +function Mean:updateGradInput(input, gradOutput) + local size = gradOutput:size():totable() + local stride = gradOutput:stride():totable() + table.insert(size, self.dimension, input:size(self.dimension)) + table.insert(stride, self.dimension, 0) + + self.gradInput:resizeAs(gradOutput):copy(gradOutput) + self.gradInput:mul(1/input:size(self.dimension)) + self.gradInput:resize(torch.LongStorage(size), torch.LongStorage(stride)) + + return self.gradInput +end @@ -0,0 +1,16 @@ +local Min, parent = torch.class('nn.Min', 'nn.Module') + +function Min:__init(dimension) + parent.__init(self) + dimension = dimension or 1 + self.dimension = dimension + self.indices = torch.Tensor() +end + +function Min:updateOutput(input) + return input.nn.Min_updateOutput(self, input) +end + +function Min:updateGradInput(input, gradOutput) + return input.nn.Min_updateGradInput(self, input, gradOutput) +end diff --git a/Module.lua b/Module.lua new file mode 100644 index 0000000..2ae8115 --- /dev/null +++ b/Module.lua @@ -0,0 +1,211 @@ +local Module = torch.class('nn.Module') + +function Module:__init() + self.gradInput = torch.Tensor() + self.output = torch.Tensor() +end + +function Module:parameters() + if self.weight and self.bias then + return {self.weight, self.bias}, {self.gradWeight, self.gradBias} + elseif self.weight then + return {self.weight}, {self.gradWeight} + elseif self.bias then + return {self.bias}, {self.gradBias} + else + return + end +end + +function Module:updateOutput(input) + return self.output +end + +function Module:forward(input) + return self:updateOutput(input, target) +end + +function Module:backward(input, gradOutput) + self:updateGradInput(input, gradOutput) + self:accGradParameters(input, gradOutput) + return self.gradInput +end + +function Module:backwardUpdate(input, gradOutput, lr) + self:updateGradInput(input, gradOutput) + self:accUpdateGradParameters(input, gradOutput, lr) + return self.gradInput +end + +function Module:updateGradInput(input, gradOutput) + return self.gradInput +end + +function Module:accGradParameters(input, gradOutput, scale) +end + +function Module:accUpdateGradParameters(input, gradOutput, lr) + local gradWeight = self.gradWeight + local gradBias = self.gradBias + self.gradWeight = self.weight + self.gradBias = self.bias + self:accGradParameters(input, gradOutput, -lr) + self.gradWeight = gradWeight + self.gradBias = gradBias +end + +function Module:sharedAccUpdateGradParameters(input, gradOutput, lr) + if self:parameters() then + self:zeroGradParameters() + self:accGradParameters(input, gradOutput, 1) + self:updateParameters(lr) + end 
+end + +function Module:zeroGradParameters() + local _,gradParams = self:parameters() + if gradParams then + for i=1,#gradParams do + gradParams[i]:zero() + end + end +end + +function Module:updateParameters(learningRate) + local params, gradParams = self:parameters() + if params then + for i=1,#params do + params[i]:add(-learningRate, gradParams[i]) + end + end +end + +function Module:share(mlp, ...) + for i,v in ipairs(arg) do + if self[v] ~= nil then + self[v]:set(mlp[v]) + self.accUpdateGradParameters = self.sharedAccUpdateGradParameters + mlp.accUpdateGradParameters = mlp.sharedAccUpdateGradParameters + end + end + return self +end + +function Module:clone(...) + local f = torch.MemoryFile("rw"):binary() + f:writeObject(self) + f:seek(1) + local clone = f:readObject() + f:close() + if select('#',...) > 0 then + clone:share(self,...) + end + return clone +end + +function Module:type(type) + -- find all tensors and convert them + for key,param in pairs(self) do + if torch.typename(param) and torch.typename(param):find('torch%..+Tensor') then + self[key] = param:type(type) + end + end + -- find submodules in classic containers 'modules' + if self.modules then + for _,module in ipairs(self.modules) do + module:type(type) + end + end + return self +end + +function Module:float() + return self:type('torch.FloatTensor') +end + +function Module:double() + return self:type('torch.DoubleTensor') +end + +function Module:cuda() + return self:type('torch.CudaTensor') +end + +function Module:getParameters() + -- get parameters + local parameters,gradParameters = self:parameters() + + -- this function flattens arbitrary lists of parameters, + -- even complex shared ones + local function flatten(parameters) + -- already flat ? + local flat = true + for k = 2,#parameters do + if parameters[k]:storage() ~= parameters[k-1]:storage() then + flat = false + break + end + end + if flat then + local nParameters = 0 + for k,param in ipairs(parameters) do + nParameters = nParameters + param:nElement() + end + local flatParameters = parameters[1].new(parameters[1]:storage()) + if nParameters ~= flatParameters:nElement() then + error('flattenParameters(): weird parameters') + end + return flatParameters + end + -- compute offsets of each parameter + local offsets = {} + local sizes = {} + local strides = {} + local elements = {} + local storageOffsets = {} + local params = {} + local nParameters = 0 + for k,param in ipairs(parameters) do + table.insert(offsets, nParameters+1) + table.insert(sizes, param:size()) + table.insert(strides, param:stride()) + table.insert(elements, param:nElement()) + table.insert(storageOffsets, param:storageOffset()) + local isView = false + for i = 1,k-1 do + if param:storage() == parameters[i]:storage() then + offsets[k] = offsets[i] + if storageOffsets[k] ~= storageOffsets[i] or elements[k] ~= elements[i] then + error('flattenParameters(): cannot flatten shared weights with different structures') + end + isView = true + break + end + end + if not isView then + nParameters = nParameters + param:nElement() + end + end + -- create flat vector + local flatParameters = parameters[1].new(nParameters) + local storage = flatParameters:storage() + -- reallocate all parameters in flat vector + for i = 1,#parameters do + local data = parameters[i]:clone() + parameters[i]:set(storage, offsets[i], elements[i]):resize(sizes[i],strides[i]):copy(data) + data = nil + collectgarbage() + end + -- cleanup + collectgarbage() + -- return flat param + return flatParameters + end + + -- flatten 
parameters and gradients + local flatParameters = flatten(parameters) + local flatGradParameters = flatten(gradParameters) + + -- return new flat vector that contains all discrete parameters + return flatParameters, flatGradParameters +end @@ -0,0 +1,42 @@ +local Mul, parent = torch.class('nn.Mul', 'nn.Module') + +function Mul:__init(inputSize) + parent.__init(self) + + self.weight = torch.Tensor(1) + self.gradWeight = torch.Tensor(1) + + -- state + self.gradInput:resize(inputSize) + self.output:resize(inputSize) + + self:reset() +end + + +function Mul:reset(stdv) + if stdv then + stdv = stdv * math.sqrt(3) + else + stdv = 1./math.sqrt(self.weight:size(1)) + end + + self.weight[1] = torch.uniform(-stdv, stdv); +end + +function Mul:updateOutput(input) + self.output:copy(input); + self.output:mul(self.weight[1]); + return self.output +end + +function Mul:updateGradInput(input, gradOutput) + self.gradInput:zero() + self.gradInput:add(self.weight[1], gradOutput) + return self.gradInput +end + +function Mul:accGradParameters(input, gradOutput, scale) + scale = scale or 1 + self.gradWeight[1] = self.gradWeight[1] + scale*input:dot(gradOutput); +end diff --git a/MultiCriterion.lua b/MultiCriterion.lua new file mode 100644 index 0000000..e83b97e --- /dev/null +++ b/MultiCriterion.lua @@ -0,0 +1,32 @@ +local MultiCriterion, parent = torch.class('nn.MultiCriterion', 'nn.Criterion') + +function MultiCriterion:__init() + parent.__init(self) + self.criterions = {} + self.weights = torch.DoubleStorage() +end + +function MultiCriterion:add(criterion, weight) + weight = weight or 1 + table.insert(self.criterions, criterion) + self.weights:resize(#self.criterions, true) + self.weights[#self.criterions] = weight + return self +end + +function MultiCriterion:updateOutput(input, target) + self.output = 0 + for i=1,#self.criterions do + self.output = self.output + self.weights[i]*self.criterions[i]:updateOutput(input, target) + end + return self.output +end + +function MultiCriterion:updateGradInput(input, target) + self.gradInput:resizeAs(input) + self.gradInput:zero() + for i=1,#self.criterions do + self.gradInput:add(self.weights[i], self.criterions[i]:updateGradInput(input, target)) + end + return self.gradInput +end diff --git a/MultiLabelMarginCriterion.lua b/MultiLabelMarginCriterion.lua new file mode 100644 index 0000000..c435888 --- /dev/null +++ b/MultiLabelMarginCriterion.lua @@ -0,0 +1,14 @@ +local MultiLabelMarginCriterion, parent = torch.class('nn.MultiLabelMarginCriterion', 'nn.Criterion') + +function MultiLabelMarginCriterion:__init() + parent.__init(self) + self.sizeAverage = true +end + +function MultiLabelMarginCriterion:updateOutput(input, target) + return input.nn.MultiLabelMarginCriterion_updateOutput(self, input, target) +end + +function MultiLabelMarginCriterion:updateGradInput(input, target) + return input.nn.MultiLabelMarginCriterion_updateGradInput(self, input, target) +end diff --git a/MultiMarginCriterion.lua b/MultiMarginCriterion.lua new file mode 100644 index 0000000..e8de9d9 --- /dev/null +++ b/MultiMarginCriterion.lua @@ -0,0 +1,14 @@ +local MultiMarginCriterion, parent = torch.class('nn.MultiMarginCriterion', 'nn.Criterion') + +function MultiMarginCriterion:__init() + parent.__init(self) + self.sizeAverage = true +end + +function MultiMarginCriterion:updateOutput(input, target) + return input.nn.MultiMarginCriterion_updateOutput(self, input, target) +end + +function MultiMarginCriterion:updateGradInput(input, target) + return 
input.nn.MultiMarginCriterion_updateGradInput(self, input, target) +end diff --git a/Narrow.lua b/Narrow.lua new file mode 100644 index 0000000..4445983 --- /dev/null +++ b/Narrow.lua @@ -0,0 +1,24 @@ +local Narrow, parent = torch.class('nn.Narrow', 'nn.Module') + +function Narrow:__init(dimension,offset,length) + parent.__init(self) + self.dimension=dimension + self.index=offset + self.length=length or 1 + if not dimension or not offset then + error('nn.Narrow(dimension, offset, length)') + end +end + +function Narrow:updateOutput(input) + local output=input:narrow(self.dimension,self.index,self.length); + self.output:resizeAs(output) + return self.output:copy(output) +end + +function Narrow:updateGradInput(input, gradOutput) + self.gradInput:resizeAs(input) + self.gradInput:zero(); + self.gradInput:narrow(self.dimension,self.index,self.length):copy(gradOutput) + return self.gradInput +end diff --git a/PairwiseDistance.lua b/PairwiseDistance.lua new file mode 100644 index 0000000..638c58f --- /dev/null +++ b/PairwiseDistance.lua @@ -0,0 +1,33 @@ +local PairwiseDistance, parent = torch.class('nn.PairwiseDistance', 'nn.Module') + +function PairwiseDistance:__init(p) + parent.__init(self) + + -- state + self.gradInput = {torch.Tensor(), torch.Tensor()} + self.output = torch.Tensor(1) + self.norm=p +end + +function PairwiseDistance:updateOutput(input) + self.output[1]=input[1]:dist(input[2],self.norm); + return self.output +end + +local function mathsign(x) + if x==0 then return 2*torch.random(2)-3; end + if x>0 then return 1; else return -1; end +end + +function PairwiseDistance:updateGradInput(input, gradOutput) + self.gradInput[1]:resizeAs(input[1]) + self.gradInput[2]:resizeAs(input[2]) + self.gradInput[1]:copy(input[1]) + self.gradInput[1]:add(-1, input[2]) + if self.norm==1 then + self.gradInput[1]:apply(mathsign) + end + self.gradInput[1]:mul(gradOutput[1]); + self.gradInput[2]:zero():add(-1, self.gradInput[1]) + return self.gradInput +end diff --git a/Parallel.lua b/Parallel.lua new file mode 100644 index 0000000..04a8bdb --- /dev/null +++ b/Parallel.lua @@ -0,0 +1,137 @@ +local Parallel, parent = torch.class('nn.Parallel', 'nn.Module') + +function Parallel:__init(inputDimension,outputDimension) + parent.__init(self) + self.modules = {} + self.size = torch.LongStorage() + self.inputDimension = inputDimension + self.outputDimension = outputDimension +end + +function Parallel:add(module) + table.insert(self.modules, module) + return self +end + +function Parallel:get(index) + return self.modules[index] +end + +function Parallel:updateOutput(input) + + local modules=input:size(self.inputDimension) + + for i=1,modules do + local currentOutput = + self.modules[i]:updateOutput(input:select(self.inputDimension,i)) + + if i == 1 then + self.size:resize(currentOutput:dim()):copy(currentOutput:size()) + else + self.size[self.outputDimension] = self.size[self.outputDimension] + + currentOutput:size(self.outputDimension) + end + end + self.output:resize(self.size) + + local offset = 1 + for i=1,modules do + local currentOutput = self.modules[i]:updateOutput(input:select(self.inputDimension,i)) + + self.output:narrow(self.outputDimension, offset, + currentOutput:size(self.outputDimension)):copy(currentOutput) + offset = offset + currentOutput:size(self.outputDimension) + end + return self.output +end + +function Parallel:updateGradInput(input, gradOutput) + local nModule=input:size(self.inputDimension) + self.gradInput:resizeAs(input) + + local offset = 1 + for i=1,nModule do + local 
module=self.modules[i]; + local currentOutput = module.output + local currentGradInput = + module:updateGradInput(input:select(self.inputDimension,i), + gradOutput:narrow(self.outputDimension, + offset, currentOutput:size(self.outputDimension))) + + self.gradInput:select(self.inputDimension,i):copy(currentGradInput) + offset = offset + currentOutput:size(self.outputDimension) + end + return self.gradInput +end + +function Parallel:accGradParameters(input, gradOutput, scale) + local nModule=input:size(self.inputDimension) + + local offset = 1 + for i=1,nModule do + local module = self.modules[i]; + local currentOutput = module.output + local currentGradInput = + module:accGradParameters(input:select(self.inputDimension,i), + gradOutput:narrow(self.outputDimension, + offset, currentOutput:size(self.outputDimension)), scale) + + offset = offset + currentOutput:size(self.outputDimension) + end +end + +function Parallel:accUpdateGradParameters(input, gradOutput, lr) + local nModule=input:size(self.inputDimension) + + local offset = 1 + for i=1,nModule do + local module = self.modules[i]; + local currentOutput = module.output + local currentGradInput = + module:accUpdateGradParameters(input:select(self.inputDimension,i), + gradOutput:narrow(self.outputDimension, + offset, currentOutput:size(self.outputDimension)), lr) + + offset = offset + currentOutput:size(self.outputDimension) + end +end + +function Parallel:zeroGradParameters() + for _,module in ipairs(self.modules) do + module:zeroGradParameters() + end +end + +function Parallel:updateParameters(learningRate) + for _,module in ipairs(self.modules) do + module:updateParameters(learningRate) + end +end + +function Parallel:share(mlp,...) + for i=1,#self.modules do + self.modules[i]:share(mlp.modules[i],...); + end +end + +function Parallel:parameters() + local function tinsert(to, from) + if type(from) == 'table' then + for i=1,#from do + tinsert(to,from[i]) + end + else + table.insert(to,from) + end + end + local w = {} + local gw = {} + for i=1,#self.modules do + local mw,mgw = self.modules[i]:parameters() + if mw then + tinsert(w,mw) + tinsert(gw,mgw) + end + end + return w,gw +end diff --git a/ParallelTable.lua b/ParallelTable.lua new file mode 100644 index 0000000..a97904f --- /dev/null +++ b/ParallelTable.lua @@ -0,0 +1,71 @@ +local ParallelTable, parent = torch.class('nn.ParallelTable', 'nn.Module') + +function ParallelTable:__init() + parent.__init(self) + self.modules = {} + self.output = {} + self.gradInput = {} +end + +function ParallelTable:add(module) + table.insert(self.modules, module) + return self +end + +function ParallelTable:get(index) + return self.modules[index] +end + +function ParallelTable:size() + return #self.modules +end + +function ParallelTable:updateOutput(input) + for i=1,#self.modules do + self.output[i] = self.modules[i]:updateOutput(input[i]) + end + return self.output +end + + +function ParallelTable:updateGradInput(input, gradOutput) + for i,module in ipairs(self.modules) do + self.gradInput[i]= module:updateGradInput(input[i], gradOutput[i]) + end + return self.gradInput +end + +function ParallelTable:accGradParameters(input, gradOutput, scale) + scale = scale or 1 + for i,module in ipairs(self.modules) do + module:accGradParameters(input[i], gradOutput[i], scale) + end +end + +function ParallelTable:accUpdateGradParameters(input, gradOutput, lr) + lr = lr or 1 + for i,module in ipairs(self.modules) do + module:accUpdateGradParameters(input[i], gradOutput[i], lr) + end +end + +function 
ParallelTable:zeroGradParameters() + for _,module in ipairs(self.modules) do + module:zeroGradParameters() + end +end + +function ParallelTable:updateParameters(learningRate) + for _,module in ipairs(self.modules) do + module:updateParameters(learningRate) + end +end + +function ParallelTable:share(mlp,...) + for i=1,#self.modules do + self.modules[i]:share(mlp.modules[i],...); + end +end + + + diff --git a/Power.lua b/Power.lua new file mode 100644 index 0000000..8052b3f --- /dev/null +++ b/Power.lua @@ -0,0 +1,21 @@ +local Power, parent = torch.class('nn.Power','nn.Module') + +function Power:__init(p) + parent.__init(self) + self.pow = p + if not p then + error('nn.Power(power)') + end +end + +function Power:updateOutput(input) + self.output:resizeAs(input):copy(input) + self.output:pow(self.pow) + return self.output +end + +function Power:updateGradInput(input, gradOutput) + self.gradInput:resizeAs(input):copy(gradOutput) + self.gradInput:cmul(self.output):cdiv(input):mul(self.pow) + return self.gradInput +end diff --git a/Replicate.lua b/Replicate.lua new file mode 100644 index 0000000..c30a86a --- /dev/null +++ b/Replicate.lua @@ -0,0 +1,29 @@ +local Replicate, parent = torch.class('nn.Replicate','nn.Module') + +function Replicate:__init(nf) + parent.__init(self) + self.nfeatures = nf +end + +function Replicate:updateOutput(input) + local sz = torch.LongStorage(input:dim()+1) + sz[1] = self.nfeatures + for i = 1,input:dim() do + sz[i+1] = input:size(i) + end + local st = torch.LongStorage(input:dim()+1) + st[1] = 0 + for i = 1,input:dim() do + st[i+1] = input:stride(i) + end + self.output = input.new(input:storage(),input:storageOffset(),sz,st) + return self.output +end + +function Replicate:updateGradInput(input, gradOutput) + self.gradInput:resizeAs(input):zero() + for k = 1,gradOutput:size(1) do + self.gradInput:add(gradOutput[k]) + end + return self.gradInput +end diff --git a/Reshape.lua b/Reshape.lua new file mode 100644 index 0000000..0be793f --- /dev/null +++ b/Reshape.lua @@ -0,0 +1,38 @@ +local Reshape, parent = torch.class('nn.Reshape', 'nn.Module') + +function Reshape:__init(...) + parent.__init(self) + self.size = torch.LongStorage() + self.batchsize = torch.LongStorage() + local n = select('#', ...) + if n == 1 and torch.typename(select(1, ...)) == 'torch.LongStorage' then + self.size:resize(#select(1, ...)):copy(select(1, ...)) + else + self.size:resize(n) + self.batchsize:resize(n+1) + self.nelement = 1 + for i=1,n do + self.size[i] = select(i, ...) + self.batchsize[i+1] = select(i, ...) 
+ self.nelement = self.nelement * self.size[i] + end + end +end + +function Reshape:updateOutput(input) + input = input:contiguous() + local nelement = input:nElement() + if nelement == self.nelement then + self.output:set(input):resize(self.size) + else + self.batchsize[1] = input:size(1) + self.output:set(input):resize(self.batchsize) + end + return self.output +end + +function Reshape:updateGradInput(input, gradOutput) + gradOutput = gradOutput:contiguous() + self.gradInput:set(gradOutput):resizeAs(input) + return self.gradInput +end diff --git a/Select.lua b/Select.lua new file mode 100644 index 0000000..acf8e06 --- /dev/null +++ b/Select.lua @@ -0,0 +1,20 @@ +local Select, parent = torch.class('nn.Select', 'nn.Module') + +function Select:__init(dimension,index) + parent.__init(self) + self.dimension = dimension + self.index = index +end + +function Select:updateOutput(input) + local output = input:select(self.dimension,self.index); + self.output:resizeAs(output) + return self.output:copy(output) +end + +function Select:updateGradInput(input, gradOutput) + self.gradInput:resizeAs(input) + self.gradInput:zero() + self.gradInput:select(self.dimension,self.index):copy(gradOutput) + return self.gradInput +end diff --git a/Sequential.lua b/Sequential.lua new file mode 100644 index 0000000..3e23350 --- /dev/null +++ b/Sequential.lua @@ -0,0 +1,129 @@ +local Sequential, parent = torch.class('nn.Sequential', 'nn.Module') + +function Sequential:__init() + self.modules = {} +end + +function Sequential:add(module) + if #self.modules == 0 then + self.gradInput = module.gradInput + end + table.insert(self.modules, module) + self.output = module.output + return self +end + +function Sequential:size() + return #self.modules +end + +function Sequential:get(index) + return self.modules[index] +end + +function Sequential:updateOutput(input) + local currentOutput = input + for i=1,#self.modules do + currentOutput = self.modules[i]:updateOutput(currentOutput) + end + self.output = currentOutput + return currentOutput +end + +function Sequential:updateGradInput(input, gradOutput) + local currentGradOutput = gradOutput + local currentModule = self.modules[#self.modules] + for i=#self.modules-1,1,-1 do + local previousModule = self.modules[i] + currentGradOutput = currentModule:updateGradInput(previousModule.output, currentGradOutput) + currentModule = previousModule + end + currentGradOutput = currentModule:updateGradInput(input, currentGradOutput) + self.gradInput = currentGradOutput + return currentGradOutput +end + +function Sequential:accGradParameters(input, gradOutput, scale) + scale = scale or 1 + + local currentGradOutput = gradOutput + local currentModule = self.modules[#self.modules] + for i=#self.modules-1,1,-1 do + local previousModule = self.modules[i] + currentModule:accGradParameters(previousModule.output, currentGradOutput, scale) + currentGradOutput = currentModule.gradInput + currentModule = previousModule + end + + currentModule:accGradParameters(input, currentGradOutput, scale) +end + +function Sequential:accUpdateGradParameters(input, gradOutput, lr) + local currentGradOutput = gradOutput + local currentModule = self.modules[#self.modules] + for i=#self.modules-1,1,-1 do + local previousModule = self.modules[i] + currentModule:accUpdateGradParameters(previousModule.output, currentGradOutput, lr) + currentGradOutput = currentModule.gradInput + currentModule = previousModule + end + + currentModule:accUpdateGradParameters(input, currentGradOutput, lr) +end + +function 
Sequential:zeroGradParameters() + for i=1,#self.modules do + self.modules[i]:zeroGradParameters() + end +end + +function Sequential:updateParameters(learningRate) + for i=1,#self.modules do + self.modules[i]:updateParameters(learningRate) + end +end + +function Sequential:share(mlp,...) + for i=1,#self.modules do + self.modules[i]:share(mlp.modules[i],...); + end +end + +function Sequential:parameters() + local function tinsert(to, from) + if type(from) == 'table' then + for i=1,#from do + tinsert(to,from[i]) + end + else + table.insert(to,from) + end + end + local w = {} + local gw = {} + for i=1,#self.modules do + local mw,mgw = self.modules[i]:parameters() + if mw then + tinsert(w,mw) + tinsert(gw,mgw) + end + end + return w,gw +end + +function Sequential:__tostring__() + local tab = ' ' + local line = '\n' + local next = ' -> ' + local str = 'nn.Sequential' + str = str .. ' {' .. line .. tab .. '[input' + for i=1,#self.modules do + str = str .. next .. '(' .. i .. ')' + end + str = str .. next .. 'output]' + for i=1,#self.modules do + str = str .. line .. tab .. '(' .. i .. '): ' .. tostring(self.modules[i]):gsub(line, line .. tab) + end + str = str .. line .. '}' + return str +end diff --git a/Sigmoid.lua b/Sigmoid.lua new file mode 100644 index 0000000..efde004 --- /dev/null +++ b/Sigmoid.lua @@ -0,0 +1,9 @@ +local Sigmoid = torch.class('nn.Sigmoid', 'nn.Module') + +function Sigmoid:updateOutput(input) + return input.nn.Sigmoid_updateOutput(self, input) +end + +function Sigmoid:updateGradInput(input, gradOutput) + return input.nn.Sigmoid_updateGradInput(self, input, gradOutput) +end diff --git a/SoftMax.lua b/SoftMax.lua new file mode 100644 index 0000000..609b353 --- /dev/null +++ b/SoftMax.lua @@ -0,0 +1,9 @@ +local SoftMax, parent = torch.class('nn.SoftMax', 'nn.Module') + +function SoftMax:updateOutput(input) + return input.nn.SoftMax_updateOutput(self, input) +end + +function SoftMax:updateGradInput(input, gradOutput) + return input.nn.SoftMax_updateGradInput(self, input, gradOutput) +end diff --git a/SoftMin.lua b/SoftMin.lua new file mode 100644 index 0000000..90c6c60 --- /dev/null +++ b/SoftMin.lua @@ -0,0 +1,15 @@ +local SoftMin, parent = torch.class('nn.SoftMin', 'nn.Module') + +function SoftMin:updateOutput(input) + self.mininput = self.mininput or input.new() + self.mininput:resizeAs(input):copy(input):mul(-1) + return input.nn.SoftMax_updateOutput(self, self.mininput) +end + +function SoftMin:updateGradInput(input, gradOutput) + self.mininput = self.mininput or input.new() + self.mininput:resizeAs(input):copy(input):mul(-1) + self.gradInput = input.nn.SoftMax_updateGradInput(self, self.mininput, gradOutput) + self.gradInput:mul(-1) + return self.gradInput +end diff --git a/SoftPlus.lua b/SoftPlus.lua new file mode 100644 index 0000000..18d586a --- /dev/null +++ b/SoftPlus.lua @@ -0,0 +1,9 @@ +local SoftPlus = torch.class('nn.SoftPlus', 'nn.Module') + +function SoftPlus:updateOutput(input) + return input.nn.SoftPlus_updateOutput(self, input) +end + +function SoftPlus:updateGradInput(input, gradOutput) + return input.nn.SoftPlus_updateGradInput(self, input, gradOutput) +end diff --git a/SoftShrink.lua b/SoftShrink.lua new file mode 100644 index 0000000..379dc61 --- /dev/null +++ b/SoftShrink.lua @@ -0,0 +1,16 @@ +local SoftShrink, parent = torch.class('nn.SoftShrink', 'nn.Module') + +function SoftShrink:__init(lam) + parent.__init(self) + self.lambda = lam or 0.5 +end + +function SoftShrink:updateOutput(input) + input.nn.SoftShrink_updateOutput(self, input) + return 
self.output +end + +function SoftShrink:updateGradInput(input, gradOutput) + input.nn.SoftShrink_updateGradInput(self, input, gradOutput) + return self.gradInput +end diff --git a/SoftSign.lua b/SoftSign.lua new file mode 100644 index 0000000..480894c --- /dev/null +++ b/SoftSign.lua @@ -0,0 +1,15 @@ +local SoftSign = torch.class('nn.SoftSign', 'nn.Module') + +function SoftSign:updateOutput(input) + self.temp = self.temp or input.new() + self.temp:resizeAs(input):copy(input):abs():add(1) + self.output:resizeAs(input):copy(input):cdiv(self.temp) + return self.output +end + +function SoftSign:updateGradInput(input, gradOutput) + self.tempgrad = self.tempgrad or input.new() + self.tempgrad:resizeAs(self.output):copy(input):abs():add(1):cmul(self.tempgrad) + self.gradInput:resizeAs(input):copy(gradOutput):cdiv(self.tempgrad) + return self.gradInput +end diff --git a/SparseLinear.lua b/SparseLinear.lua new file mode 100644 index 0000000..ec8845e --- /dev/null +++ b/SparseLinear.lua @@ -0,0 +1,42 @@ +local SparseLinear, parent = torch.class('nn.SparseLinear', 'nn.Module') + +function SparseLinear:__init(inputSize, outputSize) + parent.__init(self) + + self.weightDecay = 0 + self.weight = torch.Tensor(outputSize, inputSize) + self.bias = torch.Tensor(outputSize) + self.gradWeight = torch.Tensor(outputSize, inputSize) + self.gradBias = torch.Tensor(outputSize) + self.lastInput = torch.Tensor() + -- state + self.gradInput:resize(inputSize) + self.output:resize(outputSize) + + self:reset() +end + +function SparseLinear:reset(stdv) + if stdv then + stdv = stdv * math.sqrt(3) + else + stdv = 1./math.sqrt(self.weight:size(1)) + end + + -- we do this so the initialization is exactly + -- the same than in previous torch versions + for i=1,self.weight:size(1) do + self.weight:select(1, i):apply(function() + return torch.uniform(-stdv, stdv) + end) + self.bias[i] = torch.uniform(-stdv, stdv) * 0.000001 + end +end + +function SparseLinear:updateOutput(input) + return input.nn.SparseLinear_updateOutput(self, input) +end + +function SparseLinear:accGradParameters(input, gradOutput, scale) + return input.nn.SparseLinear_accGradParameters(self, input, gradOutput, scale) +end diff --git a/SpatialConvolution.lua b/SpatialConvolution.lua new file mode 100644 index 0000000..38d2737 --- /dev/null +++ b/SpatialConvolution.lua @@ -0,0 +1,50 @@ +local SpatialConvolution, parent = torch.class('nn.SpatialConvolution', 'nn.Module') + +function SpatialConvolution:__init(nInputPlane, nOutputPlane, kW, kH, dW, dH) + parent.__init(self) + + dW = dW or 1 + dH = dH or 1 + + self.nInputPlane = nInputPlane + self.nOutputPlane = nOutputPlane + self.kW = kW + self.kH = kH + self.dW = dW + self.dH = dH + + self.weight = torch.Tensor(nOutputPlane, nInputPlane, kH, kW) + self.bias = torch.Tensor(nOutputPlane) + self.gradWeight = torch.Tensor(nOutputPlane, nInputPlane, kH, kW) + self.gradBias = torch.Tensor(nOutputPlane) + + self:reset() +end + +function SpatialConvolution:reset(stdv) + if stdv then + stdv = stdv * math.sqrt(3) + else + stdv = 1/math.sqrt(self.kW*self.kH*self.nInputPlane) + end + self.weight:apply(function() + return torch.uniform(-stdv, stdv) + end) + self.bias:apply(function() + return torch.uniform(-stdv, stdv) + end) +end + +function SpatialConvolution:updateOutput(input) + return input.nn.SpatialConvolution_updateOutput(self, input) +end + +function SpatialConvolution:updateGradInput(input, gradOutput) + if self.gradInput then + return input.nn.SpatialConvolution_updateGradInput(self, input, gradOutput) + end +end 
+ +function SpatialConvolution:accGradParameters(input, gradOutput, scale) + return input.nn.SpatialConvolution_accGradParameters(self, input, gradOutput, scale) +end diff --git a/SpatialConvolutionMap.lua b/SpatialConvolutionMap.lua new file mode 100644 index 0000000..0dbff2f --- /dev/null +++ b/SpatialConvolutionMap.lua @@ -0,0 +1,119 @@ +local SpatialConvolutionMap, parent = torch.class('nn.SpatialConvolutionMap', 'nn.Module') + +nn.tables = nn.tables or {} + +function nn.tables.full(nin, nout) + local ft = torch.Tensor(nin*nout,2) + local p = 1 + for j=1,nout do + for i=1,nin do + ft[p][1] = i + ft[p][2] = j + p = p + 1 + end + end + return ft +end + +function nn.tables.oneToOne(nfeat) + local ft = torch.Tensor(nfeat,2) + for i=1,nfeat do + ft[i][1] = i + ft[i][2] = i + end + return ft +end + +function nn.tables.random(nin, nout, nto) + local nker = nto * nout + local tbl = torch.Tensor(nker, 2) + local fi = torch.randperm(nin) + local frcntr = 1 + local tocntr = 1 + local nfi = math.floor(nin/nto) -- number of distinct nto chunks + local rfi = math.mod(nin,nto) -- number of remaining from maps + local totbl = tbl:select(2,2) + local frtbl = tbl:select(2,1) + local fitbl = fi:narrow(1, 1, (nfi * nto)) -- part of fi that covers distinct chunks + local ufrtbl= frtbl:unfold(1, nto, nto) + local utotbl= totbl:unfold(1, nto, nto) + local ufitbl= fitbl:unfold(1, nto, nto) + + -- start filling frtbl + for i=1,nout do -- fro each unit in target map + ufrtbl:select(1,i):copy(ufitbl:select(1,frcntr)) + frcntr = frcntr + 1 + if frcntr-1 == nfi then -- reset fi + fi:copy(torch.randperm(nin)) + frcntr = 1 + end + end + for tocntr=1,utotbl:size(1) do + utotbl:select(1,tocntr):fill(tocntr) + end + return tbl +end + +function SpatialConvolutionMap:__init(conMatrix, kW, kH, dW, dH) + parent.__init(self) + + dW = dW or 1 + dH = dH or 1 + + self.kW = kW + self.kH = kH + self.dW = dW + self.dH = dH + self.connTable = conMatrix + self.nInputPlane = self.connTable:select(2,1):maxall() + self.nOutputPlane = self.connTable:select(2,2):maxall() + + self.weight = torch.Tensor(self.connTable:size(1), kH, kW) + self.bias = torch.Tensor(self.nOutputPlane) + self.gradWeight = torch.Tensor(self.connTable:size(1), kH, kW) + self.gradBias = torch.Tensor(self.nOutputPlane) + + self:reset() +end + +function SpatialConvolutionMap:reset(stdv) + if stdv then + stdv = stdv * math.sqrt(3) + self.weight:apply(function() + return torch.uniform(-stdv, stdv) + end) + self.bias:apply(function() + return torch.uniform(-stdv, stdv) + end) + else + local ninp = torch.Tensor(self.nOutputPlane):zero() + for i=1,self.connTable:size(1) do ninp[self.connTable[i][2]] = ninp[self.connTable[i][2]]+1 end + for k=1,self.connTable:size(1) do + stdv = 1/math.sqrt(self.kW*self.kH*ninp[self.connTable[k][2]]) + self.weight:select(1,k):apply(function() return torch.uniform(-stdv,stdv) end) + end + for k=1,self.bias:size(1) do + stdv = 1/math.sqrt(self.kW*self.kH*ninp[k]) + self.bias[k] = torch.uniform(-stdv,stdv) + end + end +end + +function SpatialConvolutionMap:updateOutput(input) + input.nn.SpatialConvolutionMap_updateOutput(self, input) + return self.output +end + +function SpatialConvolutionMap:updateGradInput(input, gradOutput) + input.nn.SpatialConvolutionMap_updateGradInput(self, input, gradOutput) + return self.gradInput +end + +function SpatialConvolutionMap:accGradParameters(input, gradOutput, scale) + return input.nn.SpatialConvolutionMap_accGradParameters(self, input, gradOutput, scale) +end + +function 
SpatialConvolutionMap:decayParameters(decay) + self.weight:add(-decay, self.weight) + self.bias:add(-decay, self.bias) +end diff --git a/SpatialLPPooling.lua b/SpatialLPPooling.lua new file mode 100644 index 0000000..9b9c87d --- /dev/null +++ b/SpatialLPPooling.lua @@ -0,0 +1,32 @@ +local SpatialLPPooling, parent = torch.class('nn.SpatialLPPooling', 'nn.Sequential') + +function SpatialLPPooling:__init(nInputPlane, pnorm, kW, kH, dW, dH) + parent.__init(self) + + dW = dW or kW + dH = dH or kH + + self.kW = kW + self.kH = kH + self.dW = dW + self.dH = dH + + self.nInputPlane = nInputPlane + self.learnKernel = learnKernel + + if pnorm == 2 then + self:add(nn.Square()) + else + self:add(nn.Power(pnorm)) + end + self:add(nn.SpatialConvolutionMap(nn.tables.oneToOne(nInputPlane), kW, kH, dW, dH)) + if pnorm == 2 then + self:add(nn.Sqrt()) + else + self:add(nn.Power(1/pnorm)) + end + + self:get(2).bias:zero() + self:get(2).weight:fill(1/(kW*kH)) + self:get(2).accGradParameters = nil +end diff --git a/SpatialMaxPooling.lua b/SpatialMaxPooling.lua new file mode 100644 index 0000000..21197ac --- /dev/null +++ b/SpatialMaxPooling.lua @@ -0,0 +1,34 @@ +local SpatialMaxPooling, parent = torch.class('nn.SpatialMaxPooling', 'nn.Module') + +function SpatialMaxPooling:__init(kW, kH, dW, dH) + parent.__init(self) + + dW = dW or kW + dH = dH or kH + + self.kW = kW + self.kH = kH + self.dW = dW + self.dH = dH + + self.indices = torch.Tensor() +end + +function SpatialMaxPooling:updateOutput(input) + input.nn.SpatialMaxPooling_updateOutput(self, input) + return self.output +end + +function SpatialMaxPooling:updateGradInput(input, gradOutput) + input.nn.SpatialMaxPooling_updateGradInput(self, input, gradOutput) + return self.gradInput +end + +function SpatialMaxPooling:empty() + self.gradInput:resize() + self.gradInput:storage():resize(0) + self.output:resize() + self.output:storage():resize(0) + self.indices:resize() + self.indices:storage():resize(0) +end diff --git a/SpatialSubSampling.lua b/SpatialSubSampling.lua new file mode 100644 index 0000000..48b32b9 --- /dev/null +++ b/SpatialSubSampling.lua @@ -0,0 +1,49 @@ +local SpatialSubSampling, parent = torch.class('nn.SpatialSubSampling', 'nn.Module') + +function SpatialSubSampling:__init(nInputPlane, kW, kH, dW, dH) + parent.__init(self) + + dW = dW or 1 + dH = dH or 1 + + self.nInputPlane = nInputPlane + self.kW = kW + self.kH = kH + self.dW = dW + self.dH = dH + + self.weight = torch.Tensor(nInputPlane) + self.bias = torch.Tensor(nInputPlane) + self.gradWeight = torch.Tensor(nInputPlane) + self.gradBias = torch.Tensor(nInputPlane) + + self:reset() +end + +function SpatialSubSampling:reset(stdv) + if stdv then + stdv = stdv * math.sqrt(3) + else + stdv = 1/math.sqrt(self.kW*self.kH) + end + self.weight:apply(function() + return torch.uniform(-stdv, stdv) + end) + self.bias:apply(function() + return torch.uniform(-stdv, stdv) + end) +end + +function SpatialSubSampling:updateOutput(input) + return input.nn.SpatialSubSampling_updateOutput(self, input) +end + +function SpatialSubSampling:updateGradInput(input, gradOutput) + if self.gradInput then + return input.nn.SpatialSubSampling_updateGradInput(self, input, gradOutput) + end +end + +function SpatialSubSampling:accGradParameters(input, gradOutput, scale) + return input.nn.SpatialSubSampling_accGradParameters(self, input, gradOutput, scale) +end diff --git a/SpatialSubtractiveNormalization.lua b/SpatialSubtractiveNormalization.lua new file mode 100644 index 0000000..4df0fc1 --- /dev/null +++ 
b/SpatialSubtractiveNormalization.lua @@ -0,0 +1,104 @@ +local SpatialSubtractiveNormalization, parent = torch.class('nn.SpatialSubtractiveNormalization','nn.Module') + +function SpatialSubtractiveNormalization:__init(nInputPlane, kernel) + parent.__init(self) + + -- get args + self.nInputPlane = nInputPlane or 1 + self.kernel = kernel or torch.Tensor(9,9):fill(1) + local kdim = self.kernel:nDimension() + + -- check args + if kdim ~= 2 and kdim ~= 1 then + error('<SpatialSubtractiveNormalization> averaging kernel must be 2D or 1D') + end + if (self.kernel:size(1) % 2) == 0 or (kdim == 2 and (self.kernel:size(2) % 2) == 0) then + error('<SpatialSubtractiveNormalization> averaging kernel must have ODD dimensions') + end + + -- normalize kernel + self.kernel:div(self.kernel:sumall() * self.nInputPlane) + + -- padding values + local padH = math.floor(self.kernel:size(1)/2) + local padW = padH + if kdim == 2 then + padW = math.floor(self.kernel:size(2)/2) + end + + -- create convolutional mean extractor + self.meanestimator = nn.Sequential() + self.meanestimator:add(nn.SpatialZeroPadding(padW, padW, padH, padH)) + if kdim == 2 then + self.meanestimator:add(nn.SpatialConvolutionMap(nn.tables.oneToOne(self.nInputPlane), + self.kernel:size(2), self.kernel:size(1))) + else + self.meanestimator:add(nn.SpatialConvolutionMap(nn.tables.oneToOne(self.nInputPlane), + self.kernel:size(1), 1)) + self.meanestimator:add(nn.SpatialConvolutionMap(nn.tables.oneToOne(self.nInputPlane), + 1, self.kernel:size(1))) + end + self.meanestimator:add(nn.Sum(1)) + self.meanestimator:add(nn.Replicate(self.nInputPlane)) + + -- set kernel and bias + if kdim == 2 then + for i = 1,self.nInputPlane do + self.meanestimator.modules[2].weight[i] = self.kernel + end + self.meanestimator.modules[2].bias:zero() + else + for i = 1,self.nInputPlane do + self.meanestimator.modules[2].weight[i]:copy(self.kernel) + self.meanestimator.modules[3].weight[i]:copy(self.kernel) + end + self.meanestimator.modules[2].bias:zero() + self.meanestimator.modules[3].bias:zero() + end + + -- other operation + self.subtractor = nn.CSubTable() + self.divider = nn.CDivTable() + + -- coefficient array, to adjust side effects + self.coef = torch.Tensor(1,1,1) +end + +function SpatialSubtractiveNormalization:updateOutput(input) + -- compute side coefficients + if (input:size(3) ~= self.coef:size(2)) or (input:size(2) ~= self.coef:size(1)) then + local ones = input.new():resizeAs(input):fill(1) + self.coef = self.meanestimator:updateOutput(ones) + self.coef = self.coef:clone() + end + + -- compute mean + self.localsums = self.meanestimator:updateOutput(input) + self.adjustedsums = self.divider:updateOutput{self.localsums, self.coef} + self.output = self.subtractor:updateOutput{input, self.adjustedsums} + + -- done + return self.output +end + +function SpatialSubtractiveNormalization:updateGradInput(input, gradOutput) + -- resize grad + self.gradInput:resizeAs(input):zero() + + -- backprop through all modules + local gradsub = self.subtractor:updateGradInput({input, self.adjustedsums}, gradOutput) + local graddiv = self.divider:updateGradInput({self.localsums, self.coef}, gradsub[2]) + self.gradInput:add(self.meanestimator:updateGradInput(input, graddiv[1])) + self.gradInput:add(gradsub[1]) + + -- done + return self.gradInput +end + +function SpatialSubtractiveNormalization:type(type) + parent.type(self,type) + self.meanestimator:type(type) + self.divider:type(type) + self.subtractor:type(type) + return self +end diff --git a/SpatialZeroPadding.lua 
b/SpatialZeroPadding.lua new file mode 100644 index 0000000..af03e71 --- /dev/null +++ b/SpatialZeroPadding.lua @@ -0,0 +1,53 @@ +local SpatialZeroPadding, parent = torch.class('nn.SpatialZeroPadding', 'nn.Module') + +function SpatialZeroPadding:__init(pad_l, pad_r, pad_t, pad_b) + parent.__init(self) + self.pad_l = pad_l + self.pad_r = pad_r or self.pad_l + self.pad_t = pad_t or self.pad_l + self.pad_b = pad_b or self.pad_l +end + +function SpatialZeroPadding:updateOutput(input) + if input:dim() ~= 3 then error('input must be 3-dimensional') end + local h = input:size(2) + self.pad_t + self.pad_b + local w = input:size(3) + self.pad_l + self.pad_r + if w < 1 or h < 1 then error('input is too small') end + self.output:resize(input:size(1), h, w) + self.output:zero() + -- crop input if necessary + local c_input = input + if self.pad_t < 0 then c_input = c_input:narrow(2, 1 - self.pad_t, c_input:size(2) + self.pad_t) end + if self.pad_b < 0 then c_input = c_input:narrow(2, 1, c_input:size(2) + self.pad_b) end + if self.pad_l < 0 then c_input = c_input:narrow(3, 1 - self.pad_l, c_input:size(3) + self.pad_l) end + if self.pad_r < 0 then c_input = c_input:narrow(3, 1, c_input:size(3) + self.pad_r) end + -- crop outout if necessary + local c_output = self.output + if self.pad_t > 0 then c_output = c_output:narrow(2, 1 + self.pad_t, c_output:size(2) - self.pad_t) end + if self.pad_b > 0 then c_output = c_output:narrow(2, 1, c_output:size(2) - self.pad_b) end + if self.pad_l > 0 then c_output = c_output:narrow(3, 1 + self.pad_l, c_output:size(3) - self.pad_l) end + if self.pad_r > 0 then c_output = c_output:narrow(3, 1, c_output:size(3) - self.pad_r) end + -- copy input to output + c_output:copy(c_input) + return self.output +end + +function SpatialZeroPadding:updateGradInput(input, gradOutput) + if input:dim() ~= 3 then error('input must be 3-dimensional') end + self.gradInput:resizeAs(input):zero() + -- crop gradInput if necessary + local cg_input = self.gradInput + if self.pad_t < 0 then cg_input = cg_input:narrow(2, 1 - self.pad_t, cg_input:size(2) + self.pad_t) end + if self.pad_b < 0 then cg_input = cg_input:narrow(2, 1, cg_input:size(2) + self.pad_b) end + if self.pad_l < 0 then cg_input = cg_input:narrow(3, 1 - self.pad_l, cg_input:size(3) + self.pad_l) end + if self.pad_r < 0 then cg_input = cg_input:narrow(3, 1, cg_input:size(3) + self.pad_r) end + -- crop gradOutout if necessary + local cg_output = gradOutput + if self.pad_t > 0 then cg_output = cg_output:narrow(2, 1 + self.pad_t, cg_output:size(2) - self.pad_t) end + if self.pad_b > 0 then cg_output = cg_output:narrow(2, 1, cg_output:size(2) - self.pad_b) end + if self.pad_l > 0 then cg_output = cg_output:narrow(3, 1 + self.pad_l, cg_output:size(3) - self.pad_l) end + if self.pad_r > 0 then cg_output = cg_output:narrow(3, 1, cg_output:size(3) - self.pad_r) end + -- copy gradOuput to gradInput + cg_input:copy(cg_output) + return self.gradInput +end diff --git a/SplitTable.lua b/SplitTable.lua new file mode 100644 index 0000000..d2c690e --- /dev/null +++ b/SplitTable.lua @@ -0,0 +1,30 @@ +local SplitTable, parent = torch.class('nn.SplitTable', 'nn.Module') + +function SplitTable:__init(dimension) + parent.__init(self) + self.modules = {} + self.dimension = dimension +end + +function SplitTable:updateOutput(input) + local currentOutput= {}; + local slices = input:size(self.dimension) + for i=1,slices do + currentOutput[#currentOutput+1] = input:select(self.dimension,i) + end + self.output = currentOutput + return self.output +end + + 
+function SplitTable:updateGradInput(input, gradOutput) + local slices = input:size(self.dimension) + self.gradInput:resizeAs(input) + + local offset = 1 + for i=1,slices do + local currentGradInput = gradOutput[i]; + self.gradInput:select(self.dimension,i):copy(currentGradInput) + end + return self.gradInput +end diff --git a/Sqrt.lua b/Sqrt.lua new file mode 100644 index 0000000..664d434 --- /dev/null +++ b/Sqrt.lua @@ -0,0 +1,13 @@ +local Sqrt, parent = torch.class('nn.Sqrt','nn.Module') + +function Sqrt:__init(args) + parent.__init(self) +end + +function Sqrt:updateOutput(input) + return input.nn.Sqrt_updateOutput(self,input) +end + +function Sqrt:updateGradInput(input, gradOutput) + return input.nn.Sqrt_updateGradInput(self,input,gradOutput) +end diff --git a/Square.lua b/Square.lua new file mode 100644 index 0000000..c1b80dc --- /dev/null +++ b/Square.lua @@ -0,0 +1,13 @@ +local Square, parent = torch.class('nn.Square','nn.Module') + +function Square:__init(args) + parent.__init(self) +end + +function Square:updateOutput(input) + return input.nn.Square_updateOutput(self, input) +end + +function Square:updateGradInput(input, gradOutput) + return input.nn.Square_updateGradInput(self, input, gradOutput) +end diff --git a/StochasticGradient.lua b/StochasticGradient.lua new file mode 100644 index 0000000..2d5e810 --- /dev/null +++ b/StochasticGradient.lua @@ -0,0 +1,57 @@ +local StochasticGradient = torch.class('nn.StochasticGradient') + +function StochasticGradient:__init(module, criterion) + self.learningRate = 0.01 + self.learningRateDecay = 0 + self.maxIteration = 25 + self.shuffleIndices = true + self.module = module + self.criterion = criterion +end + +function StochasticGradient:train(dataset) + local iteration = 1 + local currentLearningRate = self.learningRate + local module = self.module + local criterion = self.criterion + + local shuffledIndices = torch.randperm(dataset:size(), 'torch.LongTensor') + if not self.shuffleIndices then + for t = 1,dataset:size() do + shuffledIndices[t] = t + end + end + + print("# StochasticGradient: training") + + while true do + local currentError = 0 + for t = 1,dataset:size() do + local example = dataset[shuffledIndices[t]] + local input = example[1] + local target = example[2] + + currentError = currentError + criterion:forward(module:forward(input), target) + + module:updateGradInput(input, criterion:updateGradInput(module.output, target)) + module:accUpdateGradParameters(input, criterion.gradInput, currentLearningRate) + + if self.hookExample then + self.hookExample(self, example) + end + end + + if self.hookIteration then + self.hookIteration(self, iteration) + end + + currentError = currentError / dataset:size() + print("# current error = " .. 
currentError) + iteration = iteration + 1 + currentLearningRate = self.learningRate/(1+iteration*self.learningRateDecay) + if self.maxIteration > 0 and iteration > self.maxIteration then + print("# StochasticGradient: you have reached the maximum number of iterations") + break + end + end +end @@ -0,0 +1,27 @@ +local Sum, parent = torch.class('nn.Sum', 'nn.Module') + +function Sum:__init(dimension) + parent.__init(self) + dimension = dimension or 1 + self.dimension = dimension +end + +function Sum:updateOutput(input) + input.torch.sum(self.output, input, self.dimension) + self.output = self.output:select(self.dimension, 1) + return self.output +end + +function Sum:updateGradInput(input, gradOutput) + local size = gradOutput:size():totable() + local stride = gradOutput:stride():totable() + table.insert(size, self.dimension, input:size(self.dimension)) + table.insert(stride, self.dimension, 0) + + self.gradInput:set(gradOutput:storage(), + 1, + torch.LongStorage(size), + torch.LongStorage(stride)) + + return self.gradInput +end diff --git a/Tanh.lua b/Tanh.lua new file mode 100644 index 0000000..b6cf1bf --- /dev/null +++ b/Tanh.lua @@ -0,0 +1,9 @@ +local Tanh = torch.class('nn.Tanh', 'nn.Module') + +function Tanh:updateOutput(input) + return input.nn.Tanh_updateOutput(self, input) +end + +function Tanh:updateGradInput(input, gradOutput) + return input.nn.Tanh_updateGradInput(self, input, gradOutput) +end diff --git a/TemporalConvolution.lua b/TemporalConvolution.lua new file mode 100644 index 0000000..a3aaa7f --- /dev/null +++ b/TemporalConvolution.lua @@ -0,0 +1,51 @@ +local TemporalConvolution, parent = torch.class('nn.TemporalConvolution', 'nn.Module') + +function TemporalConvolution:__init(inputFrameSize, outputFrameSize, kW, dW) + parent.__init(self) + + dW = dW or 1 + + self.inputFrameSize = inputFrameSize + self.outputFrameSize = outputFrameSize + self.kW = kW + self.dW = dW + + self.weight = torch.Tensor(outputFrameSize, inputFrameSize*kW) + self.bias = torch.Tensor(outputFrameSize) + self.gradWeight = torch.Tensor(outputFrameSize, inputFrameSize*kW) + self.gradBias = torch.Tensor(outputFrameSize) + + self:reset() +end + +function TemporalConvolution:reset(stdv) + if stdv then + stdv = stdv * math.sqrt(3) + else + stdv = 1/math.sqrt(self.kW*self.inputFrameSize) + end + self.weight:apply(function() + return torch.uniform(-stdv, stdv) + end) + self.bias:apply(function() + return torch.uniform(-stdv, stdv) + end) +end + +function TemporalConvolution:updateOutput(input) + return input.nn.TemporalConvolution_updateOutput(self, input) +end + +function TemporalConvolution:updateGradInput(input, gradOutput) + if self.gradInput then + return input.nn.TemporalConvolution_updateGradInput(self, input, gradOutput) + end +end + +function TemporalConvolution:accGradParameters(input, gradOutput, scale) + scale = scale or 1 + input.nn.TemporalConvolution_accGradParameters(self, input, gradOutput, scale) +end + +-- we do not need to accumulate parameters when sharing +TemporalConvolution.sharedAccUpdateGradParameters = TemporalConvolution.accUpdateGradParameters diff --git a/TemporalSubSampling.lua b/TemporalSubSampling.lua new file mode 100644 index 0000000..3d06f6e --- /dev/null +++ b/TemporalSubSampling.lua @@ -0,0 +1,48 @@ +local TemporalSubSampling, parent = torch.class('nn.TemporalSubSampling', 'nn.Module') + +function TemporalSubSampling:__init(inputFrameSize, kW, dW) + parent.__init(self) + + dW = dW or 1 + + self.inputFrameSize = inputFrameSize + self.kW = kW + self.dW = dW + + self.weight = 
torch.Tensor(inputFrameSize) + self.bias = torch.Tensor(inputFrameSize) + self.gradWeight = torch.Tensor(inputFrameSize) + self.gradBias = torch.Tensor(inputFrameSize) + + self:reset() +end + +function TemporalSubSampling:reset(stdv) + if stdv then + stdv = stdv * math.sqrt(3) + else + stdv = 1/math.sqrt(self.kW) + end + + self.weight:apply(function() + return torch.uniform(-stdv, stdv) + end) + + self.bias:apply(function() + return torch.uniform(-stdv, stdv) + end) +end + +function TemporalSubSampling:updateOutput(input) + return input.nn.TemporalSubSampling_updateOutput(self, input) +end + +function TemporalSubSampling:updateGradInput(input, gradOutput) + if self.gradInput then + return input.nn.TemporalSubSampling_updateGradInput(self, input, gradOutput) + end +end + +function TemporalSubSampling:accGradParameters(input, gradOutput, scale) + return input.nn.TemporalSubSampling_accGradParameters(self, input, gradOutput, scale) +end diff --git a/Threshold.lua b/Threshold.lua new file mode 100644 index 0000000..6083957 --- /dev/null +++ b/Threshold.lua @@ -0,0 +1,20 @@ +local Threshold, parent = torch.class('nn.Threshold','nn.Module') + +function Threshold:__init(th,v) + parent.__init(self) + self.threshold = th or 1e-6 + self.val = v or 0 + if (th and type(th) ~= 'number') or (v and type(v) ~= 'number') then + error('nn.Threshold(threshold, value)') + end +end + +function Threshold:updateOutput(input) + input.nn.Threshold_updateOutput(self, input) + return self.output +end + +function Threshold:updateGradInput(input, gradOutput) + input.nn.Threshold_updateGradInput(self, input, gradOutput) + return self.gradInput +end diff --git a/VolumetricConvolution.lua b/VolumetricConvolution.lua new file mode 100644 index 0000000..4262199 --- /dev/null +++ b/VolumetricConvolution.lua @@ -0,0 +1,51 @@ +local VolumetricConvolution, parent = torch.class('nn.VolumetricConvolution', 'nn.Module') + +function VolumetricConvolution:__init(nInputPlane, nOutputPlane, kT, kW, kH, dT, dW, dH) + parent.__init(self) + + dT = dT or 1 + dW = dW or 1 + dH = dH or 1 + + self.nInputPlane = nInputPlane + self.nOutputPlane = nOutputPlane + self.kT = kT + self.kW = kW + self.kH = kH + self.dT = dT + self.dW = dW + self.dH = dH + + self.weight = torch.Tensor(nOutputPlane, nInputPlane, kT, kH, kW) + self.bias = torch.Tensor(nOutputPlane) + self.gradWeight = torch.Tensor(nOutputPlane, nInputPlane, kT, kH, kW) + self.gradBias = torch.Tensor(nOutputPlane) + + self:reset() +end + +function VolumetricConvolution:reset(stdv) + if stdv then + stdv = stdv * math.sqrt(3) + else + stdv = 1/math.sqrt(self.kT*self.kW*self.kH*self.nInputPlane) + end + self.weight:apply(function() + return torch.uniform(-stdv, stdv) + end) + self.bias:apply(function() + return torch.uniform(-stdv, stdv) + end) +end + +function VolumetricConvolution:updateOutput(input) + return input.nn.VolumetricConvolution_updateOutput(self, input) +end + +function VolumetricConvolution:updateGradInput(input, gradOutput) + return input.nn.VolumetricConvolution_updateGradInput(self, input, gradOutput) +end + +function VolumetricConvolution:accGradParameters(input, gradOutput, scale) + return input.nn.VolumetricConvolution_accGradParameters(self, input, gradOutput, scale) +end diff --git a/WeightedEuclidean.lua b/WeightedEuclidean.lua new file mode 100644 index 0000000..2761228 --- /dev/null +++ b/WeightedEuclidean.lua @@ -0,0 +1,85 @@ +local WeightedEuclidean, parent = torch.class('nn.WeightedEuclidean', 'nn.Module') + +function 
WeightedEuclidean:__init(inputSize,outputSize) + parent.__init(self) + + self.templates = torch.Tensor(inputSize,outputSize) + self.gradTemplates = torch.Tensor(inputSize,outputSize) + + self.diagCov = torch.Tensor(inputSize,outputSize) + self.gradDiagCov = torch.Tensor(inputSize,outputSize) + + self.gradInput:resize(inputSize) + self.output:resize(outputSize) + self.temp = torch.Tensor(inputSize) + + -- for compat with Torch's modules (it's bad we have to do that) + do + self.weight = self.templates + self.gradWeight = self.gradTemplates + self.bias = self.diagCov + self.gradBias = self.gradDiagCov + end + + self:reset() +end + +function WeightedEuclidean:reset(stdv) + if stdv then + stdv = stdv * math.sqrt(3) + else + stdv = 1./math.sqrt(self.templates:size(1)) + end + + for i=1,self.templates:size(2) do + self.templates:select(2, i):apply(function() + return torch.uniform(-stdv, stdv) + end) + end + + self.diagCov:fill(1) +end + +function WeightedEuclidean:updateOutput(input) + self.output:zero() + for o = 1,self.templates:size(2) do + self.temp:copy(input):add(-1,self.templates:select(2,o)) + self.temp:cmul(self.temp) + self.temp:cmul(self.diagCov:select(2,o)):cmul(self.diagCov:select(2,o)) + self.output[o] = math.sqrt(self.temp:sumall()) + end + return self.output +end + +function WeightedEuclidean:updateGradInput(input, gradOutput) + self:forward(input) + self.gradInput:zero() + for o = 1,self.templates:size(2) do + if self.output[o] ~= 0 then + self.temp:copy(input):add(-1,self.templates:select(2,o)) + self.temp:cmul(self.diagCov:select(2,o)):cmul(self.diagCov:select(2,o)) + self.temp:mul(gradOutput[o]/self.output[o]) + self.gradInput:add(self.temp) + end + end + return self.gradInput +end + +function WeightedEuclidean:accGradParameters(input, gradOutput, scale) + self:forward(input) + scale = scale or 1 + for o = 1,self.templates:size(2) do + if self.output[o] ~= 0 then + self.temp:copy(self.templates:select(2,o)):add(-1,input) + self.temp:cmul(self.diagCov:select(2,o)):cmul(self.diagCov:select(2,o)) + self.temp:mul(gradOutput[o]/self.output[o]) + self.gradTemplates:select(2,o):add(self.temp) + + self.temp:copy(self.templates:select(2,o)):add(-1,input) + self.temp:cmul(self.temp) + self.temp:cmul(self.diagCov:select(2,o)) + self.temp:mul(gradOutput[o]/self.output[o]) + self.gradDiagCov:select(2,o):add(self.temp) + end + end +end diff --git a/dok/abs.png b/dok/abs.png Binary files differnew file mode 100644 index 0000000..fa7f470 --- /dev/null +++ b/dok/abs.png diff --git a/dok/exp.png b/dok/exp.png Binary files differnew file mode 100644 index 0000000..07d28d4 --- /dev/null +++ b/dok/exp.png diff --git a/dok/hshrink.png b/dok/hshrink.png Binary files differnew file mode 100644 index 0000000..7f96292 --- /dev/null +++ b/dok/hshrink.png diff --git a/dok/htanh.png b/dok/htanh.png Binary files differnew file mode 100644 index 0000000..c8e6084 --- /dev/null +++ b/dok/htanh.png diff --git a/dok/index.dok b/dok/index.dok new file mode 100644 index 0000000..ded5265 --- /dev/null +++ b/dok/index.dok @@ -0,0 +1,3053 @@ +====== Neural Network Package ======= +{{anchor:nn.dok}} + +This package provides an easy way to build and train simple or complex +neural networks. 
+
+A network is built out of [[#nn.Modules|Modules]] and there
+are several sub-classes of ''Module'' available: container classes like
+[[#nn.Sequential|Sequential]], [[#nn.Parallel|Parallel]] and
+[[#nn.Concat|Concat]], which can contain simple layers like
+[[#nn.Linear|Linear]], [[#nn.Mean|Mean]], [[#nn.Max|Max]] and
+[[#nn.Reshape|Reshape]], as well as convolutional layers, and transfer
+functions like [[#nn.Tanh|Tanh]].
+
+Loss functions are implemented as sub-classes of
+[[#nn.Criterions|Criterion]]. They are helpful to train a neural network on
+classical tasks. Common criterions are the Mean Squared Error
+criterion implemented in [[#nn.MSECriterion|MSECriterion]] and the
+cross-entropy criterion implemented in
+[[#nn.ClassNLLCriterion|ClassNLLCriterion]].
+
+Finally, the [[#nn.StochasticGradient|StochasticGradient]] class provides a
+high-level way to train the neural network of choice, even though it is
+easy to [[#nn.DoItYourself|train a neural network yourself]] with a simple for loop.
+
+For those who want to implement their own modules, we suggest using
+the ''nn.Jacobian'' class for testing the derivatives of their class,
+together with the [[..:torch:tester|torch.Tester]] class. The sources
+of the ''nn'' package contain many examples of such tests.
+
+
+====== Detailed Overview of the Neural Network Package ======
+{{anchor:nn.overview.dok}}
+
+**Module**
+
+A neural network is called a [[#nn.Module|Module]] (or simply
+//module// in this documentation) in Torch. ''Module'' is an abstract
+class which defines four main methods:
+ * [[#nn.Module.forward|forward(input)]] which computes the output of the module given the ''input'' [[..:torch:tensor|Tensor]].
+ * [[#nn.Module.backward|backward(input, gradOutput)]] which computes the gradients of the module with respect to its own parameters, and its own inputs.
+ * [[#nn.Module.zeroGradParameters|zeroGradParameters()]] which zeroes the gradient with respect to the parameters of the module.
+ * [[#nn.Module.updateParameters|updateParameters(learningRate)]] which updates the parameters after one has computed the gradients with ''backward()''.
+
+It also declares two members:
+ * [[#nn.Module.output|output]] which is the output returned by ''forward()''.
+ * [[#nn.Module.gradInput|gradInput]] which contains the gradients with respect to the input of the module, computed in a ''backward()''.
+
+Two other perhaps less used but handy methods are also defined:
+ * [[#nn.Module.share|share(mlp,s1,s2,...,sn)]] which makes this module share the parameters ''s1'',...,''sn'' of the module ''mlp''. This is useful if you want to have modules that share the same weights.
+ * [[#nn.Module.clone|clone(...)]] which produces a deep copy of (i.e. not just a pointer to) this Module, including the current state of its parameters (if any).
+
+Some important remarks:
+ * ''output'' contains only valid values after a [[#nn.Module.forward|forward(input)]].
+ * ''gradInput'' contains only valid values after a [[#nn.Module.backward|backward(input, gradOutput)]].
+ * [[#nn.Module.backward|backward(input, gradOutput)]] uses certain computations obtained during [[#nn.Module.forward|forward(input)]]. You //must// call ''forward()'' before calling a ''backward()'', on the //same// ''input'', or your gradients are going to be incorrect!
+
+
+**Plug and play**
+
+Building a simple neural network can be achieved by constructing an available layer.
+A linear neural network (perceptron!)
is built in only one line:
+<file lua>
+mlp = nn.Linear(10,1) -- perceptron with 10 inputs
+</file>
+
+More complex neural networks are easily built using the container classes
+[[#nn.Sequential|Sequential]] and [[#nn.Concat|Concat]]. ''Sequential'' plugs
+layers together in a feed-forward fully connected manner. ''Concat'' concatenates
+several modules into one layer: they take the same inputs, and their outputs are
+concatenated.
+
+Creating a one hidden-layer multi-layer perceptron is thus just as easy as:
+<file lua>
+mlp = nn.Sequential()
+mlp:add( nn.Linear(10, 25) ) -- 10 input, 25 hidden units
+mlp:add( nn.Tanh() ) -- some hyperbolic tangent transfer function
+mlp:add( nn.Linear(25, 1) ) -- 1 output
+</file>
+
+Of course, ''Sequential'' and ''Concat'' can contain other
+''Sequential'' or ''Concat'' modules, allowing you to try the craziest neural
+networks you ever dreamt of! See the [[#nn.Modules|complete list of
+available modules]].
+
+**Training a neural network**
+
+Once you have built your neural network, you have to choose a particular
+[[#nn.Criterions|Criterion]] to train it. A criterion is a class which
+describes the cost to be minimized during training.
+
+You can then train the neural network by using the
+[[#nn.StochasticGradient|StochasticGradient]] class.
+
+<file lua>
+ criterion = nn.MSECriterion() -- Mean Squared Error criterion
+ trainer = nn.StochasticGradient(mlp, criterion)
+ trainer:train(dataset) -- train using some examples
+</file>
+
+''StochasticGradient'' expects as a ''dataset'' an object which implements
+the operator ''dataset[index]'' and the method
+''dataset:size()''. The ''size()'' method returns the number of
+examples and ''dataset[i]'' has to return the i-th example.
+
+An ''example'' has to be an object which implements the operator
+''example[field]'', where ''field'' might take the value ''1'' (input
+features) or ''2'' (corresponding label which will be given to the
+criterion). The input is usually a Tensor (except if you use special
+kinds of modules, like [[#nn.TableLayers|table layers]]). The
+label type depends on the criterion. For example, the
+[[#nn.MSECriterion|MSECriterion]] expects a Tensor, but the
+[[#nn.ClassNLLCriterion|ClassNLLCriterion]] expects an integer number (the
+class).
+
+Such a dataset is easily constructed by using Lua tables, but it could
+be any ''C'' object for example, as long as the required operators/methods
+are implemented. [[#nn.DoItStochasticGradient|See an example]].
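+
+For instance, here is a minimal in-memory sketch of such a dataset (the
+sizes and the random contents are arbitrary, for illustration only; the
+example linked above is more complete):
+<file lua>
+require "lab"
+dataset = {}
+function dataset:size() return 100 end -- number of examples
+for i = 1,dataset:size() do
+ dataset[i] = {lab.randn(10), lab.randn(1)} -- {input, target} pairs
+end
+-- such an object can be given directly to trainer:train(dataset)
+</file>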
+
+Since ''StochasticGradient'' is written in ''Lua'', it is extremely easy
+to cut-and-paste it and create a variant adapted to your needs
+(if the constraints of ''StochasticGradient'' do not satisfy you).
+
+**Low Level Training of a Neural Network**
+
+If you want to program the equivalent of ''StochasticGradient'' by hand, you
+essentially need to control the forward and backward passes through
+the network yourself. For example, here is the code fragment one
+would need to make a gradient step given an input ''x'', a desired
+output ''y'', a network ''mlp'', a given criterion ''criterion''
+and learning rate ''learningRate'':
+
+<file lua>
+function gradUpdate(mlp, x, y, criterion, learningRate)
+ local pred = mlp:forward(x)
+ local err = criterion:forward(pred, y)
+ local gradCriterion = criterion:backward(pred, y)
+ mlp:zeroGradParameters()
+ mlp:backward(x, gradCriterion)
+ mlp:updateParameters(learningRate)
+end
+</file>
+If you wish to use your own criterion you can simply replace
+''gradCriterion'' with the gradient vector of your criterion of choice.
+
+
+====== Modules ======
+{{anchor:nn.Modules}}
+
+Modules are the bricks used to build neural networks. A [[#nn.Module|Module]] is a neural network
+by itself, but it can be combined with other networks using [[#nn.Containers|container classes]] to create
+complex neural networks.
+
+===== Module =====
+{{anchor:nn.Module}}
+
+''Module'' is an abstract class which defines the fundamental methods necessary
+for training a neural network. Modules are [[..:torch:file#torch.file.serialization|serializable]].
+
+Modules contain two state variables: [[#nn.ModuleOutput|output]] and
+[[#nn.ModuleGradInput|gradInput]].
+
+==== [output] forward(input) ====
+{{anchor:nn.Module.forward}}
+
+Takes an ''input'' object, and computes the corresponding ''output'' of the
+module. In general ''input'' and ''output'' are
+[[..:torch:tensor|Tensors]]. However, some special sub-classes
+like [[#nn.TableLayers|table layers]] might expect something else. Please
+refer to each module specification for further information.
+
+After a ''forward()'', the [[#nn.ModuleOutput|output]] state variable should
+have been updated to the new value.
+
+It is not advised to override this function. Instead, one should
+implement the [[#nn.Module.updateOutput|updateOutput(input)]]
+function. The ''forward'' method in the abstract parent class
+[[#nn.Module|Module]] will call ''updateOutput(input)''.
+
+==== [gradInput] backward(input, gradOutput) ====
+{{anchor:nn.Module.backward}}
+
+Performs a //backpropagation step// through the module, with respect to the
+given ''input''. In general this method makes the assumption that
+[[#nn.Module.forward|forward(input)]] has been called before, //with the same input//.
+This is necessary for optimization reasons. If you do not respect
+this rule, ''backward()'' will compute incorrect gradients.
+
+In general ''input'', ''gradOutput'' and ''gradInput'' are
+[[..:torch:tensor|Tensors]]. However, some special sub-classes
+like [[#nn.TableLayers|table layers]] might expect something else. Please
+refer to each module specification for further information.
+
+A //backpropagation step// consists of computing two kinds of gradients
+at ''input'' given ''gradOutput'' (gradients with respect to the
+output of the module). This function simply performs this task using
+two function calls:
+
+ - A function call to [[#nn.Module.updateGradInput|updateGradInput(input, gradOutput)]].
+ - A function call to [[#nn.Module.accGradParameters|accGradParameters(input, gradOutput)]].
+
+It is not advised to override this function in custom classes. It
+is better to override the
+[[#nn.Module.updateGradInput|updateGradInput(input, gradOutput)]] and
+[[#nn.Module.accGradParameters|accGradParameters(input, gradOutput)]]
+functions.
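+
+For instance, here is a minimal sketch of a custom module with no
+parameters (the module name ''nn.Scale'' and its behaviour are made up
+for illustration; it is not part of the ''nn'' sources). It only needs
+to override ''updateOutput()'' and ''updateGradInput()'':
+<file lua>
+local Scale, parent = torch.class('nn.Scale', 'nn.Module')
+
+function Scale:__init(factor)
+ parent.__init(self)
+ self.factor = factor or 1
+end
+
+function Scale:updateOutput(input)
+ -- output = factor * input
+ self.output:resizeAs(input):copy(input):mul(self.factor)
+ return self.output
+end
+
+function Scale:updateGradInput(input, gradOutput)
+ -- d(output)/d(input) is just the factor, so gradInput = factor * gradOutput
+ self.gradInput:resizeAs(gradOutput):copy(gradOutput):mul(self.factor)
+ return self.gradInput
+end
+</file>
+Since the module has no parameters, there is nothing to do in
+''accGradParameters()'' and the default implementation can be kept.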
+
+==== updateOutput(input) ====
+{{anchor:nn.Module.updateOutput}}
+
+Computes the output using the current parameter set of the class and
+input. This function returns the result, which is stored in the
+[[#nn.Module.output|output]] field.
+
+==== updateGradInput(input, gradOutput) ====
+{{anchor:nn.Module.updateGradInput}}
+
+Computes the gradient of the module with respect to its own
+input. This is returned in ''gradInput''. Also, the
+[[#nn.Module.gradInput|gradInput]] state variable is updated
+accordingly.
+
+==== accGradParameters(input, gradOutput) ====
+{{anchor:nn.Module.accGradParameters}}
+
+Computes the gradient of the module with respect to its
+own parameters. Many modules do not perform this step as they do not
+have any parameters. The state variable name for the parameters is
+module dependent. The module is expected to //accumulate// the
+gradients with respect to the parameters in some variable.
+
+Zeroing this accumulation is achieved with
+[[#nn.Module.zeroGradParameters|zeroGradParameters()]] and updating
+the parameters according to this accumulation is done with
+[[#nn.Module.updateParameters|updateParameters()]].
+
+==== zeroGradParameters() ====
+{{anchor:nn.Module.zeroGradParameters}}
+
+If the module has parameters, this will zero the accumulation of the
+gradients with respect to these parameters, accumulated through
+[[#nn.Module.accGradParameters|accGradParameters(input, gradOutput)]]
+calls. Otherwise, it does nothing.
+
+==== updateParameters(learningRate) ====
+{{anchor:nn.Module.updateParameters}}
+
+If the module has parameters, this will update these parameters, according
+to the accumulation of the gradients with respect to these parameters,
+accumulated through [[#nn.Module.backward|backward()]] calls.
+
+The update is basically:
+<file lua>
+parameters = parameters - learningRate * gradients_wrt_parameters
+</file>
+If the module does not have parameters, it does nothing.
+
+==== accUpdateGradParameters(input, gradOutput, learningRate) ====
+{{anchor:nn.Module.accUpdateGradParameters}}
+
+This is a convenience function that performs two operations at
+once: it calculates and accumulates the gradients with respect to the
+weights after multiplying them with the negative of the learning rate
+''learningRate''. Performing these two operations at once is more
+efficient and it might be advantageous in certain
+situations.
+
+Keep in mind that this function uses a simple trick to achieve its
+goal and it might not be valid for a custom module.
+
+<file lua>
+function Module:accUpdateGradParameters(input, gradOutput, lr)
+ local gradWeight = self.gradWeight
+ local gradBias = self.gradBias
+ self.gradWeight = self.weight
+ self.gradBias = self.bias
+ self:accGradParameters(input, gradOutput, -lr)
+ self.gradWeight = gradWeight
+ self.gradBias = gradBias
+end
+</file>
+
+As can be seen, the gradients are accumulated directly into the
+weights. This assumption may not be true for a module that computes a
+nonlinear operation.
+
+==== share(mlp,s1,s2,...,sn) ====
+{{anchor:nn.Module.share}}
+
+This function modifies the parameters named
+''s1'',...,''sn'' of the module (if they exist), so that they are shared with (pointers
+to) the parameters with the same names in the given module ''mlp''.
+
+The parameters have to be Tensors. This function is typically used if
+you want to have modules that share the same weights or biases.
+
+Note that if this function is called on a [[#nn.Containers|Container]]
+module, it will share the parameters of all the contained modules as
+well.
+
+Example:
+<file lua>
+
+-- make an mlp
+mlp1=nn.Sequential();
+mlp1:add(nn.Linear(100,10));
+
+-- make a second mlp
+mlp2=nn.Sequential();
+mlp2:add(nn.Linear(100,10));
+
+-- the second mlp shares the bias of the first
+mlp2:share(mlp1,'bias');
+
+-- we change the bias of the first
+mlp1:get(1).bias[1]=99;
+
+-- and see that the second one's bias has also changed..
+print(mlp2:get(1).bias[1])
+
+</file>
+
+
+==== clone(mlp,...) ====
+{{anchor:nn.Module.clone}}
+
+Creates a deep copy of (i.e. not just a pointer to) the module,
+including the current state of its parameters (e.g. weights, biases,
+etc., if any).
+
+If arguments are provided to the ''clone(...)'' function, it also calls
+[[#nn.Module.share|share(...)]] with those arguments on the cloned
+module after creating it, hence making a deep copy of this module with
+some shared parameters.
+
+Example:
+<file lua>
+-- make an mlp
+mlp1=nn.Sequential();
+mlp1:add(nn.Linear(100,10));
+
+-- make a copy that shares the weights and biases
+mlp2=mlp1:clone('weight','bias');
+
+-- we change the bias of the first mlp
+mlp1:get(1).bias[1]=99;
+
+-- and see that the second one's bias has also changed..
+print(mlp2:get(1).bias[1])
+
+</file>
+
+==== type(type) ====
+{{anchor:nn.Module.type}}
+
+This function converts all the parameters of a module to the given
+''type''. The ''type'' can be one of the types defined for
+[[..:torch:tensor|torch.Tensor]].
+
+==== float() ====
+{{anchor:nn.Module.float}}
+
+Convenience method for calling [[#nn.Module.type|module:type('torch.FloatTensor')]].
+
+==== double() ====
+{{anchor:nn.Module.double}}
+
+Convenience method for calling [[#nn.Module.type|module:type('torch.DoubleTensor')]].
+
+==== cuda() ====
+{{anchor:nn.Module.cuda}}
+
+Convenience method for calling [[#nn.Module.type|module:type('torch.CudaTensor')]].
+
+==== State Variables ====
+{{anchor:nn.statevars.dok}}
+
+These state variables are useful objects if one wants to check the guts of
+a ''Module''. The object pointer is //never// supposed to change. However, its
+contents (including its size, if it is a Tensor) are supposed to change.
+
+In general, state variables are
+[[..:torch:tensor|Tensors]]. However, some special sub-classes
+like [[#nn.TableLayers|table layers]] contain something else. Please
+refer to each module specification for further information.
+
+=== output ===
+{{anchor:nn.Module.output}}
+
+This contains the output of the module, computed with the last call of
+[[#nn.Module.forward|forward(input)]].
+
+=== gradInput ===
+{{anchor:nn.Module.gradInput}}
+
+This contains the gradients with respect to the inputs of the module, computed with the last call of
+[[#nn.Module.updateGradInput|updateGradInput(input, gradOutput)]].
+
+==== Parameters and gradients w.r.t. parameters ====
+
+Some modules contain parameters (the ones that we actually want to
+train!). The names of these parameters, and of the gradients w.r.t. these
+parameters, are module dependent.
+
+==== [{weights}, {gradWeights}] parameters() ====
+{{anchor:nn.Module.parameters}}
+
+This function should return two tables: one for the learnable
+parameters ''{weights}'' and another for the gradients of the energy
+w.r.t. the learnable parameters ''{gradWeights}''.
+
+For custom modules, it is a good idea to also override this
+function. By default none of the built-in functions/modules use this
+function call, but it is especially useful when one wants to obtain a
+global view of the whole network.
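+
+For instance, the returned tables can be used to write a manual update
+loop over every parameter tensor of a network (a minimal sketch, not part
+of the ''nn'' sources; the architecture and learning rate are arbitrary):
+<file lua>
+mlp = nn.Sequential()
+mlp:add(nn.Linear(10,5))
+mlp:add(nn.Tanh())
+mlp:add(nn.Linear(5,1))
+
+local weights, gradWeights = mlp:parameters()
+for i = 1,#weights do
+ -- same update as mlp:updateParameters(0.01), written by hand
+ weights[i]:add(-0.01, gradWeights[i])
+end
+</file>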
By default none of the built-in functions/modules use this +function call, but it is especialy useful when one wants to obtain a +global view of the whole network. + +===== Containers ===== +{{anchor:nn.Containers}} + +==== Concat ==== +{{anchor:nn.Concat}} + +<file lua> +module = nn.Concat(dim) +</file> +Concat concatenates the output of one layer of "parallel" modules along the +provided dimension ''dim'': they take the same inputs, and their output is +concatenated. +<file lua> +mlp=nn.Concat(1); +mlp:add(nn.Linear(5,3)) +mlp:add(nn.Linear(5,7)) +require "lab" +print(mlp:forward(lab.randn(5))) +</file> +which gives the output: +<file lua> + 0.7486 + 0.1349 + 0.7924 +-0.0371 +-0.4794 + 0.3044 +-0.0835 +-0.7928 + 0.7856 +-0.1815 +[torch.Tensor of dimension 10] +</file> + + +==== Sequential ==== +{{anchor:nn.Sequential}} + +Sequential provides a means to plug layers together +in a feed-forward fully connected manner. + +E.g. +creating a one hidden-layer multi-layer perceptron is thus just as easy as: +<file lua> +mlp = nn.Sequential() +mlp:add( nn.Linear(10, 25) ) -- 10 input, 25 hidden units +mlp:add( nn.Tanh() ) -- some hyperbolic tangent transfer function +mlp:add( nn.Linear(25, 1) ) -- 1 output + +require "lab" +print(mlp:forward(lab.randn(10))) +</file> +which gives the output: +<file lua> +-0.1815 +[torch.Tensor of dimension 1] +</file> + +==== Parallel ==== +{{anchor:nn.Parallel}} + +''module'' = ''Parallel(inputDimension,outputDimension)'' + +Creates a container module that applies its ''ith'' child module to the ''ith'' slice of the input Tensor by using [[..:torch:tensor#torch.tensor.select|select]] +on dimension ''inputDimension''. It concatenates the results of its contained modules together along dimension ''outputDimension''. + +Example: +<file lua> + require "lab" + mlp=nn.Parallel(2,1); -- iterate over dimension 2 of input + mlp:add(nn.Linear(10,3)); -- apply to first slice + mlp:add(nn.Linear(10,2)) -- apply to first second slice + print(mlp:forward(lab.randn(10,2))) +</file> +gives the output: +<file lua> +-0.5300 +-1.1015 + 0.7764 + 0.2819 +-0.6026 +[torch.Tensor of dimension 5] +</file> + +A more complicated example: +<file lua> +require "lab" + +mlp=nn.Sequential(); +c=nn.Parallel(1,2) +for i=1,10 do + local t=nn.Sequential() + t:add(nn.Linear(3,2)) + t:add(nn.Reshape(2,1)) + c:add(t) +end +mlp:add(c) + +pred=mlp:forward(lab.randn(10,3)) +print(pred) + +for i=1,10000 do -- Train for a few iterations + x=lab.randn(10,3); + y=lab.ones(2,10); + pred=mlp:forward(x) + + criterion= nn.MSECriterion() + local err=criterion:forward(pred,y) + local gradCriterion = criterion:backward(pred,y); + mlp:zeroGradParameters(); + mlp:backward(x, gradCriterion); + mlp:updateParameters(0.01); + print(err) +end +</file> +===== Simple layers ===== +{{anchor:nn.simplelayers.dok}} +==== Linear ==== +{{anchor:nn.Linear}} + +''module'' = ''Linear(inputDimension,outputDimension)'' + +Applies a linear transformation to the incoming data, i.e. //y= +Ax+b//. The ''input'' tensor given in ''forward(input)'' must be +either a vector (1D tensor) or matrix (2D tensor). If the input is a +matrix, then each row is assumed to be an input sample of given batch. 
+
+You can create a layer in the following way:
+<file lua>
+ module= nn.Linear(10,5)  -- 10 inputs, 5 outputs
+</file>
+Usually this would be added to a network of some kind, e.g.:
+<file lua>
+ mlp = nn.Sequential();
+ mlp:add(module)
+</file>
+The weights and biases (//A// and //b//) can be viewed with:
+<file lua>
+ print(module.weight)
+ print(module.bias)
+</file>
+The gradients for these weights can be seen with:
+<file lua>
+ print(module.gradWeight)
+ print(module.gradBias)
+</file>
+As usual with ''nn'' modules,
+applying the linear transformation is performed with:
+<file lua>
+ x=torch.Tensor(10) -- 10 inputs
+ y=module:forward(x)
+</file>
+
+==== SparseLinear ====
+{{anchor:nn.SparseLinear}}
+
+''module'' = ''SparseLinear(inputDimension,outputDimension)''
+
+Applies a linear transformation to the incoming sparse data, i.e.
+//y= Ax+b//. The ''input'' tensor given in ''forward(input)'' must
+be a sparse vector represented as a 2D tensor of the form
+torch.Tensor(N, 2) where the pairs represent indices and values.
+The SparseLinear layer is useful when the number of input
+dimensions is very large and the input data is sparse.
+
+You can create a sparse linear layer in the following way:
+
+<file lua>
+ module= nn.SparseLinear(10000,2)  -- 10000 inputs, 2 outputs
+</file>
+The sparse linear module may be used as part of a larger network,
+and apart from the form of the input,
+[[#nn.SparseLinear|SparseLinear]]
+operates in exactly the same way as the [[#nn.Linear|Linear]] layer.
+
+A sparse input vector may be created as follows:
+<file lua>
+
+ x=lab.new({1, 0.1},{2, 0.3},{10, 0.3},{31, 0.2})
+
+ print(x)
+
+  1.0000   0.1000
+  2.0000   0.3000
+ 10.0000   0.3000
+ 31.0000   0.2000
+[torch.Tensor of dimension 4x2]
+
+</file>
+
+The first column contains indices, the second column contains the
+values of a vector in which all other elements are zero. The
+indices should not exceed the stated dimensions of the input to the
+layer (10000 in the example).
+
+==== Abs ====
+{{anchor:nn.Abs}}
+
+''module'' = ''Abs()''
+
+''output = abs(input)''.
+
+<file lua>
+m=nn.Abs()
+ii=lab.linspace(-5,5)
+oo=m:forward(ii)
+go=lab.ones(100)
+gi=m:backward(ii,go)
+gnuplot.plot({'f(x)',ii,oo,'+-'},{'df/dx',ii,gi,'+-'})
+gnuplot.grid(true)
+</file>
+
+{{abs.png?400}}
+
+==== Add ====
+{{anchor:nn.Add}}
+
+''module'' = ''Add(inputDimension,scalar)''
+
+Applies a bias term to the incoming data, i.e. //y_i = x_i + b_i//,
+or, if ''scalar'' = ''true'', a single bias term is used: //y_i = x_i + b//.
+
+Example:
+<file lua>
+y=torch.Tensor(5);
+mlp=nn.Sequential()
+mlp:add(nn.Add(5))
+
+function gradUpdate(mlp, x, y, criterion, learningRate)
+   local pred = mlp:forward(x)
+   local err = criterion:forward(pred, y)
+   local gradCriterion = criterion:backward(pred, y)
+   mlp:zeroGradParameters()
+   mlp:backward(x, gradCriterion)
+   mlp:updateParameters(learningRate)
+   return err
+end
+
+for i=1,10000 do
+ x=lab.rand(5)
+ y:copy(x);
+ for i=1,5 do y[i]=y[i]+i; end
+ err=gradUpdate(mlp,x,y,nn.MSECriterion(),0.01)
+end
+print(mlp:get(1).bias)
+</file>
+gives the output:
+<file lua>
+ 1.0000
+ 2.0000
+ 3.0000
+ 4.0000
+ 5.0000
+[torch.Tensor of dimension 5]
+</file>
+i.e. the network successfully learns that the input //x// has been shifted
+to produce the output //y//.
+
+
+==== Mul ====
+{{anchor:nn.Mul}}
+
+''module'' = ''Mul(inputDimension)''
+
+Applies a //single// scaling factor to the incoming data, i.e.
+//y= w x//, where //w// is a scalar.
+
+Example:
+<file lua>
+y=torch.Tensor(5);
+mlp=nn.Sequential()
+mlp:add(nn.Mul(5))
+
+function gradUpdate(mlp, x, y, criterion, learningRate)
+   local pred = mlp:forward(x)
+   local err = criterion:forward(pred,y)
+   local gradCriterion = criterion:backward(pred,y);
+   mlp:zeroGradParameters();
+   mlp:backward(x, gradCriterion);
+   mlp:updateParameters(learningRate);
+   return err
+end
+
+
+for i=1,10000 do
+ x=lab.rand(5)
+ y:copy(x); y:mul(math.pi);
+ err=gradUpdate(mlp,x,y,nn.MSECriterion(),0.01)
+end
+print(mlp:get(1).weight)
+</file>
+gives the output:
+<file lua>
+ 3.1416
+[torch.Tensor of dimension 1]
+</file>
+i.e. the network successfully learns that the input ''x'' has been scaled by
+pi.
+
+==== CMul ====
+{{anchor:nn.CMul}}
+
+''module'' = ''CMul(inputDimension)''
+
+Applies a component-wise multiplication to the incoming data, i.e.
+''y_i = w_i * x_i''.
+
+Example:
+<file lua>
+mlp=nn.Sequential()
+mlp:add(nn.CMul(5))
+
+y=torch.Tensor(5);
+sc=torch.Tensor(5); for i=1,5 do sc[i]=i; end -- scale input with this
+
+function gradUpdate(mlp,x,y,criterion,learningRate)
+   local pred = mlp:forward(x)
+   local err = criterion:forward(pred,y)
+   local gradCriterion = criterion:backward(pred,y);
+   mlp:zeroGradParameters();
+   mlp:backward(x, gradCriterion);
+   mlp:updateParameters(learningRate);
+   return err
+end
+
+for i=1,10000 do
+ x=lab.rand(5)
+ y:copy(x); y:cmul(sc);
+ err=gradUpdate(mlp,x,y,nn.MSECriterion(),0.01)
+end
+print(mlp:get(1).weight)
+</file>
+gives the output:
+<file lua>
+ 1.0000
+ 2.0000
+ 3.0000
+ 4.0000
+ 5.0000
+[torch.Tensor of dimension 5]
+</file>
+i.e. the network successfully learns that the input //x// has been scaled by
+those scaling factors to produce the output //y//.
+
+
+==== Max ====
+{{anchor:nn.Max}}
+
+''module'' = ''Max(dimension)''
+
+Applies a max operation over dimension ''dimension''.
+Hence, if an ''nxpxq'' Tensor was given as input, and ''dimension'' = ''2''
+then an ''nxq'' matrix would be output.
+
+
+==== Min ====
+{{anchor:nn.Min}}
+
+''module'' = ''Min(dimension)''
+
+Applies a min operation over dimension ''dimension''.
+Hence, if an ''nxpxq'' Tensor was given as input, and ''dimension'' = ''2''
+then an ''nxq'' matrix would be output.
+
+
+==== Mean ====
+{{anchor:nn.Mean}}
+
+''module'' = ''Mean(dimension)''
+
+Applies a mean operation over dimension ''dimension''.
+Hence, if an ''nxpxq'' Tensor was given as input, and ''dimension'' = ''2''
+then an ''nxq'' matrix would be output.
+
+==== Sum ====
+{{anchor:nn.Sum}}
+
+''module'' = ''Sum(dimension)''
+
+Applies a sum operation over dimension ''dimension''.
+Hence, if an ''nxpxq'' Tensor was given as input, and ''dimension'' = ''2''
+then an ''nxq'' matrix would be output.
+
+
+==== Euclidean ====
+{{anchor:nn.Euclidean}}
+
+''module'' = ''Euclidean(inputDimension,outputDimension)''
+
+Outputs the Euclidean distance of the input to ''outputDimension'' centers,
+i.e. this layer has the weights ''c_i'', ''i'' = ''1'',..,''outputDimension'', where
+''c_i'' are vectors of dimension ''inputDimension''. Output dimension ''i'' is
+''|| c_i - x ||^2'', where ''x'' is the input.
+
+==== WeightedEuclidean ====
+{{anchor:nn.WeightedEuclidean}}
+
+''module'' = ''WeightedEuclidean(inputDimension,outputDimension)''
+
+This module is similar to [[#nn.Euclidean|Euclidean]], but
+additionally learns a separate diagonal covariance matrix across the
+features of the input space for each center.
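+
+For a quick check of the shapes involved in [[#nn.Euclidean|Euclidean]] (and analogously
+WeightedEuclidean), here is a minimal sketch; it assumes the ''lab'' package is loaded,
+as in the other examples in this document:
+<file lua>
+m = nn.Euclidean(5,3)   -- 3 centers, each of dimension 5
+x = lab.rand(5)         -- a random input vector
+print(m:forward(x))     -- 3 values: one distance per center
+print(m.weight)         -- the centers are stored in the weight tensor
+</file>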
+ + +==== Copy ==== +{{anchor:nn.Copy}} + +''module'' = ''Copy(inputType,outputType)'' + +This layer copies the input to output with type casting from input +type from ''inputType'' to ''outputType''. + + +==== Narrow ==== +{{anchor:nn.Narrow}} + +''module'' = ''Narrow(dimension, offset, length)'' + +Narrow is application of +[[..:torch:tensor:#torch.Tensor.narrow|narrow]] operation in a +module. + +==== Replicate ==== +{{anchor:nn.Replicate}} + +''module'' = ''Replicate(nFeature)'' + +This class creates an output where the input is replicated +''nFeature'' times along its first dimension. There is no memory +allocation or memory copy in this module. It sets the +[[..:torch:tensor#torch.Tensor.stride|stride]] along the first +dimension to zero. + +<file lua> +torch> x=lab.linspace(1,5,5) +torch> =x + 1 + 2 + 3 + 4 + 5 +[torch.DoubleTensor of dimension 5] + +torch> m=nn.Replicate(3) +torch> o=m:forward(x) +torch> =o + 1 2 3 4 5 + 1 2 3 4 5 + 1 2 3 4 5 +[torch.DoubleTensor of dimension 3x5] + +torch> x:fill(13) +torch> =x + 13 + 13 + 13 + 13 + 13 +[torch.DoubleTensor of dimension 5] + +torch> =o + 13 13 13 13 13 + 13 13 13 13 13 + 13 13 13 13 13 +[torch.DoubleTensor of dimension 3x5] + +</file> + + +==== Reshape ==== +{{anchor:nn.Reshape}} + +''module'' = ''Reshape(dimension1, dimension2, ..)'' + +Reshapes an ''nxpxqx..'' Tensor into a ''dimension1xdimension2x...'' Tensor, +taking the elements column-wise. + +Example: +<file lua> +> x=torch.Tensor(4,4) +> for i=1,4 do +> for j=1,4 do +> x[i][j]=(i-1)*4+j; +> end +> end +> print(x) + + 1 2 3 4 + 5 6 7 8 + 9 10 11 12 + 13 14 15 16 +[torch.Tensor of dimension 4x4] + +> print(nn.Reshape(2,8):forward(x)) + + 1 9 2 10 3 11 4 12 + 5 13 6 14 7 15 8 16 +[torch.Tensor of dimension 2x8] + +> print(nn.Reshape(8,2):forward(x)) + + 1 3 + 5 7 + 9 11 + 13 15 + 2 4 + 6 8 + 10 12 + 14 16 +[torch.Tensor of dimension 8x2] + +> print(nn.Reshape(16):forward(x)) + + 1 + 5 + 9 + 13 + 2 + 6 + 10 + 14 + 3 + 7 + 11 + 15 + 4 + 8 + 12 + 16 +[torch.Tensor of dimension 16] + + +</file> + + +==== Select ==== +{{anchor:nn.Select}} + +Selects a dimension and index of a ''nxpxqx..'' Tensor. + +Example: +<file lua> +mlp=nn.Sequential(); +mlp:add(nn.Select(1,3)) + +require "lab" +x=lab.randn(10,5) +print(x) +print(mlp:forward(x)) +</file> +gives the output: +<file lua> + 0.9720 -0.0836 0.0831 -0.2059 -0.0871 + 0.8750 -2.0432 -0.1295 -2.3932 0.8168 + 0.0369 1.1633 0.6483 1.2862 0.6596 + 0.1667 -0.5704 -0.7303 0.3697 -2.2941 + 0.4794 2.0636 0.3502 0.3560 -0.5500 +-0.1898 -1.1547 0.1145 -1.1399 0.1711 +-1.5130 1.4445 0.2356 -0.5393 -0.6222 +-0.6587 0.4314 1.1916 -1.4509 1.9400 + 0.2733 1.0911 0.7667 0.4002 0.1646 + 0.5804 -0.5333 1.1621 1.5683 -0.1978 +[torch.Tensor of dimension 10x5] + + 0.0369 + 1.1633 + 0.6483 + 1.2862 + 0.6596 +[torch.Tensor of dimension 5] +</file> + +This can be used in conjunction with [[#nn.Concat|Concat]] +to emulate the behavior +of [[#nn.Parallel|Parallel]], or to select various parts of an input Tensor to +perform operations on. 
Here is a fairly complicated example:
+<file lua>
+require "lab"
+
+mlp=nn.Sequential();
+c=nn.Concat(2)
+for i=1,10 do
+   local t=nn.Sequential()
+   t:add(nn.Select(1,i))
+   t:add(nn.Linear(3,2))
+   t:add(nn.Reshape(2,1))
+   c:add(t)
+end
+mlp:add(c)
+
+pred=mlp:forward(lab.randn(10,3))
+print(pred)
+
+for i=1,10000 do     -- Train for a few iterations
+ x=lab.randn(10,3);
+ y=lab.ones(2,10);
+ pred=mlp:forward(x)
+
+ criterion= nn.MSECriterion()
+ err=criterion:forward(pred,y)
+ gradCriterion = criterion:backward(pred,y);
+ mlp:zeroGradParameters();
+ mlp:backward(x, gradCriterion);
+ mlp:updateParameters(0.01);
+ print(err)
+end
+</file>
+
+==== Exp ====
+{{anchor:nn.Exp}}
+
+Applies the ''exp'' function element-wise to the input Tensor,
+thus outputting a Tensor of the same dimension.
+<file lua>
+ii=lab.linspace(-2,2)
+m=nn.Exp()
+oo=m:forward(ii)
+go=lab.ones(100)
+gi=m:backward(ii,go)
+gnuplot.plot({'f(x)',ii,oo,'+-'},{'df/dx',ii,gi,'+-'})
+gnuplot.grid(true)
+</file>
+{{exp.png?400}}
+
+
+==== Square ====
+{{anchor:nn.Square}}
+
+Takes the square of each element.
+
+<file lua>
+ii=lab.linspace(-5,5)
+m=nn.Square()
+oo=m:forward(ii)
+go=lab.ones(100)
+gi=m:backward(ii,go)
+gnuplot.plot({'f(x)',ii,oo,'+-'},{'df/dx',ii,gi,'+-'})
+gnuplot.grid(true)
+</file>
+{{square.png?400}}
+
+==== Sqrt ====
+{{anchor:nn.Sqrt}}
+
+Takes the square root of each element.
+
+<file lua>
+ii=lab.linspace(0,5)
+m=nn.Sqrt()
+oo=m:forward(ii)
+go=lab.ones(100)
+gi=m:backward(ii,go)
+gnuplot.plot({'f(x)',ii,oo,'+-'},{'df/dx',ii,gi,'+-'})
+gnuplot.grid(true)
+</file>
+{{sqrt.png?400}}
+
+==== Power ====
+{{anchor:nn.Power}}
+
+''module'' = ''Power(p)''
+
+Raises each element to its ''pth'' power.
+
+<file lua>
+ii=lab.linspace(0,2)
+m=nn.Power(1.25)
+oo=m:forward(ii)
+go=lab.ones(100)
+gi=m:backward(ii,go)
+gnuplot.plot({'f(x)',ii,oo,'+-'},{'df/dx',ii,gi,'+-'})
+gnuplot.grid(true)
+</file>
+{{power.png?400}}
+
+===== Transfer Function Layers =====
+{{anchor:nn.transfer.dok}}
+
+==== HardTanh ====
+{{anchor:nn.HardTanh}}
+
+Applies the ''HardTanh'' function element-wise to the input Tensor,
+thus outputting a Tensor of the same dimension.
+
+''HardTanh'' is defined as:
+
+  * ''f(x) = 1, if x > 1''
+  * ''f(x) = -1, if x < -1''
+  * ''f(x) = x, otherwise''
+
+<file lua>
+ii=lab.linspace(-2,2)
+m=nn.HardTanh()
+oo=m:forward(ii)
+go=lab.ones(100)
+gi=m:backward(ii,go)
+gnuplot.plot({'f(x)',ii,oo,'+-'},{'df/dx',ii,gi,'+-'})
+gnuplot.grid(true)
+</file>
+{{htanh.png?400}}
+
+
+==== HardShrink ====
+{{anchor:nn.HardShrink}}
+
+''module = nn.HardShrink(lambda)''
+
+Applies the hard shrinkage function element-wise to the input
+[[..:torch:Tensor|Tensor]]. The output is the same size as the input.
+
+The ''HardShrinkage'' operator is defined as:
+
+  * ''f(x) = x, if x > lambda''
+  * ''f(x) = x, if x < -lambda''
+  * ''f(x) = 0, otherwise''
+
+<file lua>
+ii=lab.linspace(-2,2)
+m=nn.HardShrink(0.85)
+oo=m:forward(ii)
+go=lab.ones(100)
+gi=m:backward(ii,go)
+gnuplot.plot({'f(x)',ii,oo,'+-'},{'df/dx',ii,gi,'+-'})
+gnuplot.grid(true)
+</file>
+{{hshrink.png?400}}
+
+==== SoftShrink ====
+{{anchor:nn.SoftShrink}}
+
+''module = nn.SoftShrink(lambda)''
+
+Applies the soft shrinkage function element-wise to the input
+[[..:torch:Tensor|Tensor]]. The output is the same size as the input.
+
+The ''SoftShrinkage'' operator is defined as:
+
+  * ''f(x) = x-lambda, if x > lambda''
+  * ''f(x) = x+lambda, if x < -lambda''
+  * ''f(x) = 0, otherwise''
+
+<file lua>
+ii=lab.linspace(-2,2)
+m=nn.SoftShrink(0.85)
+oo=m:forward(ii)
+go=lab.ones(100)
+gi=m:backward(ii,go)
+gnuplot.plot({'f(x)',ii,oo,'+-'},{'df/dx',ii,gi,'+-'})
+gnuplot.grid(true)
+</file>
+{{sshrink.png?400}}
+
+
+==== SoftMax ====
+{{anchor:nn.SoftMax}}
+
+Applies the ''Softmax'' function to an n-dimensional input Tensor,
+rescaling it so that the elements of the n-dimensional output Tensor
+lie in the range (0,1) and sum to 1.
+
+''Softmax'' is defined as ''f_i(x)'' = ''exp(x_i-shift) / sum_j exp(x_j-shift)'',
+where ''shift'' = ''max_i x_i''.
+
+
+<file lua>
+ii=lab.exp(lab.abs(lab.randn(10)))
+m=nn.SoftMax()
+oo=m:forward(ii)
+gnuplot.plot({'Input',ii,'+-'},{'Output',oo,'+-'})
+gnuplot.grid(true)
+</file>
+{{softmax.png?400}}
+
+==== SoftMin ====
+{{anchor:nn.SoftMin}}
+
+Applies the ''Softmin'' function to an n-dimensional input Tensor,
+rescaling it so that the elements of the n-dimensional output Tensor
+lie in the range (0,1) and sum to 1.
+
+''Softmin'' is defined as ''f_i(x)'' = ''exp(-x_i-shift) / sum_j exp(-x_j-shift)'',
+where ''shift'' = ''max_i (-x_i)''.
+
+
+<file lua>
+ii=lab.exp(lab.abs(lab.randn(10)))
+m=nn.SoftMin()
+oo=m:forward(ii)
+gnuplot.plot({'Input',ii,'+-'},{'Output',oo,'+-'})
+gnuplot.grid(true)
+</file>
+{{softmin.png?400}}
+
+==== SoftPlus ====
+{{anchor:nn.SoftPlus}}
+
+Applies the ''SoftPlus'' function to an n-dimensional input Tensor.
+Can be used to constrain the output of a machine to always be positive.
+
+''SoftPlus'' is defined as ''f_i(x)'' = ''log(1 + exp(x_i))''.
+
+<file lua>
+ii=lab.randn(10)
+m=nn.SoftPlus()
+oo=m:forward(ii)
+go=lab.ones(10)
+gi=m:backward(ii,go)
+gnuplot.plot({'Input',ii,'+-'},{'Output',oo,'+-'},{'gradInput',gi,'+-'})
+gnuplot.grid(true)
+</file>
+{{softplus.png?400}}
+
+==== SoftSign ====
+{{anchor:nn.SoftSign}}
+
+Applies the ''SoftSign'' function to an n-dimensional input Tensor.
+
+''SoftSign'' is defined as ''f_i(x) = x_i / (1+|x_i|)''.
+
+<file lua>
+ii=lab.linspace(-5,5)
+m=nn.SoftSign()
+oo=m:forward(ii)
+go=lab.ones(100)
+gi=m:backward(ii,go)
+gnuplot.plot({'f(x)',ii,oo,'+-'},{'df/dx',ii,gi,'+-'})
+gnuplot.grid(true)
+</file>
+{{softsign.png?400}}
+
+==== LogSigmoid ====
+{{anchor:nn.LogSigmoid}}
+
+Applies the ''LogSigmoid'' function to an n-dimensional input Tensor.
+
+''LogSigmoid'' is defined as ''f_i(x)'' = ''log(1/(1+exp(-x_i)))''.
+
+
+<file lua>
+ii=lab.randn(10)
+m=nn.LogSigmoid()
+oo=m:forward(ii)
+go=lab.ones(10)
+gi=m:backward(ii,go)
+gnuplot.plot({'Input',ii,'+-'},{'Output',oo,'+-'},{'gradInput',gi,'+-'})
+gnuplot.grid(true)
+</file>
+{{logsigmoid.png?400}}
+
+
+==== LogSoftMax ====
+{{anchor:nn.LogSoftMax}}
+
+Applies the ''LogSoftmax'' function to an n-dimensional input Tensor.
+
+''LogSoftmax'' is defined as ''f_i(x)'' = ''log(1/a exp(x_i))'',
+where  ''a'' = ''sum_j exp(x_j)''.
+
+<file lua>
+ii=lab.randn(10)
+m=nn.LogSoftMax()
+oo=m:forward(ii)
+go=lab.ones(10)
+gi=m:backward(ii,go)
+gnuplot.plot({'Input',ii,'+-'},{'Output',oo,'+-'},{'gradInput',gi,'+-'})
+gnuplot.grid(true)
+</file>
+{{logsoftmax.png?400}}
+
+==== Sigmoid ====
+{{anchor:nn.Sigmoid}}
+
+Applies the ''Sigmoid'' function element-wise to the input Tensor,
+thus outputting a Tensor of the same dimension.
+
+''Sigmoid'' is defined as ''f(x)'' = ''1/(1+exp(-x))''.
+ +<file lua> +ii=lab.linspace(-5,5) +m=nn.Sigmoid() +oo=m:forward(ii) +go=lab.ones(100) +gi=m:backward(ii,go) +gnuplot.plot({'f(x)',ii,oo,'+-'},{'df/dx',ii,gi,'+-'}) +gnuplot.grid(true) +</file> +{{sigmoid.png?400}} + +==== Tanh ==== +{{anchor:nn.Tanh}} + +Applies the ''Tanh'' function element-wise to the input Tensor, +thus outputting a Tensor of the same dimension. + +<file lua> +ii=lab.linspace(-3,3) +m=nn.Tanh() +oo=m:forward(ii) +go=lab.ones(100) +gi=m:backward(ii,go) +gnuplot.plot({'f(x)',ii,oo,'+-'},{'df/dx',ii,gi,'+-'}) +gnuplot.grid(true) +</file> +{{tanh.png?400}} + +===== Convolutional layers ===== +{{anchor:nn.convlayers.dok}} + +SpatialConvolution and SpatialSubsampling apply to inputs with +two-dimensional relationships (e.g. images). TemporalConvolution and +TemporalSubsampling apply to sequences with a one-dimensional +relationship (e.g. strings of some kind). + +For spatial convolutional layers, the input is supposed to be 3D. The +first dimension is the number of features, the last two dimenstions +are spatial. + +==== SpatialConvolution ==== +{{anchor:nn.SpatialConvolution}} + +<file lua> +module = nn.SpatialConvolution(nInputPlane, nOutputPlane, kW, kH, [dW], [dH]) +</file> + +Applies a 2D convolution over an input image composed of several input planes. The ''input'' tensor in +''forward(input)'' is expected to be a 3D tensor (''width x height x nInputPlane''). + +The parameters are the following: + * ''nInputPlane'': The number of expected input planes in the image given into ''forward()''. + * ''nOutputPlane'': The number of output planes the convolution layer will produce. + * ''kW'': The kernel width of the convolution + * ''kH'': The kernel height of the convolution + * ''dW'': The step of the convolution in the width dimension. Default is ''1''. + * ''dH'': The step of the convolution in the height dimension. Default is ''1''. + +Note that depending of the size of your kernel, several (of the last) +columns or rows of the input image might be lost. It is up to the user to +add proper padding in images. + +If the input image is a 3D tensor ''nInputPlane x width x height'', the output image size +will be ''nOutputPlane x owidth x oheight'' where +<file lua> +owidth = (width - kW) / dW + 1 +oheight = (height - kH) / dH + 1 . +</file> + +The parameters of the convolution can be found in ''self.weight'' (Tensor of +size ''nOutputPlane x nInputPlane x kH x kW'') and ''self.bias'' (Tensor of +size ''nOutputPlane''). The corresponding gradients can be found in +''self.gradWeight'' and ''self.gradBias''. + +The output value of the layer can be precisely described as: +<file lua> +output[i][j][k] = bias[k] + + sum_l sum_{s=1}^kW sum_{t=1}^kH weight[s][t][l][k] + * input[dW*(i-1)+s)][dH*(j-1)+t][l] +</file> + +==== SpatialConvolutionMap ==== +{{anchor:nn.SpatialConvolutionMap}} + +<file lua> +module = nn.SpatialConvolutionMap(connectionMatrix, kW, kH, [dW], [dH]) +</file> + +This class is a generalization of +[[#nn.SpatialConvolution|nn.SpatialConvolution]]. It uses a geenric +connection table between input and output features. The +[[#nn.SpatialConvolution|nn.SpatialConvolution]] is equivalent to +using a [[#nn.tables.full|full connection table]]. One can specify +different types of connection tables. + +=== Full Connection Table === +{{anchor:nn.tables.full}} + +''table = nn.tables.full(nin,nout)'' + +This is a precomputed table that specifies connections between every +input and output node. 
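+
+For illustration, here is a minimal sketch (under the same conventions as the rest of
+this section) of building a [[#nn.SpatialConvolutionMap|SpatialConvolutionMap]] with a
+full connection table:
+<file lua>
+-- connect all 3 input planes to all 16 output planes
+conn = nn.tables.full(3,16)
+m = nn.SpatialConvolutionMap(conn, 5, 5)  -- 5x5 kernels
+-- as noted above, this behaves like nn.SpatialConvolution(3,16,5,5)
+</file>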
+ +=== One to One Connection Table === +{{anchor:nn.tables.onetoone}} + +''table = nn.tables.oneToOne(n)'' + +This is a precomputed table that specifies a single connection to each +output node from corresponding input node. + +=== Random Connection Table === +{{anchor:nn.tables.random}} + +''table = nn.tables.random(nin,nout, nto)'' + +This table is randomly populated such that each output unit has +''nto'' incoming connections. The algorihtm tries to assign uniform +number of outgoing connections to each input node if possible. + +==== SpatialLPPooling ==== +{{anchor:nn.SpatialLPPooling}} + +<file lua> +module = nn.SpatialLPPooling(nInputPlane, pnorm, kW, kH, [dW], [dH]) +</file> + +Computes the ''p'' norm in a convolutional manner on a set of 2D input planes. + +==== SpatialMaxPooling ==== +{{anchor:nn.SpatialMaxPooling}} + +<file lua> +module = nn.SpatialMaxPooling(kW, kH [, dW, dH]) +</file> + +Applies 2D max-pooling operation in ''kWxkH'' regions by step size +''dWxdH'' steps. The number of output features is equal to the number of +input planes. + +==== SpatialSubSampling ==== +{{anchor:nn.SpatialSubSampling}} + +<file lua> +module = nn.SpatialSubSampling(nInputPlane, kW, kH, [dW], [dH]) +</file> + +Applies a 2D sub-sampling over an input image composed of several input planes. The ''input'' tensor in +''forward(input)'' is expected to be a 3D tensor (''nInputPlane x width x height''). The number of output +planes will be the same as ''nInputPlane''. + +The parameters are the following: + * ''nInputPlane'': The number of expected input planes in the image given into ''forward()''. + * ''kW'': The kernel width of the sub-sampling + * ''kH'': The kernel height of the sub-sampling + * ''dW'': The step of the sub-sampling in the width dimension. Default is ''1''. + * ''dH'': The step of the sub-sampling in the height dimension. Default is ''1''. + +Note that depending of the size of your kernel, several (of the last) +columns or rows of the input image might be lost. It is up to the user to +add proper padding in images. + +If the input image is a 3D tensor ''width x height x nInputPlane'', the output image size +will be ''owidth x oheight x nInputPlane'' where +<file lua> +owidth = (width - kW) / dW + 1 +oheight = (height - kH) / dH + 1 . +</file> + +The parameters of the sub-sampling can be found in ''self.weight'' (Tensor of +size ''nInputPlane'') and ''self.bias'' (Tensor of size ''nInputPlane''). The +corresponding gradients can be found in ''self.gradWeight'' and +''self.gradBias''. + +The output value of the layer can be precisely described as: +<file lua> +output[i][j][k] = bias[k] + + weight[k] sum_{s=1}^kW sum_{t=1}^kH input[dW*(i-1)+s)][dH*(j-1)+t][k] +</file> + +==== SpatialZeroPadding ==== +{{anchor:nn.SpatialZeroPadding}} + +<file lua> +module = nn.SpatialZeroPadding(padLeft, padRight, padTop, padBottom) +</file> + +Each feature map of a given input is padded with specified number of +zeros. If padding values are negative, then input is cropped. + +==== SpatialSubtractiveNormalization ==== +{{anchor:nn.SpatialSubtractiveNormalization}} + +<file lua> +module = nn.SpatialSubtractiveNormalization(ninputplane, kernel) +</file> + +Applies a spatial subtraction operation on a series of 2D inputs using +''kernel'' for computing the weighted average in a neighborhood. The +neighborhood is defined for a local spatial region that is the size as +kernel and across all features. For a an input image, since there is +only one feature, the region is only spatial. 
For an RGB image, the +weighted anerage is taken over RGB channels and a spatial region. + +If the ''kernel'' is 1D, then it will be used for constructing and seperable +2D kernel. The operations will be much more efficient in this case. + +The kernel is generally chosen as a gaussian when it is believed that +the correlation of two pixel locations decrease with increasing +distance. On the feature dimension, a uniform average is used since +the weighting across features is not known. + +For this example we use an external package +[[http://www.github.com/clementfarabet/lua---image/|image]] + +<file lua> +require 'image' +require 'nn' +lena = image.rgb2y(image.lena()) +ker = lab.ones(11) +m=nn.SpatialSubtractiveNormalization(1,ker) +processed = m:forward(lena) +w1=image.display(lena) +w2=image.display(processed) +</file> +{{lena.jpg?300}}{{lenap.jpg?300}} + +==== TemporalConvolution ==== +{{anchor:nn.TemporalConvolution}} + +<file lua> +module = nn.TemporalConvolution(inputFrameSize, outputFrameSize, kW, [dW]) +</file> + +Applies a 1D convolution over an input sequence composed of ''nInputFrame'' frames. The ''input'' tensor in +''forward(input)'' is expected to be a 2D tensor (''nInputFrame x inputFrameSize''). + +The parameters are the following: + * ''inputFrameSize'': The input frame size expected in sequences given into ''forward()''. + * ''outputFrameSize'': The output frame size the convolution layer will produce. + * ''kW'': The kernel width of the convolution + * ''dW'': The step of the convolution. Default is ''1''. + +Note that depending of the size of your kernel, several (of the last) +frames of the sequence might be lost. It is up to the user to add proper padding frames in the input +sequences. + +If the input sequence is a 2D tensor ''inputFrameSize x nInputFrame'', the output sequence will be +''nOutputFrame x outputFrameSize'' where +<file lua> +nOutputFrame = (nInputFrame - kW) / dW + 1 +</file> + +The parameters of the convolution can be found in ''self.weight'' (Tensor of +size ''outputFrameSize x (inputFrameSize x kW) '') and ''self.bias'' (Tensor of +size ''outputFrameSize''). The corresponding gradients can be found in +''self.gradWeight'' and ''self.gradBias''. 
+ +The output value of the layer can be precisely described as: +<file lua> +output[i][t] = bias[i] + + sum_j sum_{k=1}^kW weight[j][k][i] + * input[j][dW*(t-1)+k)] +</file> + +Here is a simple example: + +<file lua> +inp=5; -- dimensionality of one sequence element +outp=1; -- number of derived features for one sequence element +kw=1; -- kernel only operates on one sequence element at once +dw=1; -- we step once and go on to the next sequence element + +mlp=nn.TemporalConvolution(inp,outp,kw,dw) + +require "lab" +x=lab.rand(7,inp) -- a sequence of 7 elements +print(mlp:forward(x)) +</file> +which gives: +<file lua> +-0.9109 +-0.9872 +-0.6808 +-0.9403 +-0.9680 +-0.6901 +-0.6387 +[torch.Tensor of dimension 7x1] +</file> + +This is equivalent to: +<file lua> +weights=lab.reshape(mlp.weight,inp) -- weights applied to all +bias= mlp.bias[1]; +for i=1,x:size(1) do -- for each sequence element + element= x[i]; -- features of ith sequence element + print(element:dot(weights) + bias) +end +</file> +which gives: +<file lua> +-0.91094998687717 +-0.98721705771773 +-0.68075004276185 +-0.94030132495887 +-0.96798754116609 +-0.69008470895581 +-0.63871422284166 +</file> + + +==== TemporalSubSampling ==== +{{anchor:nn.TemporalSubSampling}} + +<file lua> +module = nn.TemporalSubSampling(inputFrameSize, kW, [dW]) +</file> + +Applies a 1D sub-sampling over an input sequence composed of ''nInputFrame'' frames. The ''input'' tensor in +''forward(input)'' is expected to be a 2D tensor (''nInputFrame x inputFrameSize''). The output frame size +will be the same as the input one (''inputFrameSize''). + +The parameters are the following: + * ''inputFrameSize'': The input frame size expected in sequences given into ''forward()''. + * ''kW'': The kernel width of the sub-sampling + * ''dW'': The step of the sub-sampling. Default is ''1''. + +Note that depending of the size of your kernel, several (of the last) +frames of the sequence might be lost. It is up to the user to add proper padding frames in the input +sequences. + +If the input sequence is a 2D tensor ''nInputFrame x inputFrameSize'', the output sequence will be +''inputFrameSize x nOutputFrame'' where +<file lua> +nOutputFrame = (nInputFrame - kW) / dW + 1 +</file> + +The parameters of the sub-sampling can be found in ''self.weight'' (Tensor of +size ''inputFrameSize'') and ''self.bias'' (Tensor of +size ''inputFrameSize''). The corresponding gradients can be found in +''self.gradWeight'' and ''self.gradBias''. + +The output value of the layer can be precisely described as: +<file lua> +output[i][t] = bias[i] + weight[i] * sum_{k=1}^kW input[i][dW*(t-1)+k)] +</file> + +==== LookupTable ==== +{{anchor:nn.LookupTable}} + +<file lua> +module = nn.LookupTable(nIndex, sizes) +</file> +or +<file lua> +module = nn.LookupTable(nIndex, size1, [size2], [size3], ...) +</file> + +This layer is a particular case of a convolution, where the width of the convolution would be ''1''. +When calling ''forward(input)'', it assumes ''input'' is a 1D tensor filled with indices. Indices start +at ''1'' and can go up to ''nIndex''. For each index, it outputs a corresponding ''Tensor'' of size +specified by ''sizes'' (an ''LongStorage'') or ''size1 x size2 x...''. + +The output tensors are concatenated, generating a ''size1 x size2 x ... x sizeN x n'' tensor, where ''n'' +is the size of the ''input'' tensor. 
+ +When only ''size1'' is provided, this is equivalent to do the following matrix-matrix multiplication +in an efficient manner: +<file lua> +M P +</file> +where ''M'' is a 2D matrix ''size1 x nIndex'' containing the parameters of the lookup-table and +''P'' is a 2D matrix, where each column vector ''i'' is a zero vector except at index ''input[i]'' where it is ''1''. + +Example: +<file lua> + -- a lookup table containing 10 tensors of size 3 + module = nn.LookupTable(10, 3) + + input = torch.Tensor(4) + input[1] = 1; input[2] = 2; input[3] = 1; input[4] = 10; + print(module:forward(input)) +</file> + +Outputs something like: +<file lua> +-0.1784 2.2045 -0.1784 -0.2475 +-1.0120 0.0537 -1.0120 -0.2148 +-1.2840 0.8685 -1.2840 -0.2792 +[torch.Tensor of dimension 3x4] +</file> +Note that the first column vector is the same than the 3rd one! + +===== Layers for manipulating tables ===== +{{anchor:nn.TableLayers}} + +This set of modules allows the manipulation of Tables +through the layers of a neural network. +This allows one to build very rich architectures. + +Table-based modules work by supporting forward and backward methods that can accept +tables as inputs. It turns out that the usual [[#nn.Sequential|Sequential]] module can do this, so all that is needed is other child modules that take advantage of such tables. +<file lua> +mlp = nn.Sequential(); +t={x,y,z} +pred=mlp:forward(t) +pred=mlp:forward{x,y,z} -- This is equivalent to the line before +</file> + +==== ConcatTable ==== +{{anchor:nn.ConcatTable}} + +ConcatTable is a container module that applies each member module to +the same input Tensor. + +Example: +<file lua> +mlp= nn.ConcatTable() +mlp:add(nn.Linear(5,2)) +mlp:add(nn.Linear(5,3)) + +require "lab" +pred=mlp:forward(lab.randn(5)); +for i,k in pairs(pred) do print(i,k); end +</file> +which gives the output: +<file lua> +1 +-0.4073 + 0.0110 +[torch.Tensor of dimension 2] + +2 + 0.0027 +-0.0598 +-0.1189 +[torch.Tensor of dimension 3] +</file> + +==== ParallelTable ==== +{{anchor:nn.ParallelTable}} + +ParallelTable is a container module that, in its ''forward'' method, applies the ''ith'' member module to the ''ith'' input, and outputs a table of the set of outputs. + +Example: +<file lua> +mlp= nn.ParallelTable() +mlp:add(nn.Linear(10,2)) +mlp:add(nn.Linear(5,3)) + +require "lab" +x=lab.randn(10) +y=lab.rand(5) + +pred=mlp:forward{x,y} +for i,k in pairs(pred) do print(i,k); end +</file> +which gives the output: +<file lua> +1 + 0.0331 + 0.7003 +[torch.Tensor of dimension 2] + +2 + 0.0677 +-0.1657 +-0.7383 +[torch.Tensor of dimension 3] +</file> + +==== SplitTable ==== +{{anchor:nn.SplitTable}} + +''module'' = ''SplitTable(dimension)'' + +Creates a module that takes a Tensor as input and outputs several tables, splitting the Tensor along dimension ''dimension''. 
+ +Example 1: +<file lua> +require "lab" +mlp=nn.SplitTable(2) +x=lab.randn(4,3) +pred=mlp:forward(x) +for i,k in pairs(pred) do print(i,k); end +</file> +gives the output: +<file lua> +1 + 1.3885 + 1.3295 + 0.4281 +-1.0171 +[torch.Tensor of dimension 4] + +2 +-1.1565 +-0.8556 +-1.0717 +-0.8316 +[torch.Tensor of dimension 4] + +3 +-1.3678 +-0.1709 +-0.0191 +-2.5871 +[torch.Tensor of dimension 4] +</file> + +Example 2: +<file lua> +require "lab" +mlp=nn.SplitTable(1) +pred=mlp:forward(lab.randn(10,3)) +for i,k in pairs(pred) do print(i,k); end +</file> +gives the output: +<file lua> +1 + 1.6114 + 0.9038 + 0.8419 +[torch.Tensor of dimension 3] + +2 + 2.4742 + 0.2208 + 1.6043 +[torch.Tensor of dimension 3] + +3 + 1.3415 + 0.2984 + 0.2260 +[torch.Tensor of dimension 3] + +4 + 2.0889 + 1.2309 + 0.0983 +[torch.Tensor of dimension 3] +</file> + +A more complicated example: +<file lua> +require "lab" + +mlp=nn.Sequential(); --Create a network that takes a Tensor as input +mlp:add(nn.SplitTable(2)) + c=nn.ParallelTable() --The two Tensors go through two different Linear + c:add(nn.Linear(10,3)) --Layers in Parallel + c:add(nn.Linear(10,7)) +mlp:add(c) --Outputing a table with 2 elements + p=nn.ParallelTable() --These tables go through two more linear layers + p:add(nn.Linear(3,2)) -- separately. + p:add(nn.Linear(7,1)) +mlp:add(p) +mlp:add(nn.JoinTable(1)) --Finally, the tables are joined together and output. + +pred=mlp:forward(lab.randn(10,2)) +print(pred) + +for i=1,100 do -- A few steps of training such a network.. + x=lab.ones(10,2); + y=torch.Tensor(3); y:copy(x:select(2,1,1):narrow(1,1,3)) + pred=mlp:forward(x) + + criterion= nn.MSECriterion() + local err=criterion:forward(pred,y) + local gradCriterion = criterion:backward(pred,y); + mlp:zeroGradParameters(); + mlp:backward(x, gradCriterion); + mlp:updateParameters(0.05); + + print(err) +end +</file> + +==== JoinTable ==== +{{anchor:nn.JoinTable}} + +''module'' = ''JoinTable(dimension)'' + +Creates a module that takes a list of Tensors as input and outputs a Tensor by joining them together along dimension ''dimension''. + +Example: +<file lua> +require "lab" +x=lab.randn(5,1) +y=lab.randn(5,1) +z=lab.randn(2,1) + +print(nn.JoinTable(1):forward{x,y}) +print(nn.JoinTable(2):forward{x,y}) +print(nn.JoinTable(1):forward{x,z}) +</file> +gives the output: +<file lua> +1.3965 + 0.5146 +-1.5244 +-0.9540 + 0.4256 + 0.1575 + 0.4491 + 0.6580 + 0.1784 +-1.7362 + + 1.3965 0.1575 + 0.5146 0.4491 +-1.5244 0.6580 +-0.9540 0.1784 + 0.4256 -1.7362 + + 1.3965 + 0.5146 +-1.5244 +-0.9540 + 0.4256 +-1.2660 + 1.0869 +[torch.Tensor of dimension 7x1] +</file> + +A more complicated example: +<file lua> +require "lab" + +mlp=nn.Sequential(); --Create a network that takes a Tensor as input + c=nn.ConcatTable() --The same Tensor goes through two different Linear + c:add(nn.Linear(10,3)) --Layers in Parallel + c:add(nn.Linear(10,7)) +mlp:add(c) --Outputing a table with 2 elements + p=nn.ParallelTable() --These tables go through two more linear layers + p:add(nn.Linear(3,2)) -- separately. + p:add(nn.Linear(7,1)) +mlp:add(p) +mlp:add(nn.JoinTable(1)) --Finally, the tables are joined together and output. + +pred=mlp:forward(lab.randn(10)) +print(pred) + +for i=1,100 do -- A few steps of training such a network.. 
+ x=lab.ones(10); + y=torch.Tensor(3); y:copy(x:narrow(1,1,3)) + pred=mlp:forward(x) + + criterion= nn.MSECriterion() + local err=criterion:forward(pred,y) + local gradCriterion = criterion:backward(pred,y); + mlp:zeroGradParameters(); + mlp:backward(x, gradCriterion); + mlp:updateParameters(0.05); + + print(err) +end +</file> + +==== Identity ==== +{{anchor:nn.Identity}} + +''module'' = ''Identity()'' + +Creates a module that returns whatever is input to it as output. +This is useful when combined with the module +[[#nn.ParallelTable|ParallelTable]] +in case you do not wish to do anything to one of the input Tensors. +Example: +<file lua> +require "lab" +mlp=nn.Identity() +print(mlp:forward(lab.ones(5,2))) +</file> +gives the output: +<file lua> + 1 1 + 1 1 + 1 1 + 1 1 + 1 1 +[torch.Tensor of dimension 5x2] +</file> + +Here is a more useful example, where one can implement a network which also computes a Criterion using this module: +<file lua> +pred_mlp=nn.Sequential(); -- A network that makes predictions given x. +pred_mlp:add(nn.Linear(5,4)) +pred_mlp:add(nn.Linear(4,3)) + +xy_mlp=nn.ParallelTable();-- A network for predictions and for keeping the +xy_mlp:add(pred_mlp) -- true label for comparison with a criterion +xy_mlp:add(nn.Identity()) -- by forwarding both x and y through the network. + +mlp=nn.Sequential(); -- The main network that takes both x and y. +mlp:add(xy_mlp) -- It feeds x and y to parallel networks; +cr=nn.MSECriterion(); +cr_wrap=nn.CriterionTable(cr) +mlp:add(cr_wrap) -- and then applies the criterion. + +for i=1,100 do -- Do a few training iterations + x=lab.ones(5); -- Make input features. + y=torch.Tensor(3); + y:copy(x:narrow(1,1,3)) -- Make output label. + err=mlp:forward{x,y} -- Forward both input and output. + print(err) -- Print error from criterion. + + mlp:zeroGradParameters(); -- Do backprop... + mlp:backward({x, y} ); + mlp:updateParameters(0.05); +end +</file> + +==== PairwiseDistance ==== +{{anchor:nn.PairwiseDistance}} + +''module'' = ''PairwiseDistance(p)'' creates a module that takes a table of two vectors as input and outputs the distance between them using the ''p''-norm. + +Example: +<file lua> +mlp_l1=nn.PairwiseDistance(1) +mlp_l2=nn.PairwiseDistance(2) +x=lab.new(1,2,3) +y=lab.new(4,5,6) +print(mlp_l1:forward({x,y})) +print(mlp_l2:forward({x,y})) +</file> +gives the output: +<file lua> + 9 +[torch.Tensor of dimension 1] + + 5.1962 +[torch.Tensor of dimension 1] +</file> + +A more complicated example: +<file lua> +-- imagine we have one network we are interested in, it is called "p1_mlp" +p1_mlp= nn.Sequential(); p1_mlp:add(nn.Linear(5,2)) + +-- But we want to push examples towards or away from each other +-- so we make another copy of it called p2_mlp +-- this *shares* the same weights via the set command, but has its own set of temporary gradient storage +-- that's why we create it again (so that the gradients of the pair don't wipe each other) +p2_mlp= nn.Sequential(); p2_mlp:add(nn.Linear(5,2)) +p2_mlp:get(1).weight:set(p1_mlp:get(1).weight) +p2_mlp:get(1).bias:set(p1_mlp:get(1).bias) + +-- we make a parallel table that takes a pair of examples as input. 
they both go through the same (cloned) mlp +prl = nn.ParallelTable() +prl:add(p1_mlp) +prl:add(p2_mlp) + +-- now we define our top level network that takes this parallel table and computes the pairwise distance betweem +-- the pair of outputs +mlp= nn.Sequential() +mlp:add(prl) +mlp:add(nn.PairwiseDistance(1)) + +-- and a criterion for pushing together or pulling apart pairs +crit=nn.HingeEmbeddingCriterion(1) + +-- lets make two example vectors +x=lab.rand(5) +y=lab.rand(5) + + +-- Use a typical generic gradient update function +function gradUpdate(mlp, x, y, criterion, learningRate) +local pred = mlp:forward(x) +local err = criterion:forward(pred, y) +local gradCriterion = criterion:backward(pred, y) +mlp:zeroGradParameters() +mlp:backward(x, gradCriterion) +mlp:updateParameters(learningRate) +end + +-- push the pair x and y together, notice how then the distance between them given +-- by print(mlp:forward({x,y})[1]) gets smaller +for i=1,10 do +gradUpdate(mlp,{x,y},1,crit,0.01) +print(mlp:forward({x,y})[1]) +end + + +-- pull apart the pair x and y, notice how then the distance between them given +-- by print(mlp:forward({x,y})[1]) gets larger + +for i=1,10 do +gradUpdate(mlp,{x,y},-1,crit,0.01) +print(mlp:forward({x,y})[1]) +end + +</file> + +==== DotProduct ==== +{{anchor:nn.DotProduct}} + +''module'' = ''DotProduct()'' creates a module that takes a table of two vectors as input and outputs the dot product between them. + +Example: +<file lua> +mlp=nn.DotProduct() +x=lab.new(1,2,3) +y=lab.new(4,5,6) +print(mlp:forward({x,y})) +</file> +gives the output: +<file lua> + 32 +[torch.Tensor of dimension 1] +</file> + + +A more complicated example: +<file lua> + +-- Train a ranking function so that mlp:forward({x,y},{x,z}) returns a number +-- which indicates whether x is better matched with y or z (larger score = better match), or vice versa. + +mlp1=nn.Linear(5,10) +mlp2=mlp1:clone('weight','bias') + +prl=nn.ParallelTable(); +prl:add(mlp1); prl:add(mlp2) + +mlp1=nn.Sequential() +mlp1:add(prl) +mlp1:add(nn.DotProduct()) + +mlp2=mlp1:clone('weight','bias') + +mlp=nn.Sequential() +prla=nn.ParallelTable() +prla:add(mlp1) +prla:add(mlp2) +mlp:add(prla) + +x=lab.rand(5); +y=lab.rand(5) +z=lab.rand(5) + + +print(mlp1:forward{x,x}) +print(mlp1:forward{x,y}) +print(mlp1:forward{y,y}) + + +crit=nn.MarginRankingCriterion(1); + +-- Use a typical generic gradient update function +function gradUpdate(mlp, x, y, criterion, learningRate) + local pred = mlp:forward(x) + local err = criterion:forward(pred, y) + local gradCriterion = criterion:backward(pred, y) + mlp:zeroGradParameters() + mlp:backward(x, gradCriterion) + mlp:updateParameters(learningRate) +end + +inp={{x,y},{x,z}} + +math.randomseed(1) + +-- make the pair x and y have a larger dot product than x and z + +for i=1,100 do + gradUpdate(mlp,inp,1,crit,0.05) + o1=mlp1:forward{x,y}[1]; + o2=mlp2:forward{x,z}[1]; + o=crit:forward(mlp:forward{{x,y},{x,z}},1) + print(o1,o2,o) +end + +print "******************" + +-- make the pair x and z have a larger dot product than x and y + +for i=1,100 do + gradUpdate(mlp,inp,-1,crit,0.05) + o1=mlp1:forward{x,y}[1]; + o2=mlp2:forward{x,z}[1]; + o=crit:forward(mlp:forward{{x,y},{x,z}},-1) + print(o1,o2,o) +end +</file> + + +==== CosineDistance ==== +{{anchor:nn.CosineDistance}} + +''module'' = ''CosineDistance()'' creates a module that takes a table of two vectors as input and outputs the cosine distance between them. 
+ +Example: +<file lua> +mlp=nn.CosineDistance() +x=lab.new(1,2,3) +y=lab.new(4,5,6) +print(mlp:forward({x,y})) +</file> +gives the output: +<file lua> + 0.9746 +[torch.Tensor of dimension 1] +</file> + +A more complicated example: +<file lua> + +-- imagine we have one network we are interested in, it is called "p1_mlp" +p1_mlp= nn.Sequential(); p1_mlp:add(nn.Linear(5,2)) + +-- But we want to push examples towards or away from each other +-- so we make another copy of it called p2_mlp +-- this *shares* the same weights via the set command, but has its own set of temporary gradient storage +-- that's why we create it again (so that the gradients of the pair don't wipe each other) +p2_mlp= p1_mlp:clone('weight','bias') + +-- we make a parallel table that takes a pair of examples as input. they both go through the same (cloned) mlp +prl = nn.ParallelTable() +prl:add(p1_mlp) +prl:add(p2_mlp) + +-- now we define our top level network that takes this parallel table and computes the cosine distance betweem +-- the pair of outputs +mlp= nn.Sequential() +mlp:add(prl) +mlp:add(nn.CosineDistance()) + + +-- lets make two example vectors +x=lab.rand(5) +y=lab.rand(5) + +-- Grad update function.. +function gradUpdate(mlp, x, y, learningRate) +local pred = mlp:forward(x) +if pred[1]*y < 1 then + gradCriterion=lab.new(-y) + mlp:zeroGradParameters() + mlp:backward(x, gradCriterion) + mlp:updateParameters(learningRate) +end +end + +-- push the pair x and y together, the distance should get larger.. +for i=1,1000 do + gradUpdate(mlp,{x,y},1,0.1) + if ((i%100)==0) then print(mlp:forward({x,y})[1]);end +end + + +-- pull apart the pair x and y, the distance should get smaller.. + +for i=1,1000 do + gradUpdate(mlp,{x,y},-1,0.1) + if ((i%100)==0) then print(mlp:forward({x,y})[1]);end +end +</file> + + + +==== CriterionTable ==== +{{anchor:nn.CriterionTable}} + +''module'' = ''CriterionTable(criterion)'' + +Creates a module that wraps a Criterion module so that it can accept a Table of inputs. Typically the table would contain two elements: the input and output ''x'' and ''y'' that the Criterion compares. + +Example: +<file lua> +mlp = nn.CriterionTable(nn.MSECriterion()) +require "lab" +x=lab.randn(5) +y=lab.randn(5) +print(mlp:forward{x,x}) +print(mlp:forward{x,y}) +</file> +gives the output: +<file lua> +0 +1.9028918413199 +</file> + +Here is a more complex example of embedding the criterion into a network: +<file lua> +require "lab" + +function table.print(t) + for i,k in pairs(t) do print(i,k); end +end + +mlp=nn.Sequential(); -- Create an mlp that takes input + main_mlp=nn.Sequential(); -- and output using ParallelTable + main_mlp:add(nn.Linear(5,4)) + main_mlp:add(nn.Linear(4,3)) + cmlp=nn.ParallelTable(); + cmlp:add(main_mlp) + cmlp:add(nn.Identity()) +mlp:add(cmlp) +mlp:add(nn.CriterionTable(nn.MSECriterion())) -- Apply the Criterion + +for i=1,20 do -- Train for a few iterations + x=lab.ones(5); + y=torch.Tensor(3); y:copy(x:narrow(1,1,3)) + err=mlp:forward{x,y} -- Pass in both input and output + print(err) + + mlp:zeroGradParameters(); + mlp:backward({x, y} ); + mlp:updateParameters(0.05); +end +</file> + +==== CAddTable ==== +{{anchor:nn.CAddTable}} + +Takes a table of tensors and outputs summation of all tensors. 
+
+<file lua>
+ii = {lab.ones(5),lab.ones(5)*2,lab.ones(5)*3}
+=ii[1]
+ 1
+ 1
+ 1
+ 1
+ 1
+[torch.DoubleTensor of dimension 5]
+
+=ii[2]
+ 2
+ 2
+ 2
+ 2
+ 2
+[torch.DoubleTensor of dimension 5]
+
+=ii[3]
+ 3
+ 3
+ 3
+ 3
+ 3
+[torch.DoubleTensor of dimension 5]
+
+m=nn.CAddTable()
+=m:forward(ii)
+ 6
+ 6
+ 6
+ 6
+ 6
+[torch.DoubleTensor of dimension 5]
+</file>
+
+
+==== CSubTable ====
+{{anchor:nn.CSubTable}}
+
+Takes a table with two tensors and returns the component-wise
+subtraction between them.
+
+<file lua>
+m=nn.CSubTable()
+=m:forward({lab.ones(5)*2.2,lab.ones(5)})
+ 1.2000
+ 1.2000
+ 1.2000
+ 1.2000
+ 1.2000
+[torch.DoubleTensor of dimension 5]
+</file>
+
+==== CMulTable ====
+{{anchor:nn.CMulTable}}
+
+Takes a table of tensors and outputs the multiplication of all of them.
+
+<file lua>
+ii = {lab.ones(5)*2,lab.ones(5)*3,lab.ones(5)*4}
+m=nn.CMulTable()
+=m:forward(ii)
+ 24
+ 24
+ 24
+ 24
+ 24
+[torch.DoubleTensor of dimension 5]
+
+</file>
+
+==== CDivTable ====
+{{anchor:nn.CDivTable}}
+
+Takes a table with two tensors and returns the component-wise
+division between them.
+
+<file lua>
+m=nn.CDivTable()
+=m:forward({lab.ones(5)*2.2,lab.ones(5)*4.4})
+ 0.5000
+ 0.5000
+ 0.5000
+ 0.5000
+ 0.5000
+[torch.DoubleTensor of dimension 5]
+</file>
+
+====== Criterions ======
+{{anchor:nn.Criterions}}
+
+Criterions are helpful for training a neural network. Given an input and a
+target, they compute a gradient according to a given loss
+function. [[#nn.AbsCriterion|AbsCriterion]] and
+[[#nn.MSECriterion|MSECriterion]] are perfect for regression problems, while
+[[#nn.ClassNLLCriterion|ClassNLLCriterion]] is the criterion of choice when
+dealing with classification.
+
+Criterions are [[..:torch:file#torch.file.serialization|serializable]].
+
+===== Criterion =====
+{{anchor:nn.Criterion}}
+
+This is an abstract class which declares methods defined in all criterions.
+This class is [[..:torch:file#torch.file.serialization|serializable]].
+
+==== [output] forward(input, target) ====
+{{anchor:nn.Criterion.forward}}
+
+Given an ''input'' and a ''target'', computes the loss function associated with the criterion and returns the
+result. In general ''input'' and ''target'' are [[..:torch:tensor|tensors]], but some specific criterions
+might require some other type of object.
+
+The ''output'' returned should in general be a scalar.
+
+The state variable [[#nn.Criterion.output|self.output]] should be updated after a call to ''forward()''.
+
+==== [gradInput] backward(input, target) ====
+{{anchor:nn.Criterion.backward}}
+
+Given an ''input'' and a ''target'', computes the gradients of the loss function associated with the criterion and
+returns the result. In general ''input'', ''target'' and ''gradInput'' are [[..:torch:tensor|tensors]], but some specific criterions
+might require some other type of object.
+
+The state variable [[#nn.Criterion.gradInput|self.gradInput]] should be updated after a call to ''backward()''.
+
+==== State variable: output ====
+{{anchor:nn.Criterion.output}}
+
+State variable which contains the result of the last [[#nn.Criterion.forward|forward(input, target)]] call.
+
+==== State variable: gradInput ====
+{{anchor:nn.Criterion.gradInput}}
+
+State variable which contains the result of the last [[#nn.Criterion.backward|backward(input, target)]] call.
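+
+To make the calling convention concrete, here is a minimal sketch using the
+[[#nn.MSECriterion|MSECriterion]] described below (it assumes ''lab'' is loaded, as in
+the other examples):
+<file lua>
+criterion = nn.MSECriterion()
+input  = lab.randn(5)
+target = lab.randn(5)
+loss = criterion:forward(input, target)   -- a number, also stored in criterion.output
+grad = criterion:backward(input, target)  -- a Tensor, also stored in criterion.gradInput
+print(loss)
+print(grad)
+</file>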
+ +===== AbsCriterion ===== +{{anchor:nn.AbsCriterion}} + +<file lua> +criterion = AbsCriterion() +</file> + +Creates a criterion that +measures the mean absolute value between ''n'' elements in the input ''x'' +and output ''y'': + +''loss(x,y)'' = ''1/n \sum |x_i-y_i|''. + +If ''x'' and ''y'' are ''d''-dimensional Tensors with a total of ''n'' elements, +the sum operation still operates over all the elements, and divides by ''n''. + +The division by ''n'' can be avoided if one sets the internal variable ''sizeAverage'' to ''false'': +<file lua> +criterion = nn.AbsCriterion() +criterion.sizeAverage = false +</file> + +===== ClassNLLCriterion ===== +{{anchor:nn.ClassNLLCriterion}} + +<file lua> +criterion = ClassNLLCriterion() +</file> + +The negative log likelihood criterion. It is useful to train a classication +problem with ''n'' classes. The ''input'' given through a ''forward()'' is +expected to contain //log-probabilities// of each class: ''input'' has to be a +1D tensor of size ''n''. Obtaining log-probabilities in a neural network is +easily achieved by adding a [[#nn.LogSoftMax|LogSoftMax]] layer in the last +layer of your neural network. + +This criterion expect a class index (1 to the number of class) as ''target'' +when calling [[#nn.CriterionForward|forward(input, target)]] and +[[#nn.CriterionBackward|backward(input, target)]]. + +The loss can be described as: +<file lua> +loss(x, class) = forward(x, class) = -x[class] +</file> + +The following is a code fragment showing how to make a gradient step +given an input ''x'', a desired output ''y'' (an integer ''1'' to ''n'', +in this case ''n'' = ''2'' classes), +a network ''mlp'' and a learning rate ''learningRate'': +<file lua> +function gradUpdate(mlp,x,y,learningRate) + local criterion = nn.ClassNLLCriterion() + pred = mlp:forward(x) + local err = criterion:forward(pred, y); + mlp:zeroGradParameters(); + local t = criterion:backward(pred, y); + mlp:backward(x, t); + mlp:updateParameters(learningRate); +end +</file> + +===== MarginCriterion ===== +{{anchor:nn.MarginCriterion}} + +<file lua> +criterion = MarginCriterion() +</file> + +Creates a criterion that optimizes a two-class classification hinge loss (margin-based loss) between input ''x'' (a Tensor of dimension 1) and output ''y'' (which is a scalar, either 1 or -1) : + +<file lua> +loss(x,y) = forward(x,y) = max(0,m- y x). +</file> + +''m'' is the margin, which is by default 1. + +<file lua> +criterion = MarginCriterion(marginValue) +</file> + +sets a different value of ''m''. + + +Example: +<file lua> +require "nn" +require "lab" + +function gradUpdate(mlp, x, y, criterion, learningRate) + local pred = mlp:forward(x) + local err = criterion:forward(pred, y) + local gradCriterion = criterion:backward(pred, y) + mlp:zeroGradParameters() + mlp:backward(x, gradCriterion) + mlp:updateParameters(learningRate) +end + +mlp=nn.Sequential() +mlp:add(nn.Linear(5,1)) + +x1=lab.rand(5) +x2=lab.rand(5) +criterion=nn.MarginCriterion(1) + +for i=1,1000 do + gradUpdate(mlp,x1,1,criterion,0.01) + gradUpdate(mlp,x2,-1,criterion,0.01) +end + +print(mlp:forward(x1)) +print(mlp:forward(x2)) + +print(criterion:forward(mlp:forward(x1),1)) +print(criterion:forward(mlp:forward(x2),-1)) +</file> +gives the output: +<file lua> + 1.0043 +[torch.Tensor of dimension 1] + + +-1.0061 +[torch.Tensor of dimension 1] + +0 +0 +</file> +i.e. the mlp successfully separates the two data points such that they both have a margin of 1, and hence a loss of 0. 
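+
+The loss formula can also be checked directly on a hand-built prediction (a minimal
+sketch; as in the example above, the prediction is a 1-element Tensor and the target a
+number):
+<file lua>
+crit = nn.MarginCriterion(1)     -- margin m = 1
+p = torch.Tensor(1); p[1] = 0.6  -- the prediction
+print(crit:forward(p, 1))        -- max(0, 1 - 1*0.6)    = 0.4
+print(crit:forward(p, -1))       -- max(0, 1 - (-1)*0.6) = 1.6
+</file>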
+ +===== MSECriterion ===== +{{anchor:nn.MSECriterion}} + +<file lua> +criterion = MSECriterion() +</file> + +Creates a criterion that measures the mean squared error between ''n'' elements in the input ''x'' +and output ''y'': + +<file lua> +loss(x,y) = forward(x,y) = 1/n \sum |x_i-y_i|^2 . +</file> + +If ''x'' and ''y'' are ''d''-dimensional Tensors with a total of ''n'' elements, +the sum operation still operates over all the elements, and divides by ''n''. The two tensors must +have the same number of elements (but their sizes might be different...) + +The division by ''n'' can be avoided if one sets the internal variable ''sizeAverage'' to ''false'': +<file lua> +criterion = nn.MSECriterion() +criterion.sizeAverage = false +</file> + +===== MultiCriterion ===== +{{anchor:nn.MultiCriterion}} + +<file lua> +criterion = MultiCriterion() +</file> + +This returns a Criterion which is a weighted sum of other Criterion. +Criterions are added using the method: + +''criterion:add(singleCriterion, weight)'' + +where ''weight'' is a scalar. + + +===== HingeEmbeddingCriterion ===== +{{anchor:nn.HingeEmbeddingCriterion}} + +<file lua> +criterion = HingeEmbeddingCriterion() +</file> + +Creates a criterion that measures the loss given an input +''x'' which is a 1-dimensional vector and a label ''y'' (1 or -1). +This is usually used for measuring whether two inputs are similar +or dissimilar, e.g. using the L1 pairwise distance, +and is typically used for +learning nonlinear embeddings or semi-supervised learning. + +<verbatim> +loss(x,y) = forward(x,y) = x, if y=1 += max(0,margin - x), if y=-1 +</verbatim> + +The ''margin'' has a default value of 1, or can be set in the constructor: +<file lua> +criterion = HingeEmbeddingCriterion(marginValue) +</file> + +Example use: +<file lua> +-- imagine we have one network we are interested in, it is called "p1_mlp" +p1_mlp= nn.Sequential(); p1_mlp:add(nn.Linear(5,2)) + +-- But we want to push examples towards or away from each other +-- so we make another copy of it called p2_mlp +-- this *shares* the same weights via the set command, but has its own set of temporary gradient storage +-- that's why we create it again (so that the gradients of the pair don't wipe each other) +p2_mlp= nn.Sequential(); p2_mlp:add(nn.Linear(5,2)) +p2_mlp:get(1).weight:set(p1_mlp:get(1).weight) +p2_mlp:get(1).bias:set(p1_mlp:get(1).bias) + +-- we make a parallel table that takes a pair of examples as input. 
+-- they both go through the same (cloned) mlp
+prl = nn.ParallelTable()
+prl:add(p1_mlp)
+prl:add(p2_mlp)
+
+-- now we define our top-level network that takes this parallel table
+-- and computes the pairwise distance between the pair of outputs
+mlp = nn.Sequential()
+mlp:add(prl)
+mlp:add(nn.PairwiseDistance(1))
+
+-- and a criterion for pushing together or pulling apart pairs
+crit = nn.HingeEmbeddingCriterion(1)
+
+-- let's make two example vectors
+x = lab.rand(5)
+y = lab.rand(5)
+
+-- use a typical generic gradient update function
+function gradUpdate(mlp, x, y, criterion, learningRate)
+   local pred = mlp:forward(x)
+   local err = criterion:forward(pred, y)
+   local gradCriterion = criterion:backward(pred, y)
+   mlp:zeroGradParameters()
+   mlp:backward(x, gradCriterion)
+   mlp:updateParameters(learningRate)
+end
+
+-- push the pair x and y together; notice how the distance between them,
+-- given by print(mlp:forward({x,y})[1]), gets smaller
+for i = 1,10 do
+   gradUpdate(mlp, {x,y}, 1, crit, 0.01)
+   print(mlp:forward({x,y})[1])
+end
+
+-- pull apart the pair x and y; notice how the distance between them,
+-- given by print(mlp:forward({x,y})[1]), gets larger
+for i = 1,10 do
+   gradUpdate(mlp, {x,y}, -1, crit, 0.01)
+   print(mlp:forward({x,y})[1])
+end
+</file>
+
+===== L1HingeEmbeddingCriterion =====
+{{anchor:nn.L1HingeEmbeddingCriterion}}
+
+<file lua>
+criterion = nn.L1HingeEmbeddingCriterion(margin)
+</file>
+
+Creates a criterion that measures the loss given an input
+''x'' = ''{x1,x2}'', a table of two Tensors, and a label ''y'' (1 or -1).
+This is used for measuring whether two inputs are similar
+or dissimilar, using the L1 distance, and is typically used for
+learning nonlinear embeddings or semi-supervised learning.
+
+<verbatim>
+loss(x,y) = forward(x,y) = ||x1-x2||_1,                   if y =  1
+                         = max(0, margin - ||x1-x2||_1),  if y = -1
+</verbatim>
+
+The ''margin'' has a default value of 1, or can be set in the constructor:
+<file lua>
+criterion = nn.L1HingeEmbeddingCriterion(marginValue)
+</file>
+
+===== CosineEmbeddingCriterion =====
+{{anchor:nn.CosineEmbeddingCriterion}}
+
+<file lua>
+criterion = nn.CosineEmbeddingCriterion(margin)
+</file>
+
+Creates a criterion that measures the loss given an input
+''x'' = ''{x1,x2}'', a table of two Tensors, and a label ''y'' (1 or -1).
+This is used for measuring whether two inputs are similar
+or dissimilar, using the cosine distance, and is typically used for
+learning nonlinear embeddings or semi-supervised learning.
+
+''margin'' should be a number from -1 to 1; 0 to 0.5 is suggested.
+Forward and Backward have to be used alternately. If ''margin'' is missing, the default value is 0.
+
+The loss function is:
+<verbatim>
+loss(x,y) = forward(x,y) = 1 - cos(x1, x2),               if y =  1
+                         = max(0, cos(x1, x2) - margin),  if y = -1
+</verbatim>
+
+===== MarginRankingCriterion =====
+{{anchor:nn.MarginRankingCriterion}}
+
+<file lua>
+criterion = nn.MarginRankingCriterion(margin)
+</file>
+
+Creates a criterion that measures the loss given an input
+''x'' = ''{x1,x2}'', a table of two Tensors of size 1 (they contain only scalars),
+and a label ''y'' (1 or -1).
+
+If ''y'' = ''1'' then it is assumed that the first input should be ranked higher (have a larger value)
+than the second input, and vice-versa for ''y'' = ''-1''.
+
+The loss function is:
+<verbatim>
+loss(x,y) = forward(x,y) = max(0, -y*(x[1]-x[2]) + margin)
+</verbatim>
+
+Example:
+<file lua>
+p1_mlp = nn.Linear(5,2)
+p2_mlp = p1_mlp:clone('weight','bias')
+
+prl = nn.ParallelTable()
+prl:add(p1_mlp)
+prl:add(p2_mlp)
+
+mlp1 = nn.Sequential()
+mlp1:add(prl)
+mlp1:add(nn.DotProduct())
+
+mlp2 = mlp1:clone('weight','bias')
+
+mlpa = nn.Sequential()
+prla = nn.ParallelTable()
+prla:add(mlp1)
+prla:add(mlp2)
+mlpa:add(prla)
+
+crit = nn.MarginRankingCriterion(0.1)
+
+x = lab.randn(5)
+y = lab.randn(5)
+z = lab.randn(5)
+
+-- use a typical generic gradient update function
+function gradUpdate(mlp, x, y, criterion, learningRate)
+   local pred = mlp:forward(x)
+   local err = criterion:forward(pred, y)
+   local gradCriterion = criterion:backward(pred, y)
+   mlp:zeroGradParameters()
+   mlp:backward(x, gradCriterion)
+   mlp:updateParameters(learningRate)
+end
+
+for i = 1,100 do
+   gradUpdate(mlpa, {{x,y},{x,z}}, 1, crit, 0.01)
+   if true then
+      o1 = mlp1:forward{x,y}[1]
+      o2 = mlp2:forward{x,z}[1]
+      o  = crit:forward(mlpa:forward{{x,y},{x,z}}, 1)
+      print(o1, o2, o)
+   end
+end
+
+print "--"
+
+for i = 1,100 do
+   gradUpdate(mlpa, {{x,y},{x,z}}, -1, crit, 0.01)
+   if true then
+      o1 = mlp1:forward{x,y}[1]
+      o2 = mlp2:forward{x,z}[1]
+      o  = crit:forward(mlpa:forward{{x,y},{x,z}}, -1)
+      print(o1, o2, o)
+   end
+end
+</file>
+
+====== Training a neural network ======
+{{anchor:nn.traningneuralnet.dok}}
+
+Training a neural network is easy with a [[#nn.DoItYourself|simple ''for'' loop]].
+While doing your own loop provides great flexibility, you might
+sometimes want a quick way of training neural
+networks. [[#nn.StochasticGradient|StochasticGradient]], a simple class
+which does the job for you, is provided as standard.
+
+===== StochasticGradient =====
+{{anchor:nn.StochasticGradient.dok}}
+
+''StochasticGradient'' is a high-level class for training [[#nn.Module|neural networks]], using a stochastic gradient
+algorithm. This class is [[..:torch:file#torch.file.serialization|serializable]].
+
+==== StochasticGradient(module, criterion) ====
+{{anchor:nn.StochasticGradient}}
+
+Create a ''StochasticGradient'' class, using the given [[#nn.Module|Module]] and [[#nn.Criterion|Criterion]].
+The class contains [[#nn.StochasticGradientParameters|several parameters]] you might want to set after initialization.
+
+==== train(dataset) ====
+{{anchor:nn.StochasticGradientTrain}}
+
+Train the module and criterion given in the
+[[#nn.StochasticGradient|constructor]] over ''dataset'', using the
+internal [[#nn.StochasticGradientParameters|parameters]].
+
+StochasticGradient expects as a ''dataset'' an object which implements the operator
+''dataset[index]'' and implements the method ''dataset:size()''. The ''size()'' method
+returns the number of examples and ''dataset[i]'' has to return the i-th example.
+
+An ''example'' has to be an object which implements the operator
+''example[field]'', where ''field'' might take the value ''1'' (input features)
+or ''2'' (corresponding label which will be given to the criterion).
+The input is usually a Tensor (except if you use special kinds of modules,
+like [[#nn.TableLayers|table layers]]). The label type depends on the criterion.
+For example, the [[#nn.MSECriterion|MSECriterion]] expects a Tensor, but the
+[[#nn.ClassNLLCriterion|ClassNLLCriterion]] expects an integer number (the class).
+
+Such a dataset is easily constructed by using Lua tables, but it could be any object
+(for example a ''C'' object), as long as the required operators/methods are implemented.
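+
+For instance, a minimal sketch of such an object (not taken from the package; the field name ''examples'' is only illustrative) could wrap a plain Lua table of precomputed ''{input, target}'' pairs:
+<file lua>
+-- dataset:size() returns the number of examples,
+-- dataset[i] returns the i-th example as {input, target}
+dataset = {}
+dataset.examples = {}
+function dataset:size() return #self.examples end
+setmetatable(dataset, {__index = function(self, i) return self.examples[i] end})
+
+-- add one (dummy) example: a 2d input and a 1d target
+table.insert(dataset.examples, {torch.Tensor(2):fill(0.5), torch.Tensor(1):fill(1)})
+</file>
+The full XOR example below builds its dataset in the same spirit, storing the pairs directly in the table.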
+[[#nn.DoItStochasticGradient|See an example]].
+
+==== Parameters ====
+{{anchor:nn.StochasticGradientParameters}}
+
+''StochasticGradient'' has several fields which have an impact on a call to [[#nn.StochasticGradientTrain|train()]].
+
+  * ''learningRate'': This is the learning rate used during training. The update of the parameters will be ''parameters = parameters - learningRate * parameters_gradient''. Default value is ''0.01''.
+  * ''learningRateDecay'': The learning rate decay. If non-zero, the learning rate (note: the field ''learningRate'' will not change its value) will be computed after each iteration (pass over the dataset) with: ''current_learning_rate = learningRate / (1 + iteration * learningRateDecay)''.
+  * ''maxIteration'': The maximum number of iterations (passes over the dataset). Default is ''25''.
+  * ''shuffleIndices'': Boolean which says if the examples will be randomly sampled or not. Default is ''true''. If ''false'', the examples will be taken in the order of the dataset.
+  * ''hookExample'': A possible hook function which will be called (if non-nil) during training after each example has been forwarded and backwarded through the network. The function takes ''(self, example)'' as parameters. Default is ''nil''.
+  * ''hookIteration'': A possible hook function which will be called (if non-nil) during training after a complete pass over the dataset. The function takes ''(self, iteration)'' as parameters. Default is ''nil''.
+
+===== Example of training using StochasticGradient =====
+{{anchor:nn.DoItStochasticGradient}}
+
+We show an example here on a classical XOR problem.
+
+**Dataset**
+
+We first need to create a dataset, following the conventions described in
+[[#nn.StochasticGradientTrain|StochasticGradient]].
+<file lua>
+require "lab"
+dataset = {}
+function dataset:size() return 100 end -- 100 examples
+for i = 1,dataset:size() do
+   local input = lab.randn(2)     -- normally distributed example in 2d
+   local output = torch.Tensor(1)
+   if input[1]*input[2] > 0 then  -- calculate label for XOR function
+      output[1] = -1
+   else
+      output[1] = 1
+   end
+   dataset[i] = {input, output}
+end
+</file>
+
+**Neural Network**
+
+We create a simple neural network with one hidden layer.
+<file lua>
+require "nn"
+mlp = nn.Sequential();  -- make a multi-layer perceptron
+inputs = 2; outputs = 1; HUs = 20; -- parameters
+mlp:add(nn.Linear(inputs, HUs))
+mlp:add(nn.Tanh())
+mlp:add(nn.Linear(HUs, outputs))
+</file>
+
+**Training**
+
+We choose the Mean Squared Error criterion and train the beast.
+<file lua>
+criterion = nn.MSECriterion()
+trainer = nn.StochasticGradient(mlp, criterion)
+trainer.learningRate = 0.01
+trainer:train(dataset)
+</file>
+
+**Test the network**
+
+<file lua>
+x = torch.Tensor(2)
+x[1] =  0.5; x[2] =  0.5; print(mlp:forward(x))
+x[1] =  0.5; x[2] = -0.5; print(mlp:forward(x))
+x[1] = -0.5; x[2] =  0.5; print(mlp:forward(x))
+x[1] = -0.5; x[2] = -0.5; print(mlp:forward(x))
+</file>
+
+You should see something like:
+<file lua>
+> x = torch.Tensor(2)
+> x[1] =  0.5; x[2] =  0.5; print(mlp:forward(x))
+
+-0.3490
+[torch.Tensor of dimension 1]
+
+> x[1] =  0.5; x[2] = -0.5; print(mlp:forward(x))
+
+ 1.0561
+[torch.Tensor of dimension 1]
+
+> x[1] = -0.5; x[2] =  0.5; print(mlp:forward(x))
+
+ 0.8640
+[torch.Tensor of dimension 1]
+
+> x[1] = -0.5; x[2] = -0.5; print(mlp:forward(x))
+
+-0.2941
+[torch.Tensor of dimension 1]
+</file>
+
+===== Example of manual training of a neural network =====
+{{anchor:nn.DoItYourself}}
+
+We show an example here on a classical XOR problem.
+ +**Neural Network** + +We create a simple neural network with one hidden layer. +<file lua> +require "nn" +mlp = nn.Sequential(); -- make a multi-layer perceptron +inputs = 2; outputs = 1; HUs = 20; -- parameters +mlp:add(nn.Linear(inputs, HUs)) +mlp:add(nn.Tanh()) +mlp:add(nn.Linear(HUs, outputs)) +</file> + +**Loss function** + +We choose the Mean Squared Error criterion. +<file lua> +criterion = nn.MSECriterion() +</file> + +**Training** + +We create data //on the fly// and feed it to the neural network. + +<file lua> +require "lab" +for i = 1,2500 do + -- random sample + local input= lab.randn(2); -- normally distributed example in 2d + local output= torch.Tensor(1); + if input[1]*input[2] > 0 then -- calculate label for XOR function + output[1] = -1 + else + output[1] = 1 + end + + -- feed it to the neural network and the criterion + criterion:forward(mlp:forward(input), output) + + -- train over this example in 3 steps + -- (1) zero the accumulation of the gradients + mlp:zeroGradParameters() + -- (2) accumulate gradients + mlp:backward(input, criterion:backward(mlp.output, output)) + -- (3) update parameters with a 0.01 learning rate + mlp:updateParameters(0.01) +end +</file> + +**Test the network** + +<file lua> +x = torch.Tensor(2) +x[1] = 0.5; x[2] = 0.5; print(mlp:forward(x)) +x[1] = 0.5; x[2] = -0.5; print(mlp:forward(x)) +x[1] = -0.5; x[2] = 0.5; print(mlp:forward(x)) +x[1] = -0.5; x[2] = -0.5; print(mlp:forward(x)) +</file> + +You should see something like: +<file lua> +> x = torch.Tensor(2) +> x[1] = 0.5; x[2] = 0.5; print(mlp:forward(x)) + +-0.6140 +[torch.Tensor of dimension 1] + +> x[1] = 0.5; x[2] = -0.5; print(mlp:forward(x)) + + 0.8878 +[torch.Tensor of dimension 1] + +> x[1] = -0.5; x[2] = 0.5; print(mlp:forward(x)) + + 0.8548 +[torch.Tensor of dimension 1] + +> x[1] = -0.5; x[2] = -0.5; print(mlp:forward(x)) + +-0.5498 +[torch.Tensor of dimension 1] +</file> diff --git a/dok/lena.jpg b/dok/lena.jpg Binary files differnew file mode 100644 index 0000000..d4a8c36 --- /dev/null +++ b/dok/lena.jpg diff --git a/dok/lenap.jpg b/dok/lenap.jpg Binary files differnew file mode 100644 index 0000000..0e6916d --- /dev/null +++ b/dok/lenap.jpg diff --git a/dok/logsigmoid.png b/dok/logsigmoid.png Binary files differnew file mode 100644 index 0000000..f632ed8 --- /dev/null +++ b/dok/logsigmoid.png diff --git a/dok/logsoftmax.png b/dok/logsoftmax.png Binary files differnew file mode 100644 index 0000000..dec5be5 --- /dev/null +++ b/dok/logsoftmax.png diff --git a/dok/power.png b/dok/power.png Binary files differnew file mode 100644 index 0000000..958eeb4 --- /dev/null +++ b/dok/power.png diff --git a/dok/sigmmoid.png b/dok/sigmmoid.png Binary files differnew file mode 100644 index 0000000..48aad7e --- /dev/null +++ b/dok/sigmmoid.png diff --git a/dok/sigmoid.png b/dok/sigmoid.png Binary files differnew file mode 100644 index 0000000..48aad7e --- /dev/null +++ b/dok/sigmoid.png diff --git a/dok/softmax.png b/dok/softmax.png Binary files differnew file mode 100644 index 0000000..29c5534 --- /dev/null +++ b/dok/softmax.png diff --git a/dok/softmin.png b/dok/softmin.png Binary files differnew file mode 100644 index 0000000..d1807a4 --- /dev/null +++ b/dok/softmin.png diff --git a/dok/softplus.png b/dok/softplus.png Binary files differnew file mode 100644 index 0000000..a5ee028 --- /dev/null +++ b/dok/softplus.png diff --git a/dok/softsign.png b/dok/softsign.png Binary files differnew file mode 100644 index 0000000..0805433 --- /dev/null +++ b/dok/softsign.png diff --git a/dok/sqrt.png 
b/dok/sqrt.png Binary files differnew file mode 100644 index 0000000..29b1d42 --- /dev/null +++ b/dok/sqrt.png diff --git a/dok/square.png b/dok/square.png Binary files differnew file mode 100644 index 0000000..c191eaf --- /dev/null +++ b/dok/square.png diff --git a/dok/sshrink.png b/dok/sshrink.png Binary files differnew file mode 100644 index 0000000..99c5d11 --- /dev/null +++ b/dok/sshrink.png diff --git a/dok/tanh.png b/dok/tanh.png Binary files differnew file mode 100644 index 0000000..d2f77aa --- /dev/null +++ b/dok/tanh.png diff --git a/generic/Abs.c b/generic/Abs.c new file mode 100644 index 0000000..8c65813 --- /dev/null +++ b/generic/Abs.c @@ -0,0 +1,43 @@ +#ifndef TH_GENERIC_FILE +#define TH_GENERIC_FILE "generic/Abs.c" +#else + +static int nn_(Abs_updateOutput)(lua_State *L) +{ + THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id)); + THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_(Tensor_id)); + + THTensor_(resizeAs)(output, input); + + TH_TENSOR_APPLY2(real, output, real, input, \ + *output_data = fabs(*input_data);) + return 1; +} + +static int nn_(Abs_updateGradInput)(lua_State *L) +{ + THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id)); + THTensor *gradOutput = luaT_checkudata(L, 3, torch_(Tensor_id)); + THTensor *gradInput = luaT_getfieldcheckudata(L, 1, "gradInput", torch_(Tensor_id)); + + THTensor_(resizeAs)(gradInput, input); + TH_TENSOR_APPLY3(real, gradInput, real, gradOutput, real, input, \ + real z = *input_data; \ + *gradInput_data = *gradOutput_data * (z >= 0 ? 1 : -1);) + return 1; +} + +static const struct luaL_Reg nn_(Abs__) [] = { + {"Abs_updateOutput", nn_(Abs_updateOutput)}, + {"Abs_updateGradInput", nn_(Abs_updateGradInput)}, + {NULL, NULL} +}; + +static void nn_(Abs_init)(lua_State *L) +{ + luaT_pushmetaclass(L, torch_(Tensor_id)); + luaT_registeratname(L, nn_(Abs__), "nn"); + lua_pop(L,1); +} + +#endif diff --git a/generic/AbsCriterion.c b/generic/AbsCriterion.c new file mode 100644 index 0000000..b9b948d --- /dev/null +++ b/generic/AbsCriterion.c @@ -0,0 +1,54 @@ +#ifndef TH_GENERIC_FILE +#define TH_GENERIC_FILE "generic/AbsCriterion.c" +#else + +static int nn_(AbsCriterion_updateOutput)(lua_State *L) +{ + THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id)); + THTensor *target = luaT_checkudata(L, 3, torch_(Tensor_id)); + int sizeAverage = luaT_getfieldcheckboolean(L, 1, "sizeAverage"); + real sum; + + sum = 0; + TH_TENSOR_APPLY2(real, input, real, target, + sum += fabs(*input_data - *target_data);) + + if(sizeAverage) + sum /= THTensor_(nElement)(input); + + lua_pushnumber(L, sum); + lua_setfield(L, 1, "output"); + + lua_pushnumber(L, sum); + return 1; +} + +static int nn_(AbsCriterion_updateGradInput)(lua_State *L) +{ + THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id)); + THTensor *target = luaT_checkudata(L, 3, torch_(Tensor_id)); + int sizeAverage = luaT_getfieldcheckboolean(L, 1, "sizeAverage"); + THTensor *gradInput = luaT_getfieldcheckudata(L, 1, "gradInput", torch_(Tensor_id)); + real norm = (sizeAverage ? 1./((real)THTensor_(nElement)(input)) : 1.); + + THTensor_(resizeAs)(gradInput, input); + TH_TENSOR_APPLY3(real, gradInput, real, input, real, target, + *gradInput_data = ( (*input_data - *target_data) >= 0 ? 
norm : -norm);) + + return 1; +} + +static const struct luaL_Reg nn_(AbsCriterion__) [] = { + {"AbsCriterion_updateOutput", nn_(AbsCriterion_updateOutput)}, + {"AbsCriterion_updateGradInput", nn_(AbsCriterion_updateGradInput)}, + {NULL, NULL} +}; + +static void nn_(AbsCriterion_init)(lua_State *L) +{ + luaT_pushmetaclass(L, torch_(Tensor_id)); + luaT_registeratname(L, nn_(AbsCriterion__), "nn"); + lua_pop(L,1); +} + +#endif diff --git a/generic/Exp.c b/generic/Exp.c new file mode 100644 index 0000000..b56f379 --- /dev/null +++ b/generic/Exp.c @@ -0,0 +1,43 @@ +#ifndef TH_GENERIC_FILE +#define TH_GENERIC_FILE "generic/Exp.c" +#else + +static int nn_(Exp_updateOutput)(lua_State *L) +{ + THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id)); + THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_(Tensor_id)); + + THTensor_(resizeAs)(output, input); + + TH_TENSOR_APPLY2(real, output, real, input, \ + *output_data = exp(*input_data);) + + return 1; +} + +static int nn_(Exp_updateGradInput)(lua_State *L) +{ + THTensor *gradOutput = luaT_checkudata(L, 3, torch_(Tensor_id)); + THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_(Tensor_id)); + THTensor *gradInput = luaT_getfieldcheckudata(L, 1, "gradInput", torch_(Tensor_id)); + + THTensor_(resizeAs)(gradInput, output); + TH_TENSOR_APPLY3(real, gradInput, real, gradOutput, real, output, \ + *gradInput_data = *gradOutput_data * *output_data;); + return 1; +} + +static const struct luaL_Reg nn_(Exp__) [] = { + {"Exp_updateOutput", nn_(Exp_updateOutput)}, + {"Exp_updateGradInput", nn_(Exp_updateGradInput)}, + {NULL, NULL} +}; + +static void nn_(Exp_init)(lua_State *L) +{ + luaT_pushmetaclass(L, torch_(Tensor_id)); + luaT_registeratname(L, nn_(Exp__), "nn"); + lua_pop(L,1); +} + +#endif diff --git a/generic/HardShrink.c b/generic/HardShrink.c new file mode 100644 index 0000000..be98ddc --- /dev/null +++ b/generic/HardShrink.c @@ -0,0 +1,50 @@ +#ifndef TH_GENERIC_FILE +#define TH_GENERIC_FILE "generic/HardShrink.c" +#else + +static int nn_(HardShrink_updateOutput)(lua_State *L) +{ + THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id)); + real lambda = luaT_getfieldchecknumber(L, 1, "lambda"); + THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_(Tensor_id)); + + THTensor_(resizeAs)(output, input); + + TH_TENSOR_APPLY2(real, output, real, input, \ + if ((*input_data) > lambda) *output_data = *input_data; \ + else if ((*input_data) < -lambda) *output_data = *input_data; \ + else *output_data = 0;); + return 1; +} + +static int nn_(HardShrink_updateGradInput)(lua_State *L) +{ + THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id)); + real lambda = luaT_getfieldchecknumber(L, 1, "lambda"); + THTensor *gradOutput = luaT_checkudata(L, 3, torch_(Tensor_id)); + THTensor *gradInput = luaT_getfieldcheckudata(L, 1, "gradInput", torch_(Tensor_id)); + + THTensor_(resizeAs)(gradInput, input); + TH_TENSOR_APPLY3(real, gradInput, real, gradOutput, real, input, \ + if ((*input_data) > lambda || (*input_data) < -lambda) \ + *gradInput_data = (*gradOutput_data); \ + else \ + *gradInput_data = 0; \ + ); + return 1; +} + +static const struct luaL_Reg nn_(HardShrink__) [] = { + {"HardShrink_updateOutput", nn_(HardShrink_updateOutput)}, + {"HardShrink_updateGradInput", nn_(HardShrink_updateGradInput)}, + {NULL, NULL} +}; + +static void nn_(HardShrink_init)(lua_State *L) +{ + luaT_pushmetaclass(L, torch_(Tensor_id)); + luaT_registeratname(L, nn_(HardShrink__), "nn"); + lua_pop(L,1); +} + +#endif diff --git 
a/generic/HardTanh.c b/generic/HardTanh.c new file mode 100644 index 0000000..3764095 --- /dev/null +++ b/generic/HardTanh.c @@ -0,0 +1,50 @@ +#ifndef TH_GENERIC_FILE +#define TH_GENERIC_FILE "generic/HardTanh.c" +#else + +static int nn_(HardTanh_updateOutput)(lua_State *L) +{ + THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id)); + THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_(Tensor_id)); + + THTensor_(resizeAs)(output, input); + + TH_TENSOR_APPLY2(real, output, real, input, \ + if(*input_data < -1) \ + *output_data = -1; \ + else if(*input_data <= 1) \ + *output_data = *input_data; \ + else \ + *output_data = 1;) + return 1; +} + +static int nn_(HardTanh_updateGradInput)(lua_State *L) +{ + THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id)); + THTensor *gradOutput = luaT_checkudata(L, 3, torch_(Tensor_id)); + THTensor *gradInput = luaT_getfieldcheckudata(L, 1, "gradInput", torch_(Tensor_id)); + + THTensor_(resizeAs)(gradInput, input); + TH_TENSOR_APPLY3(real, gradInput, real, gradOutput, real, input, \ + if(*input_data < -1 || *input_data > 1) \ + *gradInput_data = 0; \ + else \ + *gradInput_data = *gradOutput_data;); + return 1; +} + +static const struct luaL_Reg nn_(HardTanh__) [] = { + {"HardTanh_updateOutput", nn_(HardTanh_updateOutput)}, + {"HardTanh_updateGradInput", nn_(HardTanh_updateGradInput)}, + {NULL, NULL} +}; + +static void nn_(HardTanh_init)(lua_State *L) +{ + luaT_pushmetaclass(L, torch_(Tensor_id)); + luaT_registeratname(L, nn_(HardTanh__), "nn"); + lua_pop(L,1); +} + +#endif diff --git a/generic/LogSigmoid.c b/generic/LogSigmoid.c new file mode 100644 index 0000000..b5bdae4 --- /dev/null +++ b/generic/LogSigmoid.c @@ -0,0 +1,49 @@ +#ifndef TH_GENERIC_FILE +#define TH_GENERIC_FILE "generic/LogSigmoid.c" +#else + +static int nn_(LogSigmoid_updateOutput)(lua_State *L) +{ + THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id)); + THTensor *buffer = luaT_getfieldcheckudata(L, 1, "buffer", torch_(Tensor_id)); + THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_(Tensor_id)); + + THTensor_(resizeAs)(output, input); + THTensor_(resizeAs)(buffer, input); + + TH_TENSOR_APPLY3(real, output, real, input, real, buffer, \ + real z = exp(-*input_data); \ + *buffer_data = z; \ + *output_data = -log(1. + z);) + + return 1; +} + +static int nn_(LogSigmoid_updateGradInput)(lua_State *L) +{ + THTensor *gradOutput = luaT_checkudata(L, 3, torch_(Tensor_id)); + THTensor *buffer = luaT_getfieldcheckudata(L, 1, "buffer", torch_(Tensor_id)); + THTensor *gradInput = luaT_getfieldcheckudata(L, 1, "gradInput", torch_(Tensor_id)); + + THTensor_(resizeAs)(gradInput, buffer); + TH_TENSOR_APPLY3(real, gradInput, real, gradOutput, real, buffer, \ + real z = *buffer_data; \ + *gradInput_data = *gradOutput_data * z / (1. 
+ z);) + + return 1; +} + +static const struct luaL_Reg nn_(LogSigmoid__) [] = { + {"LogSigmoid_updateOutput", nn_(LogSigmoid_updateOutput)}, + {"LogSigmoid_updateGradInput", nn_(LogSigmoid_updateGradInput)}, + {NULL, NULL} +}; + +static void nn_(LogSigmoid_init)(lua_State *L) +{ + luaT_pushmetaclass(L, torch_(Tensor_id)); + luaT_registeratname(L, nn_(LogSigmoid__), "nn"); + lua_pop(L,1); +} + +#endif diff --git a/generic/LogSoftMax.c b/generic/LogSoftMax.c new file mode 100644 index 0000000..5d4dbfc --- /dev/null +++ b/generic/LogSoftMax.c @@ -0,0 +1,111 @@ +#ifndef TH_GENERIC_FILE +#define TH_GENERIC_FILE "generic/LogSoftMax.c" +#else + +static int nn_(LogSoftMax_updateOutput)(lua_State *L) +{ + THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id)); + THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_(Tensor_id)); + real *input_data, *output_data; + long nframe = 0, dim = 0; + long t, d; + + if(input->nDimension == 1) + { + nframe = 1; + dim = input->size[0]; + } + else if(input->nDimension == 2) + { + nframe = input->size[0]; + dim = input->size[1]; + } + else + THArgCheck(0, 2, "vector or matrix expected"); + + input = THTensor_(newContiguous)(input); + THTensor_(resizeAs)(output, input); + + input_data = THTensor_(data)(input); + output_data = THTensor_(data)(output); + for(t = 0; t < nframe; t++) + { + accreal logsum = 0; + real maxInput = -THInf; + + for(d = 0; d < dim; d++) + maxInput = THMax(maxInput, input_data[d]); + + for(d = 0; d < dim; d++) + logsum += THExpMinusApprox(maxInput-input_data[d]); + logsum = maxInput + log(logsum); + + for(d = 0; d < dim; d++) + output_data[d] = input_data[d] - logsum; + + input_data += dim; + output_data += dim; + } + + THTensor_(free)(input); + + return 1; +} + +static int nn_(LogSoftMax_updateGradInput)(lua_State *L) +{ + THTensor *gradOutput = luaT_checkudata(L, 3, torch_(Tensor_id)); + THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_(Tensor_id)); + THTensor *gradInput = luaT_getfieldcheckudata(L, 1, "gradInput", torch_(Tensor_id)); + real *gradInput_data, *gradOutput_data, *output_data; + long nframe = 0, dim = 0; + long t, d; + + if(output->nDimension == 1) + { + nframe = 1; + dim = output->size[0]; + } + else if(output->nDimension == 2) + { + nframe = output->size[0]; + dim = output->size[1]; + } + else + THError("vector or matrix expected"); + + THTensor_(resizeAs)(gradInput, output); + gradInput_data = THTensor_(data)(gradInput); + output_data = THTensor_(data)(output); + gradOutput_data = THTensor_(data)(gradOutput); + for(t = 0; t < nframe; t++) + { + accreal sum = 0; + for(d = 0; d < dim; d++) + sum += gradOutput_data[d]; + + for(d = 0; d < dim; d++) + gradInput_data[d] = gradOutput_data[d] - exp(output_data[d])*sum; + + gradInput_data += dim; + output_data += dim; + gradOutput_data += dim; + } + + return 1; +} + +static const struct luaL_Reg nn_(LogSoftMax__) [] = { + {"LogSoftMax_updateOutput", nn_(LogSoftMax_updateOutput)}, + {"LogSoftMax_updateGradInput", nn_(LogSoftMax_updateGradInput)}, + {NULL, NULL} +}; + +void nn_(LogSoftMax_init)(lua_State *L) +{ + luaT_pushmetaclass(L, torch_(Tensor_id)); + luaT_registeratname(L, nn_(LogSoftMax__), "nn"); + lua_pop(L,1); +} + +#endif diff --git a/generic/MSECriterion.c b/generic/MSECriterion.c new file mode 100644 index 0000000..c53735c --- /dev/null +++ b/generic/MSECriterion.c @@ -0,0 +1,54 @@ +#ifndef TH_GENERIC_FILE +#define TH_GENERIC_FILE "generic/MSECriterion.c" +#else + +static int nn_(MSECriterion_updateOutput)(lua_State *L) +{ + THTensor *input 
= luaT_checkudata(L, 2, torch_(Tensor_id)); + THTensor *target = luaT_checkudata(L, 3, torch_(Tensor_id)); + int sizeAverage = luaT_getfieldcheckboolean(L, 1, "sizeAverage"); + real sum; + + sum = 0; + TH_TENSOR_APPLY2(real, input, real, target, + real z = (*input_data - *target_data); + sum += z*z;) + + if(sizeAverage) + sum /= THTensor_(nElement)(input); + + lua_pushnumber(L, sum); + lua_setfield(L, 1, "output"); + + lua_pushnumber(L, sum); + return 1; +} + +static int nn_(MSECriterion_updateGradInput)(lua_State *L) +{ + THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id)); + THTensor *target = luaT_checkudata(L, 3, torch_(Tensor_id)); + int sizeAverage = luaT_getfieldcheckboolean(L, 1, "sizeAverage"); + THTensor *gradInput = luaT_getfieldcheckudata(L, 1, "gradInput", torch_(Tensor_id)); + real norm = (sizeAverage ? 2./((real)THTensor_(nElement)(input)) : 2.); + + THTensor_(resizeAs)(gradInput, input); + TH_TENSOR_APPLY3(real, gradInput, real, input, real, target, + *gradInput_data = norm * (*input_data - *target_data);) + return 1; +} + +static const struct luaL_Reg nn_(MSECriterion__) [] = { + {"MSECriterion_updateOutput", nn_(MSECriterion_updateOutput)}, + {"MSECriterion_updateGradInput", nn_(MSECriterion_updateGradInput)}, + {NULL, NULL} +}; + +static void nn_(MSECriterion_init)(lua_State *L) +{ + luaT_pushmetaclass(L, torch_(Tensor_id)); + luaT_registeratname(L, nn_(MSECriterion__), "nn"); + lua_pop(L,1); +} + +#endif diff --git a/generic/Max.c b/generic/Max.c new file mode 100644 index 0000000..87f52f1 --- /dev/null +++ b/generic/Max.c @@ -0,0 +1,100 @@ +#ifndef TH_GENERIC_FILE +#define TH_GENERIC_FILE "generic/Max.c" +#else + +static int nn_(Max_updateOutput)(lua_State *L) +{ + THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id)); + int dimension = luaT_getfieldcheckint(L, 1, "dimension")-1; + THTensor *indices = luaT_getfieldcheckudata(L, 1, "indices", torch_(Tensor_id)); + THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_(Tensor_id)); + + THLongStorage *dim; + long i; + + luaL_argcheck(L, dimension >= 0 && dimension < input->nDimension, 2, "dimension out of range"); + + dim = THLongStorage_newWithSize(input->nDimension); + for(i = 0; i < input->nDimension; i++) + dim->data[i] = input->size[i]; + dim->data[dimension] = 1; + THTensor_(resize)(output, dim, NULL); + THTensor_(resize)(indices, dim, NULL); + THLongStorage_free(dim); + + TH_TENSOR_DIM_APPLY3(real, output, real, input, real, indices, dimension, + long theIndex = 0; + real theMax = input_data[0]; + for(i = 1; i < input_size; i++) + { + if(input_data[i*input_stride] > theMax) + { + theIndex = i; + theMax = input_data[i*input_stride]; + } + } + *indices_data = theIndex+1; + *output_data = theMax;) + + THTensor_(select)(output, NULL, dimension, 0); + + return 1; +} + +static int nn_(Max_updateGradInput)(lua_State *L) +{ + THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id)); + THTensor *gradOutput = luaT_checkudata(L, 3, torch_(Tensor_id)); + THTensor *indices = luaT_getfieldcheckudata(L, 1, "indices", torch_(Tensor_id)); + int dimension = luaT_getfieldcheckint(L, 1, "dimension")-1; + THTensor *gradInput = luaT_getfieldcheckudata(L, 1, "gradInput", torch_(Tensor_id)); + + THTensor *gradOutputPlusOneDim; + THLongStorage *dim, *str; + int i, j; + + THTensor_(resizeAs)(gradInput, input); + THTensor_(zero)(gradInput); + + dim = THLongStorage_newWithSize(gradOutput->nDimension+1); + str = THLongStorage_newWithSize(gradOutput->nDimension+1); + for(i = 0, j = 0; j < gradOutput->nDimension+1; j++) 
+ { + if(j == dimension) + { + dim->data[j] = input->size[dimension]; + str->data[j] = 0; + continue; + } + + dim->data[j] = gradOutput->size[i]; + str->data[j] = gradOutput->stride[i]; + i++; + } + + gradOutputPlusOneDim = THTensor_(newWithStorage)(gradOutput->storage, gradOutput->storageOffset, dim, str); + THLongStorage_free(dim); + THLongStorage_free(str); + + TH_TENSOR_DIM_APPLY3(real, gradInput, real, gradOutputPlusOneDim, real, indices, dimension, + gradInput_data[ ((long)(*indices_data)-1)*gradInput_stride ] = *gradOutputPlusOneDim_data;) + + THTensor_(free)(gradOutputPlusOneDim); + + return 1; +} + +static const struct luaL_Reg nn_(Max__) [] = { + {"Max_updateOutput", nn_(Max_updateOutput)}, + {"Max_updateGradInput", nn_(Max_updateGradInput)}, + {NULL, NULL} +}; + +static void nn_(Max_init)(lua_State *L) +{ + luaT_pushmetaclass(L, torch_(Tensor_id)); + luaT_registeratname(L, nn_(Max__), "nn"); + lua_pop(L,1); +} + +#endif diff --git a/generic/Min.c b/generic/Min.c new file mode 100644 index 0000000..d3309df --- /dev/null +++ b/generic/Min.c @@ -0,0 +1,100 @@ +#ifndef TH_GENERIC_FILE +#define TH_GENERIC_FILE "generic/Min.c" +#else + +static int nn_(Min_updateOutput)(lua_State *L) +{ + THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id)); + int dimension = luaT_getfieldcheckint(L, 1, "dimension")-1; + THTensor *indices = luaT_getfieldcheckudata(L, 1, "indices", torch_(Tensor_id)); + THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_(Tensor_id)); + + THLongStorage *dim; + long i; + + luaL_argcheck(L, dimension >= 0 && dimension < input->nDimension, 2, "dimension out of range"); + + dim = THLongStorage_newWithSize(input->nDimension); + for(i = 0; i < input->nDimension; i++) + dim->data[i] = input->size[i]; + dim->data[dimension] = 1; + THTensor_(resize)(output, dim, NULL); + THTensor_(resize)(indices, dim, NULL); + THLongStorage_free(dim); + + TH_TENSOR_DIM_APPLY3(real, output, real, input, real, indices, dimension, + long theIndex = 0; + real theMin = input_data[0]; + for(i = 1; i < input_size; i++) + { + if(input_data[i*input_stride] < theMin) + { + theIndex = i; + theMin = input_data[i*input_stride]; + } + } + *indices_data = theIndex+1; + *output_data = theMin;) + + THTensor_(select)(output, NULL, dimension, 0); + + return 1; +} + +static int nn_(Min_updateGradInput)(lua_State *L) +{ + THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id)); + THTensor *gradOutput = luaT_checkudata(L, 3, torch_(Tensor_id)); + THTensor *indices = luaT_getfieldcheckudata(L, 1, "indices", torch_(Tensor_id)); + int dimension = luaT_getfieldcheckint(L, 1, "dimension")-1; + THTensor *gradInput = luaT_getfieldcheckudata(L, 1, "gradInput", torch_(Tensor_id)); + + THTensor *gradOutputPlusOneDim; + THLongStorage *dim, *str; + int i, j; + + THTensor_(resizeAs)(gradInput, input); + THTensor_(zero)(gradInput); + + dim = THLongStorage_newWithSize(gradOutput->nDimension+1); + str = THLongStorage_newWithSize(gradOutput->nDimension+1); + for(i = 0, j = 0; j < gradOutput->nDimension+1; j++) + { + if(j == dimension) + { + dim->data[j] = input->size[dimension]; + str->data[j] = 0; + continue; + } + + dim->data[j] = gradOutput->size[i]; + str->data[j] = gradOutput->stride[i]; + i++; + } + + gradOutputPlusOneDim = THTensor_(newWithStorage)(gradOutput->storage, gradOutput->storageOffset, dim, str); + THLongStorage_free(dim); + THLongStorage_free(str); + + TH_TENSOR_DIM_APPLY3(real, gradInput, real, gradOutputPlusOneDim, real, indices, dimension, + gradInput_data[ 
((long)(*indices_data)-1)*gradInput_stride ] = *gradOutputPlusOneDim_data;) + + THTensor_(free)(gradOutputPlusOneDim); + + return 1; +} + +static const struct luaL_Reg nn_(Min__) [] = { + {"Min_updateOutput", nn_(Min_updateOutput)}, + {"Min_updateGradInput", nn_(Min_updateGradInput)}, + {NULL, NULL} +}; + +static void nn_(Min_init)(lua_State *L) +{ + luaT_pushmetaclass(L, torch_(Tensor_id)); + luaT_registeratname(L, nn_(Min__), "nn"); + lua_pop(L,1); +} + +#endif diff --git a/generic/MultiLabelMarginCriterion.c b/generic/MultiLabelMarginCriterion.c new file mode 100644 index 0000000..f4c3914 --- /dev/null +++ b/generic/MultiLabelMarginCriterion.c @@ -0,0 +1,185 @@ +#ifndef TH_GENERIC_FILE +#define TH_GENERIC_FILE "generic/MultiLabelMarginCriterion.c" +#else + +static int nn_(MultiLabelMarginCriterion_updateOutput)(lua_State *L) +{ + THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id)); + int sizeAverage = luaT_getfieldcheckboolean(L, 1, "sizeAverage"); + real *input_data, *target_data; + long nframe, dim; + long t, d, dt, ddt; + THTensor *target; + real sum; + + THArgCheck((input->nDimension == 1) || (input->nDimension == 2), 2, "vector or matrix expected"); + + if(input->nDimension == 1) + { + nframe = 1; + dim = input->size[0]; + target = luaT_checkudata(L, 3, torch_(Tensor_id)); + THArgCheck((target->nDimension == 1) && (target->size[0] == dim), 3, "inconsistent target size"); + } + else + { + nframe = input->size[0]; + dim = input->size[1]; + target = luaT_checkudata(L, 3, torch_(Tensor_id)); + THArgCheck((target->nDimension == 2) && (target->size[0] == nframe) && (target->size[1] == dim), 3, "inconsistent target size"); + } + + THArgCheck(THTensor_(minall)(target) >= 0, 3, "target out of range"); + THArgCheck(THTensor_(maxall)(target) <= dim, 3, "target out of range"); + + target = THTensor_(newContiguous)(target); + input = THTensor_(newContiguous)(input); + input_data = THTensor_(data)(input); + target_data = THTensor_(data)(target); + + sum = 0; + for(t = 0; t < nframe; t++) + { + for(dt = 0; dt < dim; dt++) + { + long target_idx = (long)target_data[dt]-1; + real input_target; + if(target_idx < 0) + break; + + input_target = input_data[target_idx]; + for(d = 0; d < dim; d++) + { + int istarget = 0; + for(ddt = 0; ddt < dim; ddt++) + { + if(!target_data[ddt]) + break; + if(((long)target_data[ddt])-1 == d) + istarget = 1; + } + + if(!istarget) + { + real z = 1 - input_target + input_data[d]; + if(z > 0) + sum += z; + } + } + } + input_data += dim; + target_data += dim; + } + + if(sizeAverage) + sum /= dim; + + lua_pushnumber(L, sum); + lua_setfield(L, 1, "output"); + + THTensor_(free)(input); + THTensor_(free)(target); + lua_pushnumber(L, sum); + return 1; +} + +static int nn_(MultiLabelMarginCriterion_updateGradInput)(lua_State *L) +{ + THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id)); + int sizeAverage = luaT_getfieldcheckboolean(L, 1, "sizeAverage"); + THTensor *gradInput = luaT_getfieldcheckudata(L, 1, "gradInput", torch_(Tensor_id)); + real *input_data; + real *gradInput_data; + real *target_data; + long nframe, dim; + long t, d, dt, ddt; + THTensor *target; + real g; + + THArgCheck((input->nDimension == 1) || (input->nDimension == 2), 2, "vector or matrix expected"); + + if(input->nDimension == 1) + { + nframe = 1; + dim = input->size[0]; + target = luaT_checkudata(L, 3, torch_(Tensor_id)); + THArgCheck((target->nDimension == 1) && (target->size[0] == dim), 3, "inconsistent target size"); + } + else + { + nframe = input->size[0]; + dim = input->size[1]; + target 
= luaT_checkudata(L, 3, torch_(Tensor_id)); + THArgCheck((target->nDimension == 2) && (target->size[0] == nframe) && (target->size[1] == dim), 3, "inconsistent target size"); + } + + THArgCheck(THTensor_(minall)(target) >= 0, 3, "target out of range"); + THArgCheck(THTensor_(maxall)(target) <= dim, 3, "target out of range"); + + target = THTensor_(newContiguous)(target); + input = THTensor_(newContiguous)(input); + input_data = THTensor_(data)(input); + target_data = THTensor_(data)(target); + + g = (sizeAverage ? 1./((real)dim) : 1.); + + THTensor_(resizeAs)(gradInput, input); + THTensor_(zero)(gradInput); + gradInput_data = THTensor_(data)(gradInput); + + for(t = 0; t < nframe; t++) + { + for(dt = 0; dt < dim; dt++) + { + long target_idx = (long)target_data[dt]-1; + real input_target; + if(target_idx < 0) + break; + + input_target = input_data[target_idx]; + for(d = 0; d < dim; d++) + { + int istarget = 0; + for(ddt = 0; ddt < dim; ddt++) + { + if(!target_data[ddt]) + break; + if(((long)target_data[ddt])-1 == d) + istarget = 1; + } + + if(!istarget) + { + real z = 1 - input_target + input_data[d]; + if(z > 0) + { + gradInput_data[target_idx] -= g; + gradInput_data[d] += g; + } + } + } + } + input_data += dim; + target_data += dim; + gradInput_data += dim; + } + + THTensor_(free)(input); + THTensor_(free)(target); + return 1; +} + +static const struct luaL_Reg nn_(MultiLabelMarginCriterion__) [] = { + {"MultiLabelMarginCriterion_updateOutput", nn_(MultiLabelMarginCriterion_updateOutput)}, + {"MultiLabelMarginCriterion_updateGradInput", nn_(MultiLabelMarginCriterion_updateGradInput)}, + {NULL, NULL} +}; + +static void nn_(MultiLabelMarginCriterion_init)(lua_State *L) +{ + luaT_pushmetaclass(L, torch_(Tensor_id)); + luaT_registeratname(L, nn_(MultiLabelMarginCriterion__), "nn"); + lua_pop(L,1); +} + +#endif diff --git a/generic/MultiMarginCriterion.c b/generic/MultiMarginCriterion.c new file mode 100644 index 0000000..ca73bc9 --- /dev/null +++ b/generic/MultiMarginCriterion.c @@ -0,0 +1,162 @@ +#ifndef TH_GENERIC_FILE +#define TH_GENERIC_FILE "generic/MultiMarginCriterion.c" +#else + +static int nn_(MultiMarginCriterion_updateOutput)(lua_State *L) +{ + THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id)); + int sizeAverage = luaT_getfieldcheckboolean(L, 1, "sizeAverage"); + real *input_data, *target_data; + long nframe, dim; + long t, d; + real target_; + THTensor *target; + real sum; + + THArgCheck((input->nDimension == 1) || (input->nDimension == 2), 2, "vector or matrix expected"); + + if(input->nDimension == 1) + { + nframe = 1; + dim = input->size[0]; + target_ = luaL_checknumber(L, 3); + target = THTensor_(newWithSize1d)(1); + THTensor_(fill)(target, target_); + } + else + { + nframe = input->size[0]; + dim = input->size[1]; + target = luaT_checkudata(L, 3, torch_(Tensor_id)); + THArgCheck((target->nDimension == 1) && (target->size[0] == nframe), 3, "inconsistent target size"); + target = THTensor_(newContiguous)(target); + } + + for(t = 0; t < nframe; t++) + { + real idx = THTensor_(get1d)(target, t); + THArgCheck((idx >= 1) && (idx <= dim), 3, "target out of range"); + } + + input = THTensor_(newContiguous)(input); + input_data = THTensor_(data)(input); + target_data = THTensor_(data)(target); + + sum = 0; + for(t = 0; t < nframe; t++) + { + long target_idx = (long)(target_data[t]-1); + real input_target = input_data[target_idx]; + for(d = 0; d < dim; d++) + { + real z = 1 - input_target + input_data[d]; + if(d == target_idx) + continue; + + if(z > 0) + sum += z; + } + 
input_data += dim; + } + + if(sizeAverage) + sum /= dim; + + lua_pushnumber(L, sum); + lua_setfield(L, 1, "output"); + + THTensor_(free)(input); + THTensor_(free)(target); + lua_pushnumber(L, sum); + return 1; +} + +static int nn_(MultiMarginCriterion_updateGradInput)(lua_State *L) +{ + THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id)); + int sizeAverage = luaT_getfieldcheckboolean(L, 1, "sizeAverage"); + THTensor *gradInput = luaT_getfieldcheckudata(L, 1, "gradInput", torch_(Tensor_id)); + real *input_data; + real *gradInput_data; + real *target_data; + THTensor *target; + long nframe, dim; + long t, d; + real target_; + real g; + real sum; + + THArgCheck((input->nDimension == 1) || (input->nDimension == 2), 2, "vector or matrix expected"); + + if(input->nDimension == 1) + { + nframe = 1; + dim = input->size[0]; + target_ = luaL_checknumber(L, 3); + target = THTensor_(newWithSize1d)(1); + THTensor_(fill)(target, target_); + } + else + { + nframe = input->size[0]; + dim = input->size[1]; + target = luaT_checkudata(L, 3, torch_(Tensor_id)); + THArgCheck((target->nDimension == 1) && (target->size[0] == nframe), 3, "inconsistent target size"); + target = THTensor_(newContiguous)(target); + } + + g = (sizeAverage ? 1./((real)dim) : 1.); + + input = THTensor_(newContiguous)(input); + input_data = THTensor_(data)(input); + + THTensor_(resizeAs)(gradInput, input); + gradInput_data = THTensor_(data)(gradInput); + + target_data = THTensor_(data)(target); + + for(t = 0; t < nframe; t++) + { + long target_idx = (long)(target_data[t])-1; + real input_target = input_data[target_idx]; + real gradInput_target = 0; + for(d = 0; d < dim; d++) + { + real z = 1 - input_target + input_data[d]; + if(d == target_idx) + continue; + + if(z > 0) + { + gradInput_target -= g; + gradInput_data[d] = g; + } + else + gradInput_data[d] = 0; + } + gradInput_data[target_idx] = gradInput_target; + + input_data += dim; + gradInput_data += dim; + } + + + THTensor_(free)(input); + THTensor_(free)(target); + return 1; +} + +static const struct luaL_Reg nn_(MultiMarginCriterion__) [] = { + {"MultiMarginCriterion_updateOutput", nn_(MultiMarginCriterion_updateOutput)}, + {"MultiMarginCriterion_updateGradInput", nn_(MultiMarginCriterion_updateGradInput)}, + {NULL, NULL} +}; + +static void nn_(MultiMarginCriterion_init)(lua_State *L) +{ + luaT_pushmetaclass(L, torch_(Tensor_id)); + luaT_registeratname(L, nn_(MultiMarginCriterion__), "nn"); + lua_pop(L,1); +} + +#endif diff --git a/generic/Sigmoid.c b/generic/Sigmoid.c new file mode 100644 index 0000000..20348b9 --- /dev/null +++ b/generic/Sigmoid.c @@ -0,0 +1,44 @@ +#ifndef TH_GENERIC_FILE +#define TH_GENERIC_FILE "generic/Sigmoid.c" +#else + +static int nn_(Sigmoid_updateOutput)(lua_State *L) +{ + THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id)); + THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_(Tensor_id)); + + THTensor_(resizeAs)(output, input); + + TH_TENSOR_APPLY2(real, output, real, input, \ + *output_data = 1./(1.+ exp(- *input_data));) + + return 1; +} + +static int nn_(Sigmoid_updateGradInput)(lua_State *L) +{ + THTensor *gradOutput = luaT_checkudata(L, 3, torch_(Tensor_id)); + THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_(Tensor_id)); + THTensor *gradInput = luaT_getfieldcheckudata(L, 1, "gradInput", torch_(Tensor_id)); + + THTensor_(resizeAs)(gradInput, output); + TH_TENSOR_APPLY3(real, gradInput, real, gradOutput, real, output, \ + real z = *output_data; \ + *gradInput_data = *gradOutput_data * (1. 
- z) * z;) + return 1; +} + +static const struct luaL_Reg nn_(Sigmoid__) [] = { + {"Sigmoid_updateOutput", nn_(Sigmoid_updateOutput)}, + {"Sigmoid_updateGradInput", nn_(Sigmoid_updateGradInput)}, + {NULL, NULL} +}; + +static void nn_(Sigmoid_init)(lua_State *L) +{ + luaT_pushmetaclass(L, torch_(Tensor_id)); + luaT_registeratname(L, nn_(Sigmoid__), "nn"); + lua_pop(L,1); +} + +#endif diff --git a/generic/SoftMax.c b/generic/SoftMax.c new file mode 100644 index 0000000..3aaae65 --- /dev/null +++ b/generic/SoftMax.c @@ -0,0 +1,114 @@ +#ifndef TH_GENERIC_FILE +#define TH_GENERIC_FILE "generic/SoftMax.c" +#else + +static int nn_(SoftMax_updateOutput)(lua_State *L) +{ + THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id)); + THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_(Tensor_id)); + real *input_data, *output_data; + long nframe = 0, dim = 0; + long t, d; + + if(input->nDimension == 1) + { + nframe = 1; + dim = input->size[0]; + } + else if(input->nDimension == 2) + { + nframe = input->size[0]; + dim = input->size[1]; + } + else + THArgCheck(0, 2, "vector or matrix expected"); + + input = THTensor_(newContiguous)(input); + THTensor_(resizeAs)(output, input); + + input_data = THTensor_(data)(input); + output_data = THTensor_(data)(output); + for(t = 0; t < nframe; t++) + { + real inputMax = -THInf; + for(d = 0; d < dim; d++) { + if (input_data[d] >= inputMax) inputMax = input_data[d]; + } + + accreal sum = 0; + for(d = 0; d < dim; d++) { + real z = THExpMinusApprox(inputMax - input_data[d]); + output_data[d] = z; + sum += z; + } + + for(d = 0; d < dim; d++) { + output_data[d] *= 1/sum; + } + + input_data += dim; + output_data += dim; + } + + THTensor_(free)(input); + + return 1; +} + +static int nn_(SoftMax_updateGradInput)(lua_State *L) +{ + THTensor *gradOutput = luaT_checkudata(L, 3, torch_(Tensor_id)); + THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_(Tensor_id)); + THTensor *gradInput = luaT_getfieldcheckudata(L, 1, "gradInput", torch_(Tensor_id)); + real *gradInput_data, *gradOutput_data, *output_data; + long nframe = 0, dim = 0; + long t, d; + + if(output->nDimension == 1) + { + nframe = 1; + dim = output->size[0]; + } + else if(output->nDimension == 2) + { + nframe = output->size[0]; + dim = output->size[1]; + } + else + THError("vector or matrix expected"); + + THTensor_(resizeAs)(gradInput, output); + gradInput_data = THTensor_(data)(gradInput); + output_data = THTensor_(data)(output); + gradOutput_data = THTensor_(data)(gradOutput); + for(t = 0; t < nframe; t++) + { + accreal sum = 0; + for(d = 0; d < dim; d++) + sum += (accreal)gradOutput_data[d] * output_data[d]; + + for(d = 0; d < dim; d++) + gradInput_data[d] = output_data[d] * (gradOutput_data[d] - sum); + + gradInput_data += dim; + output_data += dim; + gradOutput_data += dim; + } + + return 1; +} + +static const struct luaL_Reg nn_(SoftMax__) [] = { + {"SoftMax_updateOutput", nn_(SoftMax_updateOutput)}, + {"SoftMax_updateGradInput", nn_(SoftMax_updateGradInput)}, + {NULL, NULL} +}; + +static void nn_(SoftMax_init)(lua_State *L) +{ + luaT_pushmetaclass(L, torch_(Tensor_id)); + luaT_registeratname(L, nn_(SoftMax__), "nn"); + lua_pop(L,1); +} + +#endif diff --git a/generic/SoftPlus.c b/generic/SoftPlus.c new file mode 100644 index 0000000..7a097fb --- /dev/null +++ b/generic/SoftPlus.c @@ -0,0 +1,44 @@ +#ifndef TH_GENERIC_FILE +#define TH_GENERIC_FILE "generic/SoftPlus.c" +#else + +static int nn_(SoftPlus_updateOutput)(lua_State *L) +{ + THTensor *input = luaT_checkudata(L, 2, 
torch_(Tensor_id)); + THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_(Tensor_id)); + + THTensor_(resizeAs)(output, input); + + TH_TENSOR_APPLY2(real, output, real, input, \ + *output_data = log1p(exp(*input_data));) + + return 1; +} + +static int nn_(SoftPlus_updateGradInput)(lua_State *L) +{ + THTensor *gradOutput = luaT_checkudata(L, 3, torch_(Tensor_id)); + THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_(Tensor_id)); + THTensor *gradInput = luaT_getfieldcheckudata(L, 1, "gradInput", torch_(Tensor_id)); + + THTensor_(resizeAs)(gradInput, output); + TH_TENSOR_APPLY3(real, gradInput, real, gradOutput, real, output, \ + real z = exp(*output_data); \ + *gradInput_data = *gradOutput_data * (z - 1.)/z;) + return 1; +} + +static const struct luaL_Reg nn_(SoftPlus__) [] = { + {"SoftPlus_updateOutput", nn_(SoftPlus_updateOutput)}, + {"SoftPlus_updateGradInput", nn_(SoftPlus_updateGradInput)}, + {NULL, NULL} +}; + +static void nn_(SoftPlus_init)(lua_State *L) +{ + luaT_pushmetaclass(L, torch_(Tensor_id)); + luaT_registeratname(L, nn_(SoftPlus__), "nn"); + lua_pop(L,1); +} + +#endif diff --git a/generic/SoftShrink.c b/generic/SoftShrink.c new file mode 100644 index 0000000..0bc4075 --- /dev/null +++ b/generic/SoftShrink.c @@ -0,0 +1,50 @@ +#ifndef TH_GENERIC_FILE +#define TH_GENERIC_FILE "generic/SoftShrink.c" +#else + +static int nn_(SoftShrink_updateOutput)(lua_State *L) +{ + THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id)); + real lambda = luaT_getfieldchecknumber(L, 1, "lambda"); + THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_(Tensor_id)); + + THTensor_(resizeAs)(output, input); + + TH_TENSOR_APPLY2(real, output, real, input, \ + if ((*input_data) > lambda) *output_data = *input_data - lambda; \ + else if ((*input_data) < -lambda) *output_data = *input_data + lambda; \ + else *output_data = 0;); + return 1; +} + +static int nn_(SoftShrink_updateGradInput)(lua_State *L) +{ + THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id)); + real lambda = luaT_getfieldchecknumber(L, 1, "lambda"); + THTensor *gradOutput = luaT_checkudata(L, 3, torch_(Tensor_id)); + THTensor *gradInput = luaT_getfieldcheckudata(L, 1, "gradInput", torch_(Tensor_id)); + + THTensor_(resizeAs)(gradInput, input); + TH_TENSOR_APPLY3(real, gradInput, real, gradOutput, real, input, \ + if ((*input_data) > lambda || (*input_data) < -lambda) \ + *gradInput_data = (*gradOutput_data); \ + else \ + *gradInput_data = 0; \ + ); + return 1; +} + +static const struct luaL_Reg nn_(SoftShrink__) [] = { + {"SoftShrink_updateOutput", nn_(SoftShrink_updateOutput)}, + {"SoftShrink_updateGradInput", nn_(SoftShrink_updateGradInput)}, + {NULL, NULL} +}; + +static void nn_(SoftShrink_init)(lua_State *L) +{ + luaT_pushmetaclass(L, torch_(Tensor_id)); + luaT_registeratname(L, nn_(SoftShrink__), "nn"); + lua_pop(L,1); +} + +#endif diff --git a/generic/SparseLinear.c b/generic/SparseLinear.c new file mode 100644 index 0000000..d29a1aa --- /dev/null +++ b/generic/SparseLinear.c @@ -0,0 +1,130 @@ +#ifndef TH_GENERIC_FILE +#define TH_GENERIC_FILE "generic/SparseLinear.c" +#else + +static int nn_(SparseLinear_updateOutput)(lua_State *L) +{ + long i; + THTensor * input = luaT_checkudata(L, 2, torch_(Tensor_id)); + THTensor * weight = luaT_getfieldcheckudata(L, 1, "weight", torch_(Tensor_id)); + THTensor * bias = luaT_getfieldcheckudata(L, 1, "bias", torch_(Tensor_id)); + THTensor * output = luaT_getfieldcheckudata(L, 1, "output", torch_(Tensor_id)); + long dim = weight->size[0]; /* number 
of weights.. */ + + THTensor_(copy)(output, bias); + for(i = 0; i < input->size[1]; i++) + { + long offset = (long)(THTensor_(get2d)(input, 0, i))-1; + + if(offset >= 0 && offset < dim) /* make sure indices are in bounds.. */ + { + real val = THTensor_(get2d)(input, 1, i); + THBlas_(axpy)(output->size[0], + val, + THTensor_(data)(weight)+offset*weight->stride[0], + weight->stride[1], + THTensor_(data)(output), + output->stride[0]); + } + else + luaL_error(L, "index out of bound"); + } + return 1; +} + +static int nn_(SparseLinear_accGradParameters)(lua_State *L) +{ + long i; + THTensor * input = luaT_checkudata(L, 2, torch_(Tensor_id)); + THTensor * gradOutput = luaT_checkudata(L, 3, torch_(Tensor_id)); + real scale = luaL_optnumber(L, 4, 1); + THTensor * weight = luaT_getfieldcheckudata(L, 1, "weight", torch_(Tensor_id)); + THTensor * output = luaT_getfieldcheckudata(L, 1, "output", torch_(Tensor_id)); + THTensor * gradBias = luaT_getfieldcheckudata(L, 1, "gradBias", torch_(Tensor_id)); + THTensor * gradWeight = luaT_getfieldcheckudata(L, 1, "gradWeight", torch_(Tensor_id)); + THTensor * lastInput = luaT_getfieldcheckudata(L, 1, "lastInput", torch_(Tensor_id)); + real weightDecay = luaT_getfieldchecknumber(L, 1, "weightDecay"); + long dim = gradWeight->size[0]; /* number of weights.. */ + + for(i = 0; i < input->size[1]; i++) + { + long offset = (long)(THTensor_(get2d)(input, 0, i))-1; + + if(offset >= 0 && offset < dim) /* make sure indices are in bounds.. */ + { + real val = scale*THTensor_(get2d)(input, 1, i); + THBlas_(scal)(gradOutput->size[0], + 0, + THTensor_(data)(gradWeight)+offset*gradWeight->stride[0], + gradWeight->stride[1]); /* zero */ + + THBlas_(axpy)(gradOutput->size[0], + val, + THTensor_(data)(gradOutput), + gradOutput->stride[0], + THTensor_(data)(gradWeight)+offset*gradWeight->stride[0], + gradWeight->stride[1]); + } + else + luaL_error(L, "index out of bound"); + } + + THTensor_(cadd)(gradBias, gradBias, 1, gradOutput); + + if(weightDecay != 0) + THTensor_(cadd)(gradWeight, gradWeight, weightDecay, weight); + + THTensor_(resizeAs)(lastInput, input); + THTensor_(copy)(lastInput, input); + + return 0; +} + +int nn_(SparseLinear_updateParameters)(lua_State *L) +{ + long i; + real learningRate = luaL_checknumber(L, 2); + THTensor * weight = luaT_getfieldcheckudata(L, 1, "weight", torch_(Tensor_id)); + THTensor * output = luaT_getfieldcheckudata(L, 1, "output", torch_(Tensor_id)); + THTensor * bias = luaT_getfieldcheckudata(L, 1, "bias", torch_(Tensor_id)); + THTensor * gradBias = luaT_getfieldcheckudata(L, 1, "gradBias", torch_(Tensor_id)); + THTensor * gradWeight = luaT_getfieldcheckudata(L, 1, "gradWeight", torch_(Tensor_id)); + THTensor * lastInput = luaT_getfieldcheckudata(L, 1, "lastInput", torch_(Tensor_id)); + real weightDecay = luaT_getfieldchecknumber(L, 1, "weightDecay"); + + long dim = weight->size[0]; /* number of weights.. */ + THTensor_(cadd)(bias, bias, -learningRate, gradBias); + + for(i = 0; i < lastInput->size[1]; i++) + { + long offset = (long)(THTensor_(get2d)(lastInput, 0, i))-1; + + if(offset >= 0 && offset < dim) /* make sure indices are in bounds.. 
*/ + { + THBlas_(axpy)(bias->size[0], + -learningRate, + THTensor_(data)(gradWeight)+offset*gradWeight->stride[0], + gradWeight->stride[1], + THTensor_(data)(weight)+offset*weight->stride[0], + weight->stride[1]); + } + else + luaL_error(L, "index out of bound"); + } + return 0; +} + +static const struct luaL_Reg nn_(SparseLinear__) [] = { + {"SparseLinear_updateOutput", nn_(SparseLinear_updateOutput)}, + {"SparseLinear_updateParameters", nn_(SparseLinear_updateParameters)}, + {NULL, NULL} +}; + +void nn_(SparseLinear_init)(lua_State *L) +{ + luaT_pushmetaclass(L, torch_(Tensor_id)); + luaT_registeratname(L, nn_(SparseLinear__), "nn"); + lua_pop(L,1); +} + +#endif diff --git a/generic/SpatialConvolution.c b/generic/SpatialConvolution.c new file mode 100644 index 0000000..de0de1d --- /dev/null +++ b/generic/SpatialConvolution.c @@ -0,0 +1,201 @@ +#ifndef TH_GENERIC_FILE +#define TH_GENERIC_FILE "generic/SpatialConvolution.c" +#else + +static void nn_(convolution_updateOutput_)(THTensor *input, THTensor *output, THTensor *weight, THTensor *bias, int dH, int dW) +{ + /* add bias */ + long i; + THTensor *outn = THTensor_(new)(); + for (i=0; i<bias->size[0]; i++) { + THTensor_(select)(outn,output,0,i); + THTensor_(fill)(outn, THTensor_(get1d)(bias, i)); + } + THTensor_(free)(outn); + + /* do convolutions */ + THTensor_(conv2Dmv)(output, 1.0, 1.0, input, weight, dH, dW, "V","X"); +} + +static int nn_(SpatialConvolution_updateOutput)(lua_State *L) +{ + THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id)); + int dW = luaT_getfieldcheckint(L, 1, "dW"); + int dH = luaT_getfieldcheckint(L, 1, "dH"); + + THTensor *weight = luaT_getfieldcheckudata(L, 1, "weight", torch_(Tensor_id)); + THTensor *bias = luaT_getfieldcheckudata(L, 1, "bias", torch_(Tensor_id)); + THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_(Tensor_id)); + + luaL_argcheck(L, input->nDimension == 3 || input->nDimension == 4, 2, "3D or 4D(batch mode) tensor expected"); + + int dimw = 2; + int dimh = 1; + if (input->nDimension == 4) { + dimw++; + dimh++; + } + + long nOutputPlane = weight->size[0]; + long nInputPlane = weight->size[1]; + long kW = weight->size[3]; + long kH = weight->size[2]; + long inputWidth = input->size[dimw]; + long inputHeight = input->size[dimh]; + long outputWidth = (inputWidth - kW) / dW + 1; + long outputHeight = (inputHeight - kH) / dH + 1; + + if (input->nDimension == 3) + { + THTensor_(resize3d)(output, nOutputPlane, outputHeight, outputWidth); +/* printf("\n*************\nstochastic\n"); */ +/* printf("no=%d\n",output->nDimension); */ +/* printf("no=%ld,%ld,%ld\n",nOutputPlane,outputHeight,outputWidth); */ +/* printf("ni=%d\n",input->nDimension); */ + nn_(convolution_updateOutput_)(input,output,weight,bias,dH,dW); +/* printf("stochastic\n");*/ + } + else + { + THTensor_(resize4d)(output, input->size[0], nOutputPlane, outputHeight, outputWidth); + THTensor *outn = THTensor_(new)(); + THTensor *inpn = THTensor_(new)(); + long i; + for (i=0; i<input->size[0]; i++) + { + THTensor_(select)(outn,output,0,i); + THTensor_(select)(inpn,input,0,i); + nn_(convolution_updateOutput_)(inpn,outn,weight,bias,dH,dW); + } + THTensor_(free)(outn); + THTensor_(free)(inpn); + } + +/* /\* add bias *\/ */ +/* long i; */ +/* THTensor *outn = THTensor_(new)(); */ +/* for (i=0; i<bias->size[0]; i++) { */ +/* THTensor_(select)(outn,output,0,i); */ +/* THTensor_(fill)(outn, THTensor_(get1d)(bias, i)); */ +/* } */ +/* THTensor_(free)(outn); */ + +/* /\* do convolutions *\/ */ +/* THTensor_(conv2Dmv)(output, 1.0, 
1.0, input, weight, dH, dW, "vx"); */ + + return 1; +} + + +static int nn_(SpatialConvolution_updateGradInput)(lua_State *L) +{ + THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id)); + THTensor *gradOutput = luaT_checkudata(L, 3, torch_(Tensor_id)); + int dW = luaT_getfieldcheckint(L, 1, "dW"); + int dH = luaT_getfieldcheckint(L, 1, "dH"); + int nOutputPlane = luaT_getfieldcheckint(L, 1, "nOutputPlane"); + + THTensor *weight = luaT_getfieldcheckudata(L, 1, "weight", torch_(Tensor_id)); + THTensor *gradInput = luaT_getfieldcheckudata(L, 1, "gradInput", torch_(Tensor_id)); + + THArgCheck( nOutputPlane == gradOutput->size[input->nDimension == 4 ? 1 : 0], 1, "Number of output features is not equal to nOutputPlane" ); + + long k; + + /* gradient to input */ + THTensor *tweight = THTensor_(newTranspose)(weight,0,1); + + if(input->nDimension == 3) + { + THTensor_(conv2Dmv)(gradInput, 0.0, 1.0, gradOutput, tweight, dH, dW, "F", "C"); + } + else + { + + THTensor_(resizeAs)(gradInput,input); + THTensor *outn = THTensor_(new)(); + THTensor *inpn = THTensor_(new)(); + long i; + for (i=0; i<input->size[0]; i++) + { + THTensor_(select)(outn,gradOutput,0,i); + THTensor_(select)(inpn,gradInput,0,i); + THTensor_(conv2Dmv)(inpn, 0.0, 1.0, outn, tweight, dH, dW, "F", "C"); + } + THTensor_(free)(outn); + THTensor_(free)(inpn); + } + THTensor_(free)(tweight); + + return 1; +} + +static void nn_(convolution_accGradParameters_)(THTensor *input, THTensor *gradOutput, THTensor *gradWeight, THTensor *gradBias, real scale, int dH, int dW) +{ + long k; + + /* gradient to bias */ + real *gradBias_data = THTensor_(data)(gradBias); + THTensor* gradOutSlice = THTensor_(new)(); + for(k = 0; k < gradOutput->size[0]; k++) + { + THTensor_(select)(gradOutSlice, gradOutput, 0, k); + gradBias_data[k] += scale*THTensor_(sumall)(gradOutSlice); + } + THTensor_(free)(gradOutSlice); + + /* gradient to kernels */ + THTensor_(conv2DRevger)(gradWeight, 1.0, scale, input, gradOutput, dH, dW); +} + +static int nn_(SpatialConvolution_accGradParameters)(lua_State *L) +{ + THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id)); + THTensor *gradOutput = luaT_checkudata(L, 3, torch_(Tensor_id)); + real scale = luaL_optnumber(L, 4, 1); + int dW = luaT_getfieldcheckint(L, 1, "dW"); + int dH = luaT_getfieldcheckint(L, 1, "dH"); + int nOutputPlane = luaT_getfieldcheckint(L, 1, "nOutputPlane"); + + THTensor *gradWeight = luaT_getfieldcheckudata(L, 1, "gradWeight", torch_(Tensor_id)); + THTensor *gradBias = luaT_getfieldcheckudata(L, 1, "gradBias", torch_(Tensor_id)); + + THArgCheck( nOutputPlane == gradOutput->size[input->nDimension == 4 ? 
1 : 0], 1, "Number of output features is not equal to nOutputPlane" ); + + if(input->nDimension == 3) + { + nn_(convolution_accGradParameters_)(input,gradOutput,gradWeight,gradBias,scale,dH,dW); + } + else + { + THTensor *outn = THTensor_(new)(); + THTensor *inpn = THTensor_(new)(); + long i; + for (i=0; i<input->size[0]; i++) + { + THTensor_(select)(outn,gradOutput,0,i); + THTensor_(select)(inpn,input,0,i); + nn_(convolution_accGradParameters_)(inpn,outn,gradWeight,gradBias,scale,dH,dW); + } + THTensor_(free)(outn); + THTensor_(free)(inpn); + } + + return 0; +} + +static const struct luaL_Reg nn_(SpatialConvolution__) [] = { + {"SpatialConvolution_updateOutput", nn_(SpatialConvolution_updateOutput)}, + {"SpatialConvolution_updateGradInput", nn_(SpatialConvolution_updateGradInput)}, + {"SpatialConvolution_accGradParameters", nn_(SpatialConvolution_accGradParameters)}, + {NULL, NULL} +}; + +static void nn_(SpatialConvolution_init)(lua_State *L) +{ + luaT_pushmetaclass(L, torch_(Tensor_id)); + luaT_registeratname(L, nn_(SpatialConvolution__), "nn"); + lua_pop(L,1); +} + +#endif diff --git a/generic/SpatialConvolutionMap.c b/generic/SpatialConvolutionMap.c new file mode 100644 index 0000000..ff7d8ca --- /dev/null +++ b/generic/SpatialConvolutionMap.c @@ -0,0 +1,229 @@ +#ifndef TH_GENERIC_FILE +#define TH_GENERIC_FILE "generic/SpatialConvolutionMap.c" +#else + +static int nn_(SpatialConvolutionMap_updateOutput)(lua_State *L) +{ + THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id)); + int kW = luaT_getfieldcheckint(L, 1, "kW"); + int kH = luaT_getfieldcheckint(L, 1, "kH"); + int dW = luaT_getfieldcheckint(L, 1, "dW"); + int dH = luaT_getfieldcheckint(L, 1, "dH"); + int nInputPlane = luaT_getfieldcheckint(L, 1, "nInputPlane"); + int nOutputPlane = luaT_getfieldcheckint(L, 1, "nOutputPlane"); + + THTensor *connTable = luaT_getfieldcheckudata(L, 1, "connTable", torch_(Tensor_id)); + THTensor *weight = luaT_getfieldcheckudata(L, 1, "weight", torch_(Tensor_id)); + THTensor *bias = luaT_getfieldcheckudata(L, 1, "bias", torch_(Tensor_id)); + THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_(Tensor_id)); + + luaL_argcheck(L, input->nDimension == 3, 2, "3D tensor expected"); + luaL_argcheck(L, input->size[0] == nInputPlane, 2, "invalid number of input planes"); + luaL_argcheck(L, input->size[2] >= kW && input->size[1] >= kH, 2, "input image smaller than kernel size"); + + THTensor_(resize3d)(output, nOutputPlane, + (input->size[1] - kH) / dH + 1, + (input->size[2] - kW) / dW + 1); + + // contiguous + input = THTensor_(newContiguous)(input); + output = THTensor_(newContiguous)(output); + + // get raw pointers + real *input_data = THTensor_(data)(input); + real *output_data = THTensor_(data)(output); + real *weight_data = THTensor_(data)(weight); + + // and dims + long input_n = input->size[0]; + long input_h = input->size[1]; + long input_w = input->size[2]; + long output_n = output->size[0]; + long output_h = output->size[1]; + long output_w = output->size[2]; + long weight_n = weight->size[0]; + long weight_h = weight->size[1]; + long weight_w = weight->size[2]; + + // add bias + THTensor *outputPlane = THTensor_(new)(); + int k; + for (k = 0; k < nOutputPlane; k++) { + THTensor_(select)(outputPlane,output,0,k); + THTensor_(fill)(outputPlane, THTensor_(get1d)(bias, k)); + } + THTensor_(free)(outputPlane); + + // convolve all maps + int i,o; + int nweight = connTable->size[0]; + for (k = 0; k < nweight; k++) { + // get offsets for input/output + o = 
(int)THTensor_(get2d)(connTable,k,1)-1; + i = (int)THTensor_(get2d)(connTable,k,0)-1; + + // convolve each map + THTensor_(validXCorr2Dptr)(output_data + o*output_w*output_h, + 1.0, + input_data + i*input_w*input_h, input_h, input_w, + weight_data + k*weight_w*weight_h, weight_h, weight_w, + dH, dW); + } + + // clean up + THTensor_(free)(input); + THTensor_(free)(output); + + return 1; +} + +static int nn_(SpatialConvolutionMap_updateGradInput)(lua_State *L) +{ + THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id)); + THTensor *gradOutput = luaT_checkudata(L, 3, torch_(Tensor_id)); + int kW = luaT_getfieldcheckint(L, 1, "kW"); + int kH = luaT_getfieldcheckint(L, 1, "kH"); + int dW = luaT_getfieldcheckint(L, 1, "dW"); + int dH = luaT_getfieldcheckint(L, 1, "dH"); + int nInputPlane = luaT_getfieldcheckint(L, 1, "nInputPlane"); + int nOutputPlane = luaT_getfieldcheckint(L, 1, "nOutputPlane"); + + THTensor *connTable = luaT_getfieldcheckudata(L, 1, "connTable", torch_(Tensor_id)); + THTensor *weight = luaT_getfieldcheckudata(L, 1, "weight", torch_(Tensor_id)); + THTensor *gradWeight = luaT_getfieldcheckudata(L, 1, "gradWeight", torch_(Tensor_id)); + THTensor *gradBias = luaT_getfieldcheckudata(L, 1, "gradBias", torch_(Tensor_id)); + THTensor *gradInput = luaT_getfieldcheckudata(L, 1, "gradInput", torch_(Tensor_id)); + + // contiguous + gradInput = THTensor_(newContiguous)(gradInput); + gradOutput = THTensor_(newContiguous)(gradOutput); + + // Resize/Zero + THTensor_(resizeAs)(gradInput, input); + THTensor_(zero)(gradInput); + + // get raw pointers + real *gradInput_data = THTensor_(data)(gradInput); + real *gradOutput_data = THTensor_(data)(gradOutput); + real *weight_data = THTensor_(data)(weight); + real *gradWeight_data = THTensor_(data)(gradWeight); + + // and dims + long input_n = input->size[0]; + long input_h = input->size[1]; + long input_w = input->size[2]; + long output_n = gradOutput->size[0]; + long output_h = gradOutput->size[1]; + long output_w = gradOutput->size[2]; + long weight_n = weight->size[0]; + long weight_h = weight->size[1]; + long weight_w = weight->size[2]; + + // updateGradInput all + int k; + int nkernel = connTable->size[0]; + for(k = 0; k < nkernel; k++) + { + int o = (int)THTensor_(get2d)(connTable,k,1)-1; + int i = (int)THTensor_(get2d)(connTable,k,0)-1; + + // gradient to input + THTensor_(fullConv2Dptr)(gradInput_data + i*input_w*input_h, + 1.0, + gradOutput_data + o*output_w*output_h, output_h, output_w, + weight_data + k*weight_w*weight_h, weight_h, weight_w, + dH, dW); + } + + // clean up + THTensor_(free)(gradInput); + THTensor_(free)(gradOutput); + + return 1; +} + +static int nn_(SpatialConvolutionMap_accGradParameters)(lua_State *L) +{ + THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id)); + THTensor *gradOutput = luaT_checkudata(L, 3, torch_(Tensor_id)); + int kW = luaT_getfieldcheckint(L, 1, "kW"); + int kH = luaT_getfieldcheckint(L, 1, "kH"); + int dW = luaT_getfieldcheckint(L, 1, "dW"); + int dH = luaT_getfieldcheckint(L, 1, "dH"); + int nInputPlane = luaT_getfieldcheckint(L, 1, "nInputPlane"); + int nOutputPlane = luaT_getfieldcheckint(L, 1, "nOutputPlane"); + real scale = luaL_optnumber(L, 4, 1); + + THTensor *connTable = luaT_getfieldcheckudata(L, 1, "connTable", torch_(Tensor_id)); + THTensor *weight = luaT_getfieldcheckudata(L, 1, "weight", torch_(Tensor_id)); + THTensor *gradWeight = luaT_getfieldcheckudata(L, 1, "gradWeight", torch_(Tensor_id)); + THTensor *gradBias = luaT_getfieldcheckudata(L, 1, "gradBias", torch_(Tensor_id)); 
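+  /* each row k of connTable holds an (input plane, output plane) pair, as in updateOutput above; the loops below accumulate the bias gradient per output plane and the kernel gradient per table entry */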
+ THTensor *gradInput = luaT_getfieldcheckudata(L, 1, "gradInput", torch_(Tensor_id)); + + // contiguous + input = THTensor_(newContiguous)(input); + gradOutput = THTensor_(newContiguous)(gradOutput); + + // get raw pointers + real *input_data = THTensor_(data)(input); + real *gradOutput_data = THTensor_(data)(gradOutput); + real *weight_data = THTensor_(data)(weight); + real *gradWeight_data = THTensor_(data)(gradWeight); + + // and dims + long input_n = input->size[0]; + long input_h = input->size[1]; + long input_w = input->size[2]; + long output_n = gradOutput->size[0]; + long output_h = gradOutput->size[1]; + long output_w = gradOutput->size[2]; + long weight_n = weight->size[0]; + long weight_h = weight->size[1]; + long weight_w = weight->size[2]; + + // gradients wrt bias + int k; + THTensor *gradOutputPlane = THTensor_(new)(); + real *gradBias_data = THTensor_(data)(gradBias); + for(k = 0; k < nOutputPlane; k++) { + THTensor_(select)(gradOutputPlane, gradOutput, 0, k); + gradBias_data[k] += scale * THTensor_(sumall)(gradOutputPlane); + } + THTensor_(free)(gradOutputPlane); + + // gradients wrt weight + int nkernel = connTable->size[0]; + for(k = 0; k < nkernel; k++) + { + int o = (int)THTensor_(get2d)(connTable,k,1)-1; + int i = (int)THTensor_(get2d)(connTable,k,0)-1; + + // gradient to kernel + THTensor_(validXCorr2DRevptr)(gradWeight_data + k*weight_w*weight_h, + scale, + input_data + i*input_w*input_h, input_h, input_w, + gradOutput_data + o*output_w*output_h, output_h, output_w, + dH, dW); + } + + // clean up + THTensor_(free)(input); + THTensor_(free)(gradOutput); + return 0; +} + +static const struct luaL_Reg nn_(SpatialConvolutionMap__) [] = { + {"SpatialConvolutionMap_updateOutput", nn_(SpatialConvolutionMap_updateOutput)}, + {"SpatialConvolutionMap_updateGradInput", nn_(SpatialConvolutionMap_updateGradInput)}, + {"SpatialConvolutionMap_accGradParameters", nn_(SpatialConvolutionMap_accGradParameters)}, + {NULL, NULL} +}; + +static void nn_(SpatialConvolutionMap_init)(lua_State *L) +{ + luaT_pushmetaclass(L, torch_(Tensor_id)); + luaT_registeratname(L, nn_(SpatialConvolutionMap__), "nn"); + lua_pop(L,1); +} + +#endif diff --git a/generic/SpatialMaxPooling.c b/generic/SpatialMaxPooling.c new file mode 100644 index 0000000..b9fab3b --- /dev/null +++ b/generic/SpatialMaxPooling.c @@ -0,0 +1,163 @@ +#ifndef TH_GENERIC_FILE +#define TH_GENERIC_FILE "generic/SpatialMaxPooling.c" +#else + +static int nn_(SpatialMaxPooling_updateOutput)(lua_State *L) +{ + THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id)); + int kW = luaT_getfieldcheckint(L, 1, "kW"); + int kH = luaT_getfieldcheckint(L, 1, "kH"); + int dW = luaT_getfieldcheckint(L, 1, "dW"); + int dH = luaT_getfieldcheckint(L, 1, "dH"); + THTensor *indices = luaT_getfieldcheckudata(L, 1, "indices", torch_(Tensor_id)); + THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_(Tensor_id)); + + luaL_argcheck(L, input->nDimension == 3, 2, "3D tensor expected"); + luaL_argcheck(L, input->size[2] >= kW && input->size[1] >= kH, 2, "input image smaller than kernel size"); + + // sizes + long nslices = input->size[0]; + long iheight = input->size[1]; + long iwidth = input->size[2]; + long oheight = (iheight - kH) / dH + 1; + long owidth = (iwidth - kW) / dW + 1; + + // get contiguous input + input = THTensor_(newContiguous)(input); + + // resize output + THTensor_(resize3d)(output, nslices, oheight, owidth); + + // indices will contain i,j locatyions for each output point + THTensor_(resize4d)(indices, 2, nslices, oheight, 
owidth); + + // get raw pointers + real *input_data = THTensor_(data)(input); + real *output_data = THTensor_(data)(output); + real *indices_data = THTensor_(data)(indices); + + // compute max pooling for each input slice + long k; + for (k = 0; k < nslices; k++) { + // pointers to slices + real *input_p = input_data + k*iwidth*iheight; + real *output_p = output_data + k*owidth*oheight; + real *indy_p = indices_data + k*owidth*oheight; + real *indx_p = indices_data + (k+nslices)*owidth*oheight; + + // loop over output + int i,j; + for(i = 0; i < oheight; i++) { + for(j = 0; j < owidth; j++) { + // local pointers + real *ip = input_p + i*iwidth*dH + j*dW; + real *op = output_p + i*owidth + j; + real *indyp = indy_p + i*owidth + j; + real *indxp = indx_p + i*owidth + j; + + // compute local max: + long maxindex = -1; + real maxval = -THInf; + long tcntr = 0; + int x,y; + for(y = 0; y < kH; y++) { + for(x = 0; x < kW; x++) { + real val = *(ip + y*iwidth + x); + if (val > maxval) { + maxval = val; + maxindex = tcntr; + } + tcntr++; + } + } + + // set output to local max + *op = maxval; + + // store location of max (x,y) + *indyp = (int)(maxindex / dW)+1; + *indxp = (maxindex % dW) +1; + } + } + } + + // cleanup + THTensor_(free)(input); + + return 1; +} + +static int nn_(SpatialMaxPooling_updateGradInput)(lua_State *L) +{ + THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id)); + THTensor *gradOutput = luaT_checkudata(L, 3, torch_(Tensor_id)); + int kW = luaT_getfieldcheckint(L, 1, "kW"); + int kH = luaT_getfieldcheckint(L, 1, "kH"); + int dW = luaT_getfieldcheckint(L, 1, "dW"); + int dH = luaT_getfieldcheckint(L, 1, "dH"); + THTensor *indices = luaT_getfieldcheckudata(L, 1, "indices", torch_(Tensor_id)); + THTensor *gradInput = luaT_getfieldcheckudata(L, 1, "gradInput", torch_(Tensor_id)); + + // get contiguous gradOutput + gradOutput = THTensor_(newContiguous)(gradOutput); + + // resize + THTensor_(resizeAs)(gradInput, input); + THTensor_(zero)(gradInput); + + // sizes + int ichannels = input->size[0]; + int iheight = input->size[1]; + int iwidth = input->size[2]; + int ochannels = ichannels; + int oheight = gradOutput->size[1]; + int owidth = gradOutput->size[2]; + + // get raw pointers + real *gradInput_data = THTensor_(data)(gradInput); + real *gradOutput_data = THTensor_(data)(gradOutput); + real *indices_data = THTensor_(data)(indices); + + // backprop + long k; + for (k = 0; k < input->size[0]; k++) { + // pointers to slices + real *gradOutput_p = gradOutput_data + k*owidth*oheight; + real *gradInput_p = gradInput_data + k*iwidth*iheight; + real *indy_p = indices_data + k*owidth*oheight; + real *indx_p = indices_data + (k+ochannels)*owidth*oheight; + + // calculate max points + int i,j; + for(i = 0; i < oheight; i++) { + for(j = 0; j < owidth; j++) { + // retrieve position of max + long maxi = *(indy_p + i*owidth + j) - 1 + i*dH; + long maxj = *(indx_p + i*owidth + j) - 1 + j*dW; + + // update gradient + *(gradInput_p + maxi*iwidth + maxj) += *(gradOutput_p + i*owidth + j); + } + } + } + + // cleanup + THTensor_(free)(gradOutput); + + return 1; +} + +static const struct luaL_Reg nn_(SpatialMaxPooling__) [] = { + {"SpatialMaxPooling_updateOutput", nn_(SpatialMaxPooling_updateOutput)}, + {"SpatialMaxPooling_updateGradInput", nn_(SpatialMaxPooling_updateGradInput)}, + {NULL, NULL} +}; + +static void nn_(SpatialMaxPooling_init)(lua_State *L) +{ + luaT_pushmetaclass(L, torch_(Tensor_id)); + luaT_registeratname(L, nn_(SpatialMaxPooling__), "nn"); + lua_pop(L,1); +} + +#endif diff --git 
a/generic/SpatialSubSampling.c b/generic/SpatialSubSampling.c new file mode 100644 index 0000000..705253f --- /dev/null +++ b/generic/SpatialSubSampling.c @@ -0,0 +1,278 @@ +#ifndef TH_GENERIC_FILE +#define TH_GENERIC_FILE "generic/SpatialSubSampling.c" +#else + +static int nn_(SpatialSubSampling_updateOutput)(lua_State *L) +{ + THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id)); + int kW = luaT_getfieldcheckint(L, 1, "kW"); + int kH = luaT_getfieldcheckint(L, 1, "kH"); + int dW = luaT_getfieldcheckint(L, 1, "dW"); + int dH = luaT_getfieldcheckint(L, 1, "dH"); + int nInputPlane = luaT_getfieldcheckint(L, 1, "nInputPlane"); + + THTensor *weight = luaT_getfieldcheckudata(L, 1, "weight", torch_(Tensor_id)); + THTensor *bias = luaT_getfieldcheckudata(L, 1, "bias", torch_(Tensor_id)); + THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_(Tensor_id)); + + real *weight_data = THTensor_(data)(weight); + real *bias_data = THTensor_(data)(bias); + real *output_data; + real *input_data; + + + luaL_argcheck(L, input->nDimension == 3 || input->nDimension == 4, 2, "3D or 4D(batch mode) tensor expected"); + + int dimw = 2; + int dimh = 1; + if (input->nDimension == 4) { + dimw++; + dimh++; + } + + long inputWidth = input->size[dimw]; + long inputHeight = input->size[dimh]; + long outputWidth = (inputWidth - kW) / dW + 1; + long outputHeight = (inputHeight - kH) / dH + 1; + + + luaL_argcheck(L, input->size[dimh-1] == nInputPlane, 2, "invalid number of input planes"); + luaL_argcheck(L, inputWidth >= kW && inputHeight >= kH, 2, "input image smaller than kernel size"); + + input = THTensor_(newContiguous)(input); + input_data = THTensor_(data)(input); + + long nbatch = 1; + if (input->nDimension == 3) + { + THTensor_(resize3d)(output, nInputPlane, outputHeight, outputWidth); + } + else + { + nbatch = input->size[0]; + THTensor_(resize4d)(output, nbatch, nInputPlane, outputHeight, outputWidth); + } + + output_data = THTensor_(data)(output); + + long i, k, p; + + for(p = 0; p < nbatch; p++) + { + //input_data += p*nInputPlane*inputWidth*inputHeight; + //output_data += p*nInputPlane*outputHeight*outputWidth; + for(k = 0; k < nInputPlane; k++) + { + real *ptr_output; + long xx, yy; + + /* Get the good mask for (k,i) (k out, i in) */ + real the_weight = weight_data[k]; + + /* Initialize to the bias */ + real z = bias_data[k]; + for(i = 0; i < outputWidth*outputHeight; i++) + output_data[i] = z; + + /* For all output pixels... */ + ptr_output = output_data; + for(yy = 0; yy < outputHeight; yy++) + { + for(xx = 0; xx < outputWidth; xx++) + { + // Compute the mean of the input image... 
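+          /* (more precisely: the kW x kH window sum; the per-plane weight applied below makes this a learned scaling of the sum rather than a literal mean) */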
+ real *ptr_input = input_data+yy*dH*inputWidth+xx*dW; + real sum = 0; + long kx, ky; + + for(ky = 0; ky < kH; ky++) + { + for(kx = 0; kx < kW; kx++) + sum += ptr_input[kx]; + ptr_input += inputWidth; // next input line + } + + // Update output + *ptr_output++ += the_weight*sum; + } + } + + // Next input/output plane + output_data += outputWidth*outputHeight; + input_data += inputWidth*inputHeight; + } + } + + THTensor_(free)(input); + + return 1; +} + +static int nn_(SpatialSubSampling_updateGradInput)(lua_State *L) +{ + THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id)); + THTensor *gradOutput = luaT_checkudata(L, 3, torch_(Tensor_id)); + int kW = luaT_getfieldcheckint(L, 1, "kW"); + int kH = luaT_getfieldcheckint(L, 1, "kH"); + int dW = luaT_getfieldcheckint(L, 1, "dW"); + int dH = luaT_getfieldcheckint(L, 1, "dH"); + int nInputPlane = luaT_getfieldcheckint(L, 1, "nInputPlane"); + + THTensor *weight = luaT_getfieldcheckudata(L, 1, "weight", torch_(Tensor_id)); + THTensor *gradInput = luaT_getfieldcheckudata(L, 1, "gradInput", torch_(Tensor_id)); + + int dimw = 2; + int dimh = 1; + long nbatch = 1; + if (input->nDimension == 4) { + dimw++; + dimh++; + nbatch = input->size[0]; + } + + long inputWidth = input->size[dimw]; + long inputHeight = input->size[dimh]; + long outputWidth = (inputWidth - kW) / dW + 1; + long outputHeight = (inputHeight - kH) / dH + 1; + + real *weight_data = THTensor_(data)(weight); + real *gradOutput_data = THTensor_(data)(gradOutput); + real *gradInput_data; + + THTensor_(resizeAs)(gradInput, input); + THTensor_(zero)(gradInput); + gradInput_data = THTensor_(data)(gradInput); + gradOutput_data = THTensor_(data)(gradOutput); + + long i, k, p; + + for(p = 0; p < nbatch; p++) + { + //gradInput_data += p*nInputPlane*inputWidth*inputHeight; + //gradOutput_data += p*nInputPlane*outputWidth*outputHeight; + for(k = 0; k < nInputPlane; k++) + { + real the_weight = weight_data[k]; + real *ptr_gradOutput = gradOutput_data; + long xx, yy; + + for(yy = 0; yy < outputHeight; yy++) + { + for(xx = 0; xx < outputWidth; xx++) + { + real *ptr_gradInput = gradInput_data+yy*dH*inputWidth+xx*dW; + real z = *ptr_gradOutput++ * the_weight; + long kx, ky; + + for(ky = 0; ky < kH; ky++) + { + for(kx = 0; kx < kW; kx++) + ptr_gradInput[kx] += z; + ptr_gradInput += inputWidth; + } + } + } + gradOutput_data += outputWidth*outputHeight; + gradInput_data += inputWidth*inputHeight; + } + } + + return 1; +} + +static int nn_(SpatialSubSampling_accGradParameters)(lua_State *L) +{ + THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id)); + THTensor *gradOutput = luaT_checkudata(L, 3, torch_(Tensor_id)); + real scale = luaL_optnumber(L, 4, 1); + int kW = luaT_getfieldcheckint(L, 1, "kW"); + int kH = luaT_getfieldcheckint(L, 1, "kH"); + int dW = luaT_getfieldcheckint(L, 1, "dW"); + int dH = luaT_getfieldcheckint(L, 1, "dH"); + int nInputPlane = luaT_getfieldcheckint(L, 1, "nInputPlane"); + + THTensor *gradWeight = luaT_getfieldcheckudata(L, 1, "gradWeight", torch_(Tensor_id)); + THTensor *gradBias = luaT_getfieldcheckudata(L, 1, "gradBias", torch_(Tensor_id)); + + int dimw = 2; + int dimh = 1; + long nbatch = 1; + if (input->nDimension == 4) { + dimw++; + dimh++; + nbatch = input->size[0]; + } + + long inputWidth = input->size[dimw]; + long inputHeight = input->size[dimh]; + long outputWidth = (inputWidth - kW) / dW + 1; + long outputHeight = (inputHeight - kH) / dH + 1; + + real *gradWeight_data = THTensor_(data)(gradWeight); + real *gradBias_data = THTensor_(data)(gradBias); + real 
*gradOutput_data = THTensor_(data)(gradOutput); + real *input_data; + + input = THTensor_(newContiguous)(input); + input_data = THTensor_(data)(input); + + long i, k, p; + for(p = 0; p < nbatch; p++) + { + //input_data += p*nInputPlane*inputWidth*inputHeight; + //gradOutput_data += p*nInputPlane*inputWidth*inputHeight; + for(k = 0; k < nInputPlane; k++) + { + real *ptr_gradOutput = gradOutput_data; + real sum; + long xx, yy; + + sum = 0; + for(i = 0; i < outputWidth*outputHeight; i++) + sum += gradOutput_data[i]; + gradBias_data[k] += scale*sum; + + sum = 0; + for(yy = 0; yy < outputHeight; yy++) + { + for(xx = 0; xx < outputWidth; xx++) + { + real *ptr_input = input_data+yy*dH*inputWidth+xx*dW; + real z = *ptr_gradOutput++; + long kx, ky; + + for(ky = 0; ky < kH; ky++) + { + for(kx = 0; kx < kW; kx++) + sum += z * ptr_input[kx]; + ptr_input += inputWidth; + } + } + } + gradWeight_data[k] += scale*sum; + gradOutput_data += outputWidth*outputHeight; + input_data += inputWidth*inputHeight; + } + } + + + THTensor_(free)(input); + + return 0; +} + +static const struct luaL_Reg nn_(SpatialSubSampling__) [] = { + {"SpatialSubSampling_updateOutput", nn_(SpatialSubSampling_updateOutput)}, + {"SpatialSubSampling_updateGradInput", nn_(SpatialSubSampling_updateGradInput)}, + {"SpatialSubSampling_accGradParameters", nn_(SpatialSubSampling_accGradParameters)}, + {NULL, NULL} +}; + +static void nn_(SpatialSubSampling_init)(lua_State *L) +{ + luaT_pushmetaclass(L, torch_(Tensor_id)); + luaT_registeratname(L, nn_(SpatialSubSampling__), "nn"); + lua_pop(L,1); +} + +#endif diff --git a/generic/Sqrt.c b/generic/Sqrt.c new file mode 100644 index 0000000..a739e96 --- /dev/null +++ b/generic/Sqrt.c @@ -0,0 +1,46 @@ +#ifndef TH_GENERIC_FILE +#define TH_GENERIC_FILE "generic/Sqrt.c" +#else + +static int nn_(Sqrt_updateOutput)(lua_State *L) +{ + THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id)); + THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_(Tensor_id)); + + THTensor_(resizeAs)(output, input); + + TH_TENSOR_APPLY2(real, output, real, input, \ + *output_data = sqrt(*input_data);); + + return 1; +} + +static int nn_(Sqrt_updateGradInput)(lua_State *L) +{ + THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id)); + THTensor *gradOutput = luaT_checkudata(L, 3, torch_(Tensor_id)); + THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_(Tensor_id)); + THTensor *gradInput = luaT_getfieldcheckudata(L, 1, "gradInput", torch_(Tensor_id)); + + THTensor_(resizeAs)(gradInput, input); + + TH_TENSOR_APPLY3(real, gradInput, real, gradOutput, real, output, \ + *gradInput_data = 0.5 * (*gradOutput_data / *output_data);); + + return 1; +} + +static const struct luaL_Reg nn_(Sqrt__) [] = { + {"Sqrt_updateOutput", nn_(Sqrt_updateOutput)}, + {"Sqrt_updateGradInput", nn_(Sqrt_updateGradInput)}, + {NULL, NULL} +}; + +static void nn_(Sqrt_init)(lua_State *L) +{ + luaT_pushmetaclass(L, torch_(Tensor_id)); + luaT_registeratname(L, nn_(Sqrt__), "nn"); + lua_pop(L,1); +} + +#endif diff --git a/generic/Square.c b/generic/Square.c new file mode 100644 index 0000000..409055d --- /dev/null +++ b/generic/Square.c @@ -0,0 +1,45 @@ +#ifndef TH_GENERIC_FILE +#define TH_GENERIC_FILE "generic/Square.c" +#else + +static int nn_(Square_updateOutput)(lua_State *L) +{ + THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id)); + THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_(Tensor_id)); + + THTensor_(resizeAs)(output, input); + + TH_TENSOR_APPLY2(real, output, real, input, \ + *output_data 
= *input_data * *input_data;); + + return 1; +} + +static int nn_(Square_updateGradInput)(lua_State *L) +{ + THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id)); + THTensor *gradOutput = luaT_checkudata(L, 3, torch_(Tensor_id)); + THTensor *gradInput = luaT_getfieldcheckudata(L, 1, "gradInput", torch_(Tensor_id)); + + THTensor_(resizeAs)(gradInput, input); + + TH_TENSOR_APPLY3(real, gradInput, real, gradOutput, real, input, \ + *gradInput_data = 2.0 * (*gradOutput_data) * (*input_data);); + + return 1; +} + +static const struct luaL_Reg nn_(Square__) [] = { + {"Square_updateOutput", nn_(Square_updateOutput)}, + {"Square_updateGradInput", nn_(Square_updateGradInput)}, + {NULL, NULL} +}; + +static void nn_(Square_init)(lua_State *L) +{ + luaT_pushmetaclass(L, torch_(Tensor_id)); + luaT_registeratname(L, nn_(Square__), "nn"); + lua_pop(L,1); +} + +#endif diff --git a/generic/Tanh.c b/generic/Tanh.c new file mode 100644 index 0000000..5c24d15 --- /dev/null +++ b/generic/Tanh.c @@ -0,0 +1,45 @@ +#ifndef TH_GENERIC_FILE +#define TH_GENERIC_FILE "generic/Tanh.c" +#else + +static int nn_(Tanh_updateOutput)(lua_State *L) +{ + THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id)); + THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_(Tensor_id)); + + THTensor_(resizeAs)(output, input); + + TH_TENSOR_APPLY2(real, output, real, input, \ + *output_data = tanh(*input_data);) + + return 1; +} + +static int nn_(Tanh_updateGradInput)(lua_State *L) +{ + THTensor *gradOutput = luaT_checkudata(L, 3, torch_(Tensor_id)); + THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_(Tensor_id)); + THTensor *gradInput = luaT_getfieldcheckudata(L, 1, "gradInput", torch_(Tensor_id)); + + THTensor_(resizeAs)(gradInput, output); + TH_TENSOR_APPLY3(real, gradInput, real, gradOutput, real, output, \ + real z = *output_data; \ + *gradInput_data = *gradOutput_data * (1. 
- z*z);); + return 1; +} + +static const struct luaL_Reg nn_(Tanh__) [] = { + {"Tanh_updateOutput", nn_(Tanh_updateOutput)}, + {"Tanh_updateGradInput", nn_(Tanh_updateGradInput)}, + {NULL, NULL} +}; + +static void nn_(Tanh_init)(lua_State *L) +{ + luaT_pushmetaclass(L, torch_(Tensor_id)); + luaT_registeratname(L, nn_(Tanh__), "nn"); + lua_pop(L,1); + +} + +#endif diff --git a/generic/TemporalConvolution.c b/generic/TemporalConvolution.c new file mode 100644 index 0000000..fa14a22 --- /dev/null +++ b/generic/TemporalConvolution.c @@ -0,0 +1,194 @@ +#ifndef TH_GENERIC_FILE +#define TH_GENERIC_FILE "generic/TemporalConvolution.c" +#else + +static int nn_(TemporalConvolution_updateOutput)(lua_State *L) +{ + THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id)); + int kW = luaT_getfieldcheckint(L, 1, "kW"); + int dW = luaT_getfieldcheckint(L, 1, "dW"); + int inputFrameSize = luaT_getfieldcheckint(L, 1, "inputFrameSize"); + int outputFrameSize = luaT_getfieldcheckint(L, 1, "outputFrameSize"); + + THTensor *weight = luaT_getfieldcheckudata(L, 1, "weight", torch_(Tensor_id)); + THTensor *bias = luaT_getfieldcheckudata(L, 1, "bias", torch_(Tensor_id)); + THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_(Tensor_id)); + + THTensor *outputWindow, *inputWindow; + int nInputFrame, nOutputFrame; + long k; + + luaL_argcheck(L, input->nDimension == 2, 2, "2D tensor expected"); + luaL_argcheck(L, input->size[1] == inputFrameSize, 2, "invalid input frame size"); + luaL_argcheck(L, input->size[0] >= kW, 2, "input sequence smaller than kernel size"); + + input = THTensor_(newContiguous)(input); + outputWindow = THTensor_(new)(); + inputWindow = THTensor_(new)(); + + nInputFrame = input->size[0]; + nOutputFrame = (nInputFrame - kW) / dW + 1; + + THTensor_(resize2d)(output, + nOutputFrame, + outputFrameSize); + + /* bias first */ + for(k = 0; k < nOutputFrame; k++) + { + THTensor_(select)(outputWindow, output, 0, k); + THTensor_(copy)(outputWindow, bias); + } + + /* ouch */ + for(k = 0; nOutputFrame > 0; k++) + { + long outputFrameStride = (kW-1)/dW+1; + long inputFrameStride = outputFrameStride*dW; + long nFrame = (nInputFrame-k*dW-kW)/inputFrameStride + 1; + nOutputFrame -= nFrame; + + THTensor_(setStorage2d)(inputWindow, input->storage, + input->storageOffset+k*dW*input->size[1], + nFrame, inputFrameStride*input->size[1], + kW*input->size[1], 1); + + THTensor_(setStorage2d)(outputWindow, output->storage, + output->storageOffset + k*output->size[1], + nFrame, outputFrameStride*output->size[1], + output->size[1], 1); + + THTensor_(transpose)(weight, NULL, 0, 1); + THTensor_(addmm)(outputWindow, 1, outputWindow, 1, inputWindow, weight); + THTensor_(transpose)(weight, NULL, 0, 1); + } + + THTensor_(free)(outputWindow); + THTensor_(free)(inputWindow); + THTensor_(free)(input); + + return 1; +} + +static int nn_(TemporalConvolution_updateGradInput)(lua_State *L) +{ + THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id)); + THTensor *gradOutput = luaT_checkudata(L, 3, torch_(Tensor_id)); + int kW = luaT_getfieldcheckint(L, 1, "kW"); + int dW = luaT_getfieldcheckint(L, 1, "dW"); + long nInputFrame = input->size[0]; + long nOutputFrame = gradOutput->size[0]; + + THTensor *weight = luaT_getfieldcheckudata(L, 1, "weight", torch_(Tensor_id)); + THTensor *gradInput = luaT_getfieldcheckudata(L, 1, "gradInput", torch_(Tensor_id)); + + THTensor *gradOutputWindow; + THTensor *gradInputWindow; + long k; + + gradOutputWindow = THTensor_(new)(); + gradInputWindow = THTensor_(new)(); + + 
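+  /* the strided loop below ("ouch") mirrors updateOutput: output frames are grouped so that, within a group, their kW-frame input windows do not overlap, letting each group be propagated with a single addmm over 2D strided views */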
THTensor_(resizeAs)(gradInput, input); + THTensor_(zero)(gradInput); + + /* ouch */ + for(k = 0; nOutputFrame > 0; k++) + { + long outputFrameStride = (kW-1)/dW+1; + long inputFrameStride = outputFrameStride*dW; + long nFrame = (nInputFrame-k*dW-kW)/inputFrameStride + 1; + nOutputFrame -= nFrame; + + THTensor_(setStorage2d)(gradOutputWindow, gradOutput->storage, + gradOutput->storageOffset + k*gradOutput->size[1], + nFrame, outputFrameStride*gradOutput->size[1], + gradOutput->size[1], 1); + + THTensor_(setStorage2d)(gradInputWindow, gradInput->storage, + gradInput->storageOffset+k*dW*gradInput->size[1], + nFrame, inputFrameStride*gradInput->size[1], + kW*gradInput->size[1], 1); + + THTensor_(addmm)(gradInputWindow, 1, gradInputWindow, 1, gradOutputWindow, weight); + } + + THTensor_(free)(gradOutputWindow); + THTensor_(free)(gradInputWindow); + + return 1; +} + +static int nn_(TemporalConvolution_accGradParameters)(lua_State *L) +{ + THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id)); + THTensor *gradOutput = luaT_checkudata(L, 3, torch_(Tensor_id)); + real scale = luaL_optnumber(L, 4, 1); + int kW = luaT_getfieldcheckint(L, 1, "kW"); + int dW = luaT_getfieldcheckint(L, 1, "dW"); + long nInputFrame = input->size[0]; + long nOutputFrame = gradOutput->size[0]; + + THTensor *gradWeight = luaT_getfieldcheckudata(L, 1, "gradWeight", torch_(Tensor_id)); + THTensor *gradBias = luaT_getfieldcheckudata(L, 1, "gradBias", torch_(Tensor_id)); + + THTensor *gradOutputWindow; + THTensor *inputWindow; + long k; + + input = THTensor_(newContiguous)(input); + gradOutputWindow = THTensor_(new)(); + inputWindow = THTensor_(new)(); + + /* bias first */ + for(k = 0; k < nOutputFrame; k++) + { + THTensor_(select)(gradOutputWindow, gradOutput, 0, k); + THTensor_(cadd)(gradBias, gradBias, scale, gradOutputWindow); + } + + /* ouch */ + for(k = 0; nOutputFrame > 0; k++) + { + long outputFrameStride = (kW-1)/dW+1; + long inputFrameStride = outputFrameStride*dW; + long nFrame = (nInputFrame-k*dW-kW)/inputFrameStride + 1; + nOutputFrame -= nFrame; + + THTensor_(setStorage2d)(inputWindow, input->storage, + input->storageOffset+k*dW*input->size[1], + nFrame, inputFrameStride*input->size[1], + kW*input->size[1], 1); + + THTensor_(setStorage2d)(gradOutputWindow, gradOutput->storage, + gradOutput->storageOffset + k*gradOutput->size[1], + nFrame, outputFrameStride*gradOutput->size[1], + gradOutput->size[1], 1); + + THTensor_(transpose)(gradOutputWindow, NULL, 0, 1); + THTensor_(addmm)(gradWeight, 1, gradWeight, scale, gradOutputWindow, inputWindow); + THTensor_(transpose)(gradOutputWindow, NULL, 0, 1); + } + + THTensor_(free)(gradOutputWindow); + THTensor_(free)(inputWindow); + THTensor_(free)(input); + + return 0; +} + +static const struct luaL_Reg nn_(TemporalConvolution__) [] = { + {"TemporalConvolution_updateOutput", nn_(TemporalConvolution_updateOutput)}, + {"TemporalConvolution_updateGradInput", nn_(TemporalConvolution_updateGradInput)}, + {"TemporalConvolution_accGradParameters", nn_(TemporalConvolution_accGradParameters)}, + {NULL, NULL} +}; + +static void nn_(TemporalConvolution_init)(lua_State *L) +{ + luaT_pushmetaclass(L, torch_(Tensor_id)); + luaT_registeratname(L, nn_(TemporalConvolution__), "nn"); + lua_pop(L,1); +} + +#endif diff --git a/generic/TemporalSubSampling.c b/generic/TemporalSubSampling.c new file mode 100644 index 0000000..39e7f3b --- /dev/null +++ b/generic/TemporalSubSampling.c @@ -0,0 +1,139 @@ +#ifndef TH_GENERIC_FILE +#define TH_GENERIC_FILE "generic/TemporalSubSampling.c" +#else + 
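+/* TemporalSubSampling: each output frame k is the element-wise sum of kW consecutive input frames starting at frame k*dW, scaled by a per-feature weight and shifted by a per-feature bias (see the narrow/sum/cmul/cadd sequence in updateOutput below) */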
+static int nn_(TemporalSubSampling_updateOutput)(lua_State *L) +{ + THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id)); + int kW = luaT_getfieldcheckint(L, 1, "kW"); + int dW = luaT_getfieldcheckint(L, 1, "dW"); + int inputFrameSize = luaT_getfieldcheckint(L, 1, "inputFrameSize"); + + THTensor *weight = luaT_getfieldcheckudata(L, 1, "weight", torch_(Tensor_id)); + THTensor *bias = luaT_getfieldcheckudata(L, 1, "bias", torch_(Tensor_id)); + THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_(Tensor_id)); + + THTensor *outputFrame, *inputWindow; + int nInputFrame, nOutputFrame; + long k; + + luaL_argcheck(L, input->nDimension == 2, 2, "2D tensor expected"); + luaL_argcheck(L, input->size[1] == inputFrameSize, 2, "invalid input frame size"); + luaL_argcheck(L, input->size[0] >= kW, 2, "input sequence smaller than kernel size"); + + outputFrame = THTensor_(new)(); + inputWindow = THTensor_(new)(); + + nInputFrame = input->size[0]; + nOutputFrame = (nInputFrame - kW) / dW + 1; + + THTensor_(resize2d)(output, + nOutputFrame, + inputFrameSize); + + for(k = 0; k < nOutputFrame; k++) + { + THTensor_(narrow)(inputWindow, input, 0, k*dW, kW); + THTensor_(select)(outputFrame, output, 0, k); + THTensor_(sum)(outputFrame, inputWindow, 0); + THTensor_(cmul)(outputFrame, outputFrame, weight); + THTensor_(cadd)(outputFrame, outputFrame, 1, bias); + } + + THTensor_(free)(outputFrame); + THTensor_(free)(inputWindow); + + return 1; +} + +static int nn_(TemporalSubSampling_updateGradInput)(lua_State *L) +{ + THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id)); + THTensor *gradOutput = luaT_checkudata(L, 3, torch_(Tensor_id)); + int kW = luaT_getfieldcheckint(L, 1, "kW"); + int dW = luaT_getfieldcheckint(L, 1, "dW"); + + THTensor *weight = luaT_getfieldcheckudata(L, 1, "weight", torch_(Tensor_id)); + THTensor *gradInput = luaT_getfieldcheckudata(L, 1, "gradInput", torch_(Tensor_id)); + + THTensor *gradOutputFrame; + THTensor *gradInputWindow, *buffer, *kwunit; + long k; + + gradOutputFrame = THTensor_(new)(); + gradInputWindow = THTensor_(new)(); + buffer = THTensor_(new)(); + kwunit = THTensor_(newWithSize1d)(kW); + + THTensor_(fill)(kwunit, 1); + THTensor_(resizeAs)(gradInput, input); + THTensor_(zero)(gradInput); + + for(k = 0; k < gradOutput->size[0]; k++) + { + THTensor_(narrow)(gradInputWindow, gradInput, 0, k*dW, kW); + THTensor_(select)(gradOutputFrame, gradOutput, 0, k); + THTensor_(cmul)(buffer, weight, gradOutputFrame); + THTensor_(addr)(gradInputWindow, 1, gradInputWindow, 1, kwunit, buffer); + } + + THTensor_(free)(gradOutputFrame); + THTensor_(free)(gradInputWindow); + THTensor_(free)(buffer); + THTensor_(free)(kwunit); + + return 1; +} + +static int nn_(TemporalSubSampling_accGradParameters)(lua_State *L) +{ + THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id)); + THTensor *gradOutput = luaT_checkudata(L, 3, torch_(Tensor_id)); + real scale = luaL_optnumber(L, 4, 1); + + int kW = luaT_getfieldcheckint(L, 1, "kW"); + int dW = luaT_getfieldcheckint(L, 1, "dW"); + + THTensor *gradWeight = luaT_getfieldcheckudata(L, 1, "gradWeight", torch_(Tensor_id)); + THTensor *gradBias = luaT_getfieldcheckudata(L, 1, "gradBias", torch_(Tensor_id)); + + THTensor *gradOutputFrame; + THTensor *inputWindow, *buffer; + long k; + + + gradOutputFrame = THTensor_(new)(); + inputWindow = THTensor_(new)(); + buffer = THTensor_(new)(); + + for(k = 0; k < gradOutput->size[0]; k++) + { + THTensor_(narrow)(inputWindow, input, 0, k*dW, kW); + THTensor_(select)(gradOutputFrame, gradOutput, 0, 
k); + THTensor_(sum)(buffer, inputWindow, 0); + THTensor_(addcmul)(gradWeight, gradWeight, scale, buffer, gradOutputFrame); + THTensor_(cadd)(gradBias, gradBias, scale, gradOutputFrame); + } + + THTensor_(free)(gradOutputFrame); + THTensor_(free)(inputWindow); + THTensor_(free)(buffer); + + return 0; +} + +static const struct luaL_Reg nn_(TemporalSubSampling__) [] = { + {"TemporalSubSampling_updateOutput", nn_(TemporalSubSampling_updateOutput)}, + {"TemporalSubSampling_updateGradInput", nn_(TemporalSubSampling_updateGradInput)}, + {"TemporalSubSampling_accGradParameters", nn_(TemporalSubSampling_accGradParameters)}, + {NULL, NULL} +}; + +static void nn_(TemporalSubSampling_init)(lua_State *L) +{ + luaT_pushmetaclass(L, torch_(Tensor_id)); + luaT_registeratname(L, nn_(TemporalSubSampling__), "nn"); + lua_pop(L,1); +} + +#endif diff --git a/generic/Threshold.c b/generic/Threshold.c new file mode 100644 index 0000000..760e842 --- /dev/null +++ b/generic/Threshold.c @@ -0,0 +1,47 @@ +#ifndef TH_GENERIC_FILE +#define TH_GENERIC_FILE "generic/Threshold.c" +#else + +static int nn_(Threshold_updateOutput)(lua_State *L) +{ + THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id)); + real val = luaT_getfieldchecknumber(L, 1, "val"); + real threshold = luaT_getfieldchecknumber(L, 1, "threshold"); + THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_(Tensor_id)); + + THTensor_(resizeAs)(output, input); + TH_TENSOR_APPLY2(real, output, real, input, \ + *output_data = (*input_data > threshold) ? *input_data : val;); + + return 1; +} + +static int nn_(Threshold_updateGradInput)(lua_State *L) +{ + THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id)); + THTensor *gradOutput = luaT_checkudata(L, 3, torch_(Tensor_id)); + real threshold = luaT_getfieldchecknumber(L, 1, "threshold"); + THTensor *gradInput = luaT_getfieldcheckudata(L, 1, "gradInput", torch_(Tensor_id)); + + THTensor_(resizeAs)(gradInput, input); + TH_TENSOR_APPLY3(real, gradInput, real, gradOutput, real, input, \ + if ((*input_data) > threshold) *gradInput_data = 1; \ + else *gradInput_data = 0; \ + *gradInput_data = (*gradOutput_data) * (*gradInput_data);); + return 1; +} + +static const struct luaL_Reg nn_(Threshold__) [] = { + {"Threshold_updateOutput", nn_(Threshold_updateOutput)}, + {"Threshold_updateGradInput", nn_(Threshold_updateGradInput)}, + {NULL, NULL} +}; + +static void nn_(Threshold_init)(lua_State *L) +{ + luaT_pushmetaclass(L, torch_(Tensor_id)); + luaT_registeratname(L, nn_(Threshold__), "nn"); + lua_pop(L,1); +} + +#endif diff --git a/generic/VolumetricConvolution.c b/generic/VolumetricConvolution.c new file mode 100644 index 0000000..0ec2247 --- /dev/null +++ b/generic/VolumetricConvolution.c @@ -0,0 +1,118 @@ +#ifndef TH_GENERIC_FILE +#define TH_GENERIC_FILE "generic/VolumetricConvolution.c" +#else + +static int nn_(VolumetricConvolution_updateOutput)(lua_State *L) +{ + THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id)); + int dT = luaT_getfieldcheckint(L, 1, "dT"); + int dW = luaT_getfieldcheckint(L, 1, "dW"); + int dH = luaT_getfieldcheckint(L, 1, "dH"); + + THTensor *weight = luaT_getfieldcheckudata(L, 1, "weight", torch_(Tensor_id)); + THTensor *bias = luaT_getfieldcheckudata(L, 1, "bias", torch_(Tensor_id)); + THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_(Tensor_id)); + + luaL_argcheck(L, input->nDimension == 4, 2, "4D tensor expected"); + + long nOutputPlane = weight->size[0]; + long nInputPlane = weight->size[1]; + long kT = weight->size[2]; + long kH = 
weight->size[3]; + long kW = weight->size[4]; + long inputDepth = input->size[1]; + long inputHeight = input->size[2]; + long inputWidth = input->size[3]; + long outputDepth = (inputDepth - kT) / dT + 1; + long outputWidth = (inputWidth - kW) / dW + 1; + long outputHeight = (inputHeight - kH) / dH + 1; + + THTensor_(resize4d)(output, nOutputPlane, outputDepth, outputHeight, outputWidth); + + /* add bias */ + long i; + THTensor *outn = THTensor_(new)(); + for (i=0; i<bias->size[0]; i++) { + THTensor_(select)(outn,output,0,i); + THTensor_(fill)(outn, THTensor_(get1d)(bias, i)); + } + THTensor_(free)(outn); + + /* do convolutions */ + THTensor_(conv3Dmv)(output, 1.0, 1.0, input, weight, dT, dH, dW, "V", "X"); + + return 1; +} + + +static int nn_(VolumetricConvolution_updateGradInput)(lua_State *L) +{ + THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id)); + THTensor *gradOutput = luaT_checkudata(L, 3, torch_(Tensor_id)); + int dT = luaT_getfieldcheckint(L, 1, "dT"); + int dW = luaT_getfieldcheckint(L, 1, "dW"); + int dH = luaT_getfieldcheckint(L, 1, "dH"); + int nOutputPlane = luaT_getfieldcheckint(L, 1, "nOutputPlane"); + + THTensor *weight = luaT_getfieldcheckudata(L, 1, "weight", torch_(Tensor_id)); + THTensor *gradInput = luaT_getfieldcheckudata(L, 1, "gradInput", torch_(Tensor_id)); + + THArgCheck( nOutputPlane == gradOutput->size[0], 1, "Number of output features is not equal to nOutputPlane" ); + + /* gradient to input */ + THTensor *tweight = THTensor_(newTranspose)(weight,0,1); + THTensor_(conv3Dmv)(gradInput, 0.0, 1.0, gradOutput, tweight, dT, dH, dW, "F", "C"); + THTensor_(free)(tweight); + + return 1; +} + +static int nn_(VolumetricConvolution_accGradParameters)(lua_State *L) +{ + THTensor *input = luaT_checkudata(L, 2, torch_(Tensor_id)); + THTensor *gradOutput = luaT_checkudata(L, 3, torch_(Tensor_id)); + real scale = luaL_optnumber(L, 4, 1); + int dT = luaT_getfieldcheckint(L, 1, "dT"); + int dW = luaT_getfieldcheckint(L, 1, "dW"); + int dH = luaT_getfieldcheckint(L, 1, "dH"); + int nOutputPlane = luaT_getfieldcheckint(L, 1, "nOutputPlane"); + + THTensor *weight = luaT_getfieldcheckudata(L, 1, "weight", torch_(Tensor_id)); + THTensor *gradWeight = luaT_getfieldcheckudata(L, 1, "gradWeight", torch_(Tensor_id)); + THTensor *gradBias = luaT_getfieldcheckudata(L, 1, "gradBias", torch_(Tensor_id)); + + THArgCheck( nOutputPlane == gradOutput->size[0], 1, "Number of output features is not equal to nOutputPlane" ); + + long k; + + /* gradient to bias */ + real *gradBias_data = THTensor_(data)(gradBias); + THTensor* gradOutSlice = THTensor_(new)(); + for(k = 0; k < nOutputPlane; k++) + { + THTensor_(select)(gradOutSlice, gradOutput, 0, k); + gradBias_data[k] += scale*THTensor_(sumall)(gradOutSlice); + } + THTensor_(free)(gradOutSlice); + + /* gradient to kernels */ + THTensor_(conv3DRevger)(gradWeight, 1.0, scale, input, gradOutput, dT, dH, dW); + + return 0; +} + +static const struct luaL_Reg nn_(VolumetricConvolution__) [] = { + {"VolumetricConvolution_updateOutput", nn_(VolumetricConvolution_updateOutput)}, + {"VolumetricConvolution_updateGradInput", nn_(VolumetricConvolution_updateGradInput)}, + {"VolumetricConvolution_accGradParameters", nn_(VolumetricConvolution_accGradParameters)}, + {NULL, NULL} +}; + +static void nn_(VolumetricConvolution_init)(lua_State *L) +{ + luaT_pushmetaclass(L, torch_(Tensor_id)); + luaT_registeratname(L, nn_(VolumetricConvolution__), "nn"); + lua_pop(L,1); +} + +#endif @@ -0,0 +1,163 @@ +#include "TH.h" +#include "luaT.h" + +#define torch_(NAME) 
TH_CONCAT_3(torch_, Real, NAME) +#define torch_string_(NAME) TH_CONCAT_STRING_3(torch., Real, NAME) +#define nn_(NAME) TH_CONCAT_3(nn_, Real, NAME) + +static const void* torch_FloatTensor_id = NULL; +static const void* torch_DoubleTensor_id = NULL; + +#include "generic/Square.c" +#include "THGenerateFloatTypes.h" + +#include "generic/Sqrt.c" +#include "THGenerateFloatTypes.h" + +#include "generic/HardTanh.c" +#include "THGenerateFloatTypes.h" + +#include "generic/Exp.c" +#include "THGenerateFloatTypes.h" + +#include "generic/LogSigmoid.c" +#include "THGenerateFloatTypes.h" + +#include "generic/LogSoftMax.c" +#include "THGenerateFloatTypes.h" + +#include "generic/Sigmoid.c" +#include "THGenerateFloatTypes.h" + +#include "generic/SoftPlus.c" +#include "THGenerateFloatTypes.h" + +#include "generic/Tanh.c" +#include "THGenerateFloatTypes.h" + +#include "generic/Abs.c" +#include "THGenerateFloatTypes.h" + +#include "generic/HardShrink.c" +#include "THGenerateFloatTypes.h" + +#include "generic/SoftShrink.c" +#include "THGenerateFloatTypes.h" + +#include "generic/Threshold.c" +#include "THGenerateFloatTypes.h" + +#include "generic/SoftMax.c" +#include "THGenerateFloatTypes.h" + +#include "generic/Max.c" +#include "THGenerateFloatTypes.h" + +#include "generic/Min.c" +#include "THGenerateFloatTypes.h" + +#include "generic/MSECriterion.c" +#include "THGenerateFloatTypes.h" + +#include "generic/AbsCriterion.c" +#include "THGenerateFloatTypes.h" + +#include "generic/SparseLinear.c" +#include "THGenerateFloatTypes.h" + +#include "generic/TemporalConvolution.c" +#include "THGenerateFloatTypes.h" + +#include "generic/TemporalSubSampling.c" +#include "THGenerateFloatTypes.h" + +#include "generic/SpatialConvolution.c" +#include "THGenerateFloatTypes.h" + +#include "generic/SpatialConvolutionMap.c" +#include "THGenerateFloatTypes.h" + +#include "generic/SpatialSubSampling.c" +#include "THGenerateFloatTypes.h" + +#include "generic/SpatialMaxPooling.c" +#include "THGenerateFloatTypes.h" + +#include "generic/VolumetricConvolution.c" +#include "THGenerateFloatTypes.h" + +#include "generic/MultiMarginCriterion.c" +#include "THGenerateFloatTypes.h" + +#include "generic/MultiLabelMarginCriterion.c" +#include "THGenerateFloatTypes.h" + +DLL_EXPORT int luaopen_libnn(lua_State *L) +{ + torch_FloatTensor_id = luaT_checktypename2id(L, "torch.FloatTensor"); + torch_DoubleTensor_id = luaT_checktypename2id(L, "torch.DoubleTensor"); + + lua_newtable(L); + lua_pushvalue(L, -1); + lua_setfield(L, LUA_GLOBALSINDEX, "nn"); + + nn_FloatMin_init(L); + nn_FloatMax_init(L); + nn_FloatExp_init(L); + nn_FloatSqrt_init(L); + nn_FloatSquare_init(L); + nn_FloatHardTanh_init(L); + nn_FloatLogSoftMax_init(L); + nn_FloatMSECriterion_init(L); + nn_FloatAbsCriterion_init(L); + nn_FloatLogSigmoid_init(L); + nn_FloatSigmoid_init(L); + nn_FloatSoftMax_init(L); + nn_FloatSoftPlus_init(L); + nn_FloatTanh_init(L); + nn_FloatAbs_init(L); + nn_FloatHardShrink_init(L); + nn_FloatSoftShrink_init(L); + nn_FloatThreshold_init(L); + nn_FloatSparseLinear_init(L); + nn_FloatTemporalConvolution_init(L); + nn_FloatTemporalSubSampling_init(L); + nn_FloatSpatialConvolution_init(L); + nn_FloatSpatialConvolutionMap_init(L); + nn_FloatSpatialSubSampling_init(L); + nn_FloatSpatialMaxPooling_init(L); + nn_FloatVolumetricConvolution_init(L); + nn_FloatMultiMarginCriterion_init(L); + nn_FloatMultiLabelMarginCriterion_init(L); + + nn_DoubleMin_init(L); + nn_DoubleMax_init(L); + nn_DoubleExp_init(L); + nn_DoubleSqrt_init(L); + nn_DoubleSquare_init(L); + 
nn_DoubleHardTanh_init(L); + nn_DoubleLogSoftMax_init(L); + nn_DoubleMSECriterion_init(L); + nn_DoubleAbsCriterion_init(L); + nn_DoubleLogSigmoid_init(L); + nn_DoubleSigmoid_init(L); + nn_DoubleSoftMax_init(L); + nn_DoubleSoftPlus_init(L); + nn_DoubleTanh_init(L); + nn_DoubleAbs_init(L); + nn_DoubleHardShrink_init(L); + nn_DoubleSoftShrink_init(L); + nn_DoubleThreshold_init(L); + nn_DoubleSparseLinear_init(L); + nn_DoubleTemporalConvolution_init(L); + nn_DoubleTemporalSubSampling_init(L); + nn_DoubleSpatialConvolution_init(L); + nn_DoubleSpatialConvolutionMap_init(L); + nn_DoubleSpatialSubSampling_init(L); + nn_DoubleSpatialMaxPooling_init(L); + nn_DoubleVolumetricConvolution_init(L); + nn_DoubleMultiMarginCriterion_init(L); + nn_DoubleMultiLabelMarginCriterion_init(L); + + return 1; +} diff --git a/init.lua b/init.lua new file mode 100644 index 0000000..c6e7df0 --- /dev/null +++ b/init.lua @@ -0,0 +1,91 @@ +require('torch') +require('libnn') + +torch.include('nn', 'Module.lua') + +torch.include('nn', 'Concat.lua') +torch.include('nn', 'Parallel.lua') +torch.include('nn', 'Sequential.lua') + +torch.include('nn', 'Linear.lua') +torch.include('nn', 'SparseLinear.lua') +torch.include('nn', 'Reshape.lua') +torch.include('nn', 'Select.lua') +torch.include('nn', 'Narrow.lua') +torch.include('nn', 'Replicate.lua') + +torch.include('nn', 'Copy.lua') +torch.include('nn', 'Min.lua') +torch.include('nn', 'Max.lua') +torch.include('nn', 'Mean.lua') +torch.include('nn', 'Sum.lua') +torch.include('nn', 'CMul.lua') +torch.include('nn', 'Mul.lua') +torch.include('nn', 'Add.lua') + +torch.include('nn', 'CAddTable.lua') +torch.include('nn', 'CDivTable.lua') +torch.include('nn', 'CMulTable.lua') +torch.include('nn', 'CSubTable.lua') + +torch.include('nn', 'Euclidean.lua') +torch.include('nn', 'WeightedEuclidean.lua') +torch.include('nn', 'PairwiseDistance.lua') +torch.include('nn', 'CosineDistance.lua') +torch.include('nn', 'DotProduct.lua') + +torch.include('nn', 'Exp.lua') +torch.include('nn', 'HardTanh.lua') +torch.include('nn', 'LogSigmoid.lua') +torch.include('nn', 'LogSoftMax.lua') +torch.include('nn', 'Sigmoid.lua') +torch.include('nn', 'SoftMax.lua') +torch.include('nn', 'SoftMin.lua') +torch.include('nn', 'SoftPlus.lua') +torch.include('nn', 'SoftSign.lua') +torch.include('nn', 'Tanh.lua') +torch.include('nn', 'Abs.lua') +torch.include('nn', 'Power.lua') +torch.include('nn', 'Square.lua') +torch.include('nn', 'Sqrt.lua') +torch.include('nn', 'HardShrink.lua') +torch.include('nn', 'SoftShrink.lua') +torch.include('nn', 'Threshold.lua') + +torch.include('nn', 'LookupTable.lua') +torch.include('nn', 'SpatialConvolution.lua') +torch.include('nn', 'SpatialConvolutionMap.lua') +torch.include('nn', 'SpatialSubSampling.lua') +torch.include('nn', 'SpatialMaxPooling.lua') +torch.include('nn', 'SpatialLPPooling.lua') +torch.include('nn', 'TemporalConvolution.lua') +torch.include('nn', 'TemporalSubSampling.lua') +torch.include('nn', 'SpatialSubtractiveNormalization.lua') +torch.include('nn', 'SpatialZeroPadding.lua') + +torch.include('nn', 'VolumetricConvolution.lua') + +torch.include('nn', 'ParallelTable.lua') +torch.include('nn', 'ConcatTable.lua') +torch.include('nn', 'SplitTable.lua') +torch.include('nn', 'JoinTable.lua') +torch.include('nn', 'CriterionTable.lua') +torch.include('nn', 'Identity.lua') + +torch.include('nn', 'Criterion.lua') +torch.include('nn', 'MSECriterion.lua') +torch.include('nn', 'MarginCriterion.lua') +torch.include('nn', 'AbsCriterion.lua') +torch.include('nn', 
'ClassNLLCriterion.lua') +torch.include('nn', 'MultiCriterion.lua') +torch.include('nn', 'L1HingeEmbeddingCriterion.lua') +torch.include('nn', 'HingeEmbeddingCriterion.lua') +torch.include('nn', 'CosineEmbeddingCriterion.lua') +torch.include('nn', 'MarginRankingCriterion.lua') +torch.include('nn', 'MultiMarginCriterion.lua') +torch.include('nn', 'MultiLabelMarginCriterion.lua') + +torch.include('nn', 'StochasticGradient.lua') + +torch.include('nn', 'Jacobian.lua') +torch.include('nn', 'test.lua') diff --git a/test/test.lua b/test/test.lua new file mode 100644 index 0000000..c18d3a2 --- /dev/null +++ b/test/test.lua @@ -0,0 +1,1029 @@ +require 'torch' +require 'random' + +local mytester = torch.Tester() +local jac + +local precision = 1e-5 + +local nntest = {} +local nntestx = {} + +function nntest.Add() + local ini = math.random(10,20) + local inj = math.random(10,20) + local ink = math.random(10,20) + local input = torch.Tensor(ini,inj,ink):zero() + local module = nn.Add(ini*inj*ink) + + local err = jac.testJacobian(module,input) + mytester:assertlt(err,precision, 'error on state ') + local err = jac.testJacobianParameters(module, input, module.bias, module.gradBias) + mytester:assertlt(err,precision, 'error on bias ') + + local err = jac.testJacobianUpdateParameters(module, input, module.bias) + mytester:assertlt(err,precision, 'error on bias [direct update]') + + for t,err in pairs(jac.testAllUpdate(module, input, 'bias', 'gradBias')) do + mytester:assertlt(err, precision, string.format( + 'error on bias [%s]', t)) + end + + local ferr,berr = jac.testIO(module,input) + mytester:asserteq(ferr, 0, torch.typename(module) .. ' - i/o forward err ') + mytester:asserteq(berr, 0, torch.typename(module) .. ' - i/o backward err ') +end + +function nntest.CMul() + local ini = math.random(5,15) + local inj = math.random(5,15) + local ink = math.random(5,15) + local input = torch.Tensor(ini,inj,ink):zero() + local module = nn.CMul(ini*inj*ink) + + local err = jac.testJacobian(module,input) + mytester:assertlt(err,precision, 'error on state ') + + local err = jac.testJacobianParameters(module, input, module.weight, module.gradWeight) + mytester:assertlt(err,precision, 'error on weight ') + + local err = jac.testJacobianUpdateParameters(module, input, module.weight) + mytester:assertlt(err,precision, 'error on weight [direct update] ') + + for t,err in pairs(jac.testAllUpdate(module, input, 'weight', 'gradWeight')) do + mytester:assertlt(err, precision, string.format( + 'error on weight [%s]', t)) + end + + local ferr,berr = jac.testIO(module,input) + mytester:asserteq(ferr, 0, torch.typename(module) .. ' - i/o forward err ') + mytester:asserteq(berr, 0, torch.typename(module) .. ' - i/o backward err ') +end + +function nntest.Exp() + local ini = math.random(10,20) + local inj = math.random(10,20) + local ink = math.random(10,20) + local input = torch.Tensor(ini,inj,ink):zero() + local module = nn.Exp() + + local err = jac.testJacobian(module,input) + mytester:assertlt(err,precision, 'error on state ') + + local ferr,berr = jac.testIO(module,input) + mytester:asserteq(ferr, 0, torch.typename(module) .. ' - i/o forward err ') + mytester:asserteq(berr, 0, torch.typename(module) .. 
' - i/o backward err ') +end + +function nntest.HardTanh() + local ini = math.random(5,10) + local inj = math.random(5,10) + local ink = math.random(5,10) + local input = torch.Tensor(ink, inj, ini):zero() + + local module = nn.HardTanh() + + local err = jac.testJacobian(module, input) + mytester:assertlt(err, precision , 'error on state ') + + local ferr, berr = jac.testIO(module, input) + mytester:asserteq(ferr, 0, torch.typename(module) .. ' - i/o forward err ') + mytester:asserteq(berr, 0, torch.typename(module) .. ' - i/o backward err ') +end + +function nntest.Abs() + local ini = math.random(5,10) + local inj = math.random(5,10) + local ink = math.random(5,10) + local input = torch.Tensor(ink, inj, ini):zero() + + local module = nn.Abs() + + local err = jac.testJacobian(module, input) + mytester:assertlt(err, precision , 'error on state ') + + local ferr, berr = jac.testIO(module, input) + mytester:asserteq(ferr, 0, torch.typename(module) .. ' - i/o forward err ') + mytester:asserteq(berr, 0, torch.typename(module) .. ' - i/o backward err ') +end + +function nntest.Threshold() + local ini = math.random(5,10) + local inj = math.random(5,10) + local ink = math.random(5,10) + local input = torch.Tensor(ink, inj, ini):zero() + + local module = nn.Threshold(random.uniform(-2,2),random.uniform(-2,2)) + + local err = nn.Jacobian.testJacobian(module, input) + mytester:assertlt(err, precision, 'error on state ') + + local ferr, berr = nn.Jacobian.testIO(module, input) + mytester:asserteq(ferr, 0, torch.typename(module) .. ' - i/o forward err ') + mytester:asserteq(berr, 0, torch.typename(module) .. ' - i/o backward err ') +end + +function nntest.HardShrink() + local ini = math.random(5,10) + local inj = math.random(5,10) + local ink = math.random(5,10) + local input = torch.Tensor(ink, inj, ini):zero() + + local module = nn.HardShrink(math.random()/2) + + local err = nn.Jacobian.testJacobian(module, input) + mytester:assertlt(err, precision, 'error on state ') + + local ferr, berr = nn.Jacobian.testIO(module, input) + mytester:asserteq(ferr, 0, torch.typename(module) .. ' - i/o forward err ') + mytester:asserteq(berr, 0, torch.typename(module) .. ' - i/o backward err ') +end + +function nntest.SoftShrink() + local ini = math.random(5,10) + local inj = math.random(5,10) + local ink = math.random(5,10) + local input = torch.Tensor(ink, inj, ini):zero() + + local module = nn.SoftShrink(math.random()/2) + + local err = nn.Jacobian.testJacobian(module, input) + mytester:assertlt(err, precision, 'error on state ') + + local ferr, berr = nn.Jacobian.testIO(module, input) + mytester:asserteq(ferr, 0, torch.typename(module) .. ' - i/o forward err ') + mytester:asserteq(berr, 0, torch.typename(module) .. ' - i/o backward err ') +end + +function nntest.Power() + local in1 = torch.rand(10,20) + local module = nn.Power(2) + local out = module:forward(in1) + local err = out:dist(in1:cmul(in1)) + mytester:asserteq(err, 0, torch.typename(module) .. ' - forward err ') + + local ini = math.random(5,10) + local inj = math.random(5,10) + local ink = math.random(5,10) + local pw = random.uniform()*math.random(1,10) + local input = torch.Tensor(ink, inj, ini):zero() + + local module = nn.Power(pw) + + local err = nn.Jacobian.testJacobian(module, input, 0.1, 2) + mytester:assertlt(err, precision, 'error on state ') + + local ferr, berr = nn.Jacobian.testIO(module,input, 0.1, 2) + mytester:asserteq(ferr, 0, torch.typename(module) .. ' - i/o forward err ') + mytester:asserteq(berr, 0, torch.typename(module) .. 
' - i/o backward err ')
+end
+
+function nntest.Square()
+ local in1 = torch.rand(10,20)
+ local module = nn.Square()
+ local out = module:forward(in1)
+ local err = out:dist(in1:cmul(in1))
+ mytester:asserteq(err, 0, torch.typename(module) .. ' - forward err ')
+
+ local ini = math.random(5,10)
+ local inj = math.random(5,10)
+ local ink = math.random(5,10)
+ local input = torch.Tensor(ink, inj, ini):zero()
+
+ local module = nn.Square()
+
+ local err = nn.Jacobian.testJacobian(module, input)
+ mytester:assertlt(err, precision, 'error on state ')
+
+ local ferr, berr = nn.Jacobian.testIO(module, input)
+ mytester:asserteq(ferr, 0, torch.typename(module) .. ' - i/o forward err ')
+ mytester:asserteq(berr, 0, torch.typename(module) .. ' - i/o backward err ')
+end
+
+function nntest.Sqrt()
+ local in1 = torch.rand(10,20)
+ local module = nn.Sqrt()
+ local out = module:forward(in1)
+ local err = out:dist(in1:sqrt())
+ mytester:asserteq(err, 0, torch.typename(module) .. ' - forward err ')
+
+ local ini = math.random(5,10)
+ local inj = math.random(5,10)
+ local ink = math.random(5,10)
+ local input = torch.Tensor(ink, inj, ini):zero()
+
+ local module = nn.Sqrt()
+
+ local err = nn.Jacobian.testJacobian(module, input, 0.1, 2)
+ mytester:assertlt(err, precision, 'error on state ')
+
+ local ferr, berr = nn.Jacobian.testIO(module, input, 0, 2)
+ mytester:asserteq(ferr, 0, torch.typename(module) .. ' - i/o forward err ')
+ mytester:asserteq(berr, 0, torch.typename(module) .. ' - i/o backward err ')
+end
+
+function nntest.Linear()
+ local ini = math.random(50,70)
+ local inj = math.random(50,70)
+ local input = torch.Tensor(ini):zero()
+ local module = nn.Linear(ini,inj)
+
+ -- 1D
+ local err = jac.testJacobian(module,input)
+ mytester:assertlt(err,precision, 'error on state ')
+
+ local err = jac.testJacobianParameters(module, input, module.weight, module.gradWeight)
+ mytester:assertlt(err,precision, 'error on weight ')
+
+ local err = jac.testJacobianParameters(module, input, module.bias, module.gradBias)
+ mytester:assertlt(err,precision, 'error on bias ')
+
+ local err = jac.testJacobianUpdateParameters(module, input, module.weight)
+ mytester:assertlt(err,precision, 'error on weight [direct update] ')
+
+ local err = jac.testJacobianUpdateParameters(module, input, module.bias)
+ mytester:assertlt(err,precision, 'error on bias [direct update] ')
+
+ for t,err in pairs(jac.testAllUpdate(module, input, 'weight', 'gradWeight')) do
+ mytester:assertlt(err, precision, string.format(
+ 'error on weight [%s]', t))
+ end
+
+ for t,err in pairs(jac.testAllUpdate(module, input, 'bias', 'gradBias')) do
+ mytester:assertlt(err, precision, string.format(
+ 'error on bias [%s]', t))
+ end
+
+ -- 2D
+ local nframe = math.random(50,70)
+ local input = torch.Tensor(nframe, ini):zero()
+
+ local err = jac.testJacobian(module,input)
+ mytester:assertlt(err,precision, 'error on state ')
+
+ local err = jac.testJacobianParameters(module, input, module.weight, module.gradWeight)
+ mytester:assertlt(err,precision, 'error on weight ')
+
+ local err = jac.testJacobianParameters(module, input, module.bias, module.gradBias)
+ mytester:assertlt(err,precision, 'error on bias ')
+
+ local err = jac.testJacobianUpdateParameters(module, input, module.weight)
+ mytester:assertlt(err,precision, 'error on weight [direct update] ')
+
+ local err = jac.testJacobianUpdateParameters(module, input, module.bias)
+ mytester:assertlt(err,precision, 'error on bias [direct update] ')
+
+ for t,err in pairs(jac.testAllUpdate(module,
input, 'weight', 'gradWeight')) do + mytester:assertlt(err, precision, string.format( + 'error on weight [%s]', t)) + end + + for t,err in pairs(jac.testAllUpdate(module, input, 'bias', 'gradBias')) do + mytester:assertlt(err, precision, string.format( + 'error on bias [%s]', t)) + end + + -- IO + local ferr,berr = jac.testIO(module,input) + mytester:asserteq(ferr, 0, torch.typename(module) .. ' - i/o forward err ') + mytester:asserteq(berr, 0, torch.typename(module) .. ' - i/o backward err ') +end + +function nntest.Euclidean() + local ini = math.random(50,70) + local inj = math.random(50,70) + local input = torch.Tensor(ini):zero() + local module = nn.Euclidean(ini,inj) + + local err = jac.testJacobian(module,input) + mytester:assertlt(err,precision, 'error on state ') + + local err = jac.testJacobianParameters(module, input, module.weight, module.gradWeight) + mytester:assertlt(err,precision, 'error on weight ') + + local ferr,berr = jac.testIO(module,input) + mytester:asserteq(ferr, 0, torch.typename(module) .. ' - i/o forward err ') + mytester:asserteq(berr, 0, torch.typename(module) .. ' - i/o backward err ') +end + +function nntest.WeightedEuclidean() + local ini = math.random(10,20) + local inj = math.random(10,20) + local input = torch.Tensor(ini):zero() + local module = nn.WeightedEuclidean(ini,inj) + + local err = jac.testJacobian(module,input) + mytester:assertlt(err,precision, 'error on state ') + + local err = jac.testJacobianParameters(module, input, module.weight, module.gradWeight) + mytester:assertlt(err,precision, 'error on weight ') + + local err = jac.testJacobianParameters(module, input, module.bias, module.gradBias) + mytester:assertlt(err,precision, 'error on bias ') + + local ferr,berr = jac.testIO(module,input) + mytester:asserteq(ferr, 0, torch.typename(module) .. ' - i/o forward err ') + mytester:asserteq(berr, 0, torch.typename(module) .. ' - i/o backward err ') +end + +function nntest.LogSigmoid() + local ini = math.random(10,20) + local inj = math.random(10,20) + local ink = math.random(10,20) + local input = torch.Tensor(ini,inj,ink):zero() + local module = nn.LogSigmoid() + + local err = jac.testJacobian(module,input) + mytester:assertlt(err,precision, 'error on state ') + + local ferr,berr = jac.testIO(module,input) + mytester:asserteq(ferr, 0, torch.typename(module) .. ' - i/o forward err ') + mytester:asserteq(berr, 0, torch.typename(module) .. ' - i/o backward err ') +end + +function nntest.LogSoftmax() + local ini = math.random(10,20) + local inj = math.random(10,20) + local input = torch.Tensor(ini,inj):zero() + local module = nn.LogSoftMax() + + local err = jac.testJacobian(module,input) + mytester:assertlt(err,precision, 'error on state ') + + local ferr,berr = jac.testIO(module,input) + mytester:asserteq(ferr, 0, torch.typename(module) .. ' - i/o forward err ') + mytester:asserteq(berr, 0, torch.typename(module) .. ' - i/o backward err ') +end + +-- function nntest.TemporalLogSoftmax() +-- local ini = math.random(10,20) +-- local inj = math.random(10,20) +-- local input = torch.Tensor(ini,inj):zero() +-- local module = nn.TemporalLogSoftMax() + +-- local err = jac.testJacobian(module,input) +-- mytester:assertlt(err,precision, 'error on state ') + +-- local ferr,berr = jac.testIO(module,input) +-- mytester:asserteq(ferr, 0, torch.typename(module) .. ' - i/o forward err ') +-- mytester:asserteq(berr, 0, torch.typename(module) .. 
' - i/o backward err ') +-- end + +function nntest.Max() + local ini = math.random(10,20) + local inj = math.random(10,20) + local ink = math.random(10,20) + local input = torch.Tensor(ini,inj*ink):zero() + local module = nn.Max(1) + + local err = jac.testJacobian(module,input) + mytester:assertlt(err,precision, 'error on state ') + + local ferr,berr = jac.testIO(module,input) + mytester:asserteq(ferr, 0, torch.typename(module) .. ' - i/o forward err ') + mytester:asserteq(berr, 0, torch.typename(module) .. ' - i/o backward err ') +end + +function nntest.Min() + local ini = math.random(10,20) + local inj = math.random(10,20) + local ink = math.random(10,20) + local input = torch.Tensor(ini,inj*ink):zero() + local module = nn.Min(1) + + local err = jac.testJacobian(module,input) + mytester:assertlt(err,precision, 'error on state ') + + local ferr,berr = jac.testIO(module,input) + mytester:asserteq(ferr, 0, torch.typename(module) .. ' - i/o forward err ') + mytester:asserteq(berr, 0, torch.typename(module) .. ' - i/o backward err ') +end + +function nntest.Mean() + local ini = math.random(10,20) + local inj = math.random(10,20) + local ink = math.random(10,20) + local input = torch.Tensor(ini,inj,ink):zero() + local module = nn.Mean(random.random(1,3)) + + local err = jac.testJacobian(module,input) + mytester:assertlt(err,precision, 'error on state ') + + local ferr,berr = jac.testIO(module,input) + mytester:asserteq(ferr, 0, torch.typename(module) .. ' - i/o forward err ') + mytester:asserteq(berr, 0, torch.typename(module) .. ' - i/o backward err ') +end + +function nntest.Mul() + local ini = math.random(10,20) + local inj = math.random(10,20) + local ink = math.random(10,20) + local input = torch.Tensor(ini,inj,ink):zero() + local module = nn.Mul(ini*inj*ink) + + local err = jac.testJacobian(module,input) + mytester:assertlt(err,precision, 'error on state ') + local err = jac.testJacobianParameters(module, input, module.weight, module.gradWeight) + mytester:assertlt(err,precision, 'error on weight ') + local err = jac.testJacobianUpdateParameters(module, input, module.weight) + mytester:assertlt(err,precision, 'error on weight [direct update] ') + + for t,err in pairs(jac.testAllUpdate(module, input, 'weight', 'gradWeight')) do + mytester:assertlt(err, precision, string.format( + 'error on weight [%s]', t)) + end + + local ferr,berr = jac.testIO(module,input) + mytester:asserteq(ferr, 0, torch.typename(module) .. ' - i/o forward err ') + mytester:asserteq(berr, 0, torch.typename(module) .. ' - i/o backward err ') +end + +function nntest.Sigmoid() + local ini = math.random(10,20) + local inj = math.random(10,20) + local ink = math.random(10,20) + local input = torch.Tensor(ini,inj,ink):zero() + local module = nn.Sigmoid() + + local err = jac.testJacobian(module,input) + mytester:assertlt(err,precision, 'error on state ') + + local ferr,berr = jac.testIO(module,input) + mytester:asserteq(ferr, 0, torch.typename(module) .. ' - i/o forward err ') + mytester:asserteq(berr, 0, torch.typename(module) .. ' - i/o backward err ') +end + +function nntest.Softmax() + local ini = math.random(10,20) + local ink = math.random(10,20) + local input = torch.Tensor(ink, ini):zero() + local module = nn.SoftMax() + + local err = jac.testJacobian(module,input) + mytester:assertlt(err,precision, 'error on state ') + + local ferr,berr = jac.testIO(module,input) + mytester:asserteq(ferr, 0, torch.typename(module) .. ' - i/o forward err ') + mytester:asserteq(berr, 0, torch.typename(module) .. 
' - i/o backward err ') +end + +function nntest.Softmin() + local ini = math.random(10,20) + local ink = math.random(10,20) + local input = torch.Tensor(ink, ini):zero() + local module = nn.SoftMin() + + local err = jac.testJacobian(module,input) + mytester:assertlt(err,precision, 'error on state ') + + local ferr,berr = jac.testIO(module,input) + mytester:asserteq(ferr, 0, torch.typename(module) .. ' - i/o forward err ') + mytester:asserteq(berr, 0, torch.typename(module) .. ' - i/o backward err ') +end + +function nntest.Softsign() + local ini = math.random(10,20) + local ink = math.random(10,20) + local input = torch.Tensor(ink, ini):zero() + local module = nn.SoftSign() + + local err = jac.testJacobian(module,input) + mytester:assertlt(err,precision, 'error on state ') + + local ferr,berr = jac.testIO(module,input) + mytester:asserteq(ferr, 0, torch.typename(module) .. ' - i/o forward err ') + mytester:asserteq(berr, 0, torch.typename(module) .. ' - i/o backward err ') +end + +function nntest.SoftPlus() + local ini = math.random(10,20) + local inj = math.random(10,20) + local ink = math.random(10,20) + local input = torch.Tensor(ini,inj,ink):zero() + local module = nn.SoftPlus() + + local err = jac.testJacobian(module,input) + mytester:assertlt(err,precision, 'error on state ') + + local ferr,berr = jac.testIO(module,input) + mytester:asserteq(ferr, 0, torch.typename(module) .. ' - i/o forward err ') + mytester:asserteq(berr, 0, torch.typename(module) .. ' - i/o backward err ') +end + +function nntest.SpatialSubtractiveNormalization_2dkernel() + local inputSize = math.random(11,20) + local kersize = 9 + local nbfeatures = math.random(5,10) + local kernel = torch.Tensor(kersize,kersize):fill(1) + local module = nn.SpatialSubtractiveNormalization(nbfeatures,kernel) + local input = torch.rand(nbfeatures,inputSize,inputSize) + + local err = jac.testJacobian(module,input) + mytester:assertlt(err,precision, 'error on state ') + + local ferr,berr = jac.testIO(module,input) + mytester:asserteq(ferr, 0, torch.typename(module) .. ' - i/o forward err ') + mytester:asserteq(berr, 0, torch.typename(module) .. ' - i/o backward err ') +end + +function nntest.SpatialSubtractiveNormalization_1dkernel() + local inputSize = math.random(11,20) + local kersize = 9 + local nbfeatures = math.random(5,10) + local kernel = torch.Tensor(kersize):fill(1) + local module = nn.SpatialSubtractiveNormalization(nbfeatures,kernel) + local input = torch.rand(nbfeatures,inputSize,inputSize) + + local err = jac.testJacobian(module,input) + mytester:assertlt(err,precision, 'error on state ') + + local ferr,berr = jac.testIO(module,input) + mytester:asserteq(ferr, 0, torch.typename(module) .. ' - i/o forward err ') + mytester:asserteq(berr, 0, torch.typename(module) .. 
' - i/o backward err ') +end + +function nntest.SpatialConvolution() + local from = math.random(1,10) + local to = math.random(1,10) + local ki = math.random(1,10) + local kj = math.random(1,10) + local si = math.random(1,4) + local sj = math.random(1,4) + local outi = math.random(10,20) + local outj = math.random(10,20) + local ini = (outi-1)*si+ki + local inj = (outj-1)*sj+kj + local module = nn.SpatialConvolution(from, to, ki, kj, si, sj) + local input = torch.Tensor(from, inj, ini):zero() + + -- stochastic + + local err = jac.testJacobian(module, input) + mytester:assertlt(err, precision, 'error on state ') + + local err = jac.testJacobianParameters(module, input, module.weight, module.gradWeight) + mytester:assertlt(err , precision, 'error on weight ') + + local err = jac.testJacobianParameters(module, input, module.bias, module.gradBias) + mytester:assertlt(err , precision, 'error on bias ') + + local err = jac.testJacobianUpdateParameters(module, input, module.weight) + mytester:assertlt(err , precision, 'error on weight [direct update] ') + + local err = jac.testJacobianUpdateParameters(module, input, module.bias) + mytester:assertlt(err , precision, 'error on bias [direct update] ') + + for t,err in pairs(jac.testAllUpdate(module, input, 'weight', 'gradWeight')) do + mytester:assertlt(err, precision, string.format( + 'error on weight [%s]', t)) + end + + for t,err in pairs(jac.testAllUpdate(module, input, 'bias', 'gradBias')) do + mytester:assertlt(err, precision, string.format( + 'error on bias [%s]', t)) + end + + -- batch + + --verbose = true + local batch = math.random(2,5) + outi = math.random(4,8) + outj = math.random(4,8) + ini = (outi-1)*si+ki + inj = (outj-1)*sj+kj + module = nn.SpatialConvolution(from, to, ki, kj, si, sj) + input = torch.Tensor(batch,from,inj,ini):zero() + +-- print(from, to, ki, kj, si, sj, batch, ini, inj) +-- print(module.weight:size()) +-- print(module.gradWeight:size()) + + local err = jac.testJacobian(module, input) + mytester:assertlt(err, precision, 'batch error on state ') + + local err = jac.testJacobianParameters(module, input, module.weight, module.gradWeight) + mytester:assertlt(err , precision, 'batch error on weight ') + + local err = jac.testJacobianParameters(module, input, module.bias, module.gradBias) + mytester:assertlt(err , precision, 'batch error on bias ') + + local err = jac.testJacobianUpdateParameters(module, input, module.weight) + mytester:assertlt(err , precision, 'batch error on weight [direct update] ') + + local err = jac.testJacobianUpdateParameters(module, input, module.bias) + mytester:assertlt(err , precision, 'batch error on bias [direct update] ') + + for t,err in pairs(jac.testAllUpdate(module, input, 'weight', 'gradWeight')) do + mytester:assertlt(err, precision, string.format( + 'error on weight [%s]', t)) + end + + for t,err in pairs(jac.testAllUpdate(module, input, 'bias', 'gradBias')) do + mytester:assertlt(err, precision, string.format( + 'batch error on bias [%s]', t)) + end + + local ferr, berr = jac.testIO(module, input) + mytester:asserteq(0, ferr, torch.typename(module) .. ' - i/o forward err ') + mytester:asserteq(0, berr, torch.typename(module) .. 
' - i/o backward err ') +end + +function nntest.SpatialConvolutionMap() + local from = math.random(1,10) + local fanin = math.random(1, from) + local to = math.random(1,10) + local ki = math.random(1,10) + local kj = math.random(1,10) + local si = math.random(1,4) + local sj = math.random(1,4) + local outi = math.random(10,20) + local outj = math.random(10,20) + local ini = (outi-1)*si+ki + local inj = (outj-1)*sj+kj + + local module = nn.SpatialConvolutionMap(nn.tables.random(from, to, fanin), ki, kj, si, sj) + local input = torch.Tensor(from, inj, ini):zero() + + local err = jac.testJacobian(module, input) + mytester:assertlt(err, precision, 'error on state ') + + local err = jac.testJacobianParameters(module, input, module.weight, module.gradWeight) + mytester:assertlt(err , precision, 'error on weight ') + + local err = jac.testJacobianParameters(module, input, module.bias, module.gradBias) + mytester:assertlt(err , precision, 'error on bias ') + + for t,err in pairs(jac.testAllUpdate(module, input, 'weight', 'gradWeight')) do + mytester:assertlt(err, precision, string.format( + 'error on weight [%s]', t)) + end + + for t,err in pairs(jac.testAllUpdate(module, input, 'bias', 'gradBias')) do + mytester:assertlt(err, precision, string.format( + 'error on bias [%s]', t)) + end + + local ferr, berr = jac.testIO(module, input) + mytester:asserteq(0, ferr, torch.typename(module) .. ' - i/o forward err ') + mytester:asserteq(0, berr, torch.typename(module) .. ' - i/o backward err ') +end + +function batchcompare(smod, sin, plist) + local bs = torch.LongStorage(sin:size():size()+1) + bs[1] = 1 + for i=1,sin:size():size() do bs[i+1] = sin:size()[i] end + local bin = torch.Tensor(bs):copy(sin) + local bmod = smod:clone() + + local sout = smod:forward(sin):clone() + local bout = bmod:forward(bin):clone() + + local sgout = torch.randn(sout:size()) + local bgout = torch.Tensor(bout:size()) + bgout:copy(sgout) + + local sgin = smod:backward(sin, sgout) + local bgin = bmod:backward(bin, bgout) + + smod:accGradParameters(sin, sgout, 1) + bmod:accGradParameters(bin, bgout, 1) + + mytester:assertTensorEq(sout,bout:select(1,1), 1e-8, 'batchcompare error on output') + mytester:assertTensorEq(sgin,bgin:select(1,1), 1e-8, 'batchcompare error on gradInput') + + for i,v in pairs(plist) do + mytester:assertTensorEq(smod[v],bmod[v], 1e-8, 'batchcompare error on ' .. 
v) + end +end + +function nntest.SpatialConvolutionBatchCompare() + local from = math.random(1,10) + local to = math.random(1,10) + local ki = math.random(1,10) + local kj = math.random(1,10) + local si = math.random(1,4) + local sj = math.random(1,4) + local outi = math.random(10,20) + local outj = math.random(10,20) + local ini = (outi-1)*si+ki + local inj = (outj-1)*sj+kj + + local module = nn.SpatialConvolution(from, to, ki, kj, si, sj) + local input = torch.randn(from,inj,ini) + + batchcompare(module,input, {'weight','bias','gradWeight','gradBias'}) +end + +function nntest.SpatialSubSamplingBatchCompare() + local from = math.random(1,10) + local ki = math.random(1,10) + local kj = math.random(1,10) + local si = math.random(1,4) + local sj = math.random(1,4) + local outi = math.random(10,20) + local outj = math.random(10,20) + local ini = (outi-1)*si+ki + local inj = (outj-1)*sj+kj + local module = nn.SpatialSubSampling(from, ki, kj, si, sj) + local input = torch.randn(from,inj,ini)--torch.Tensor(from, inj, ini):zero() + + batchcompare(module,input, {'weight','bias','gradWeight','gradBias'}) +end + +function nntest.SpatialSubSampling() + local from = math.random(1,10) + local ki = math.random(1,10) + local kj = math.random(1,10) + local si = math.random(1,4) + local sj = math.random(1,4) + local outi = math.random(10,20) + local outj = math.random(10,20) + local ini = (outi-1)*si+ki + local inj = (outj-1)*sj+kj + local module = nn.SpatialSubSampling(from, ki, kj, si, sj) + local input = torch.Tensor(from, inj, ini):zero() + + local err = jac.testJacobian(module, input) + mytester:assertlt(err, precision, 'error on state ') + + local err = jac.testJacobianParameters(module, input, module.weight, module.gradWeight) + mytester:assertlt(err , precision, 'error on weight ') + + local err = jac.testJacobianParameters(module, input, module.bias, module.gradBias) + mytester:assertlt(err , precision, 'error on bias ') + + local err = jac.testJacobianUpdateParameters(module, input, module.weight) + mytester:assertlt(err , precision, 'error on weight [direct update] ') + + local err = jac.testJacobianUpdateParameters(module, input, module.bias) + mytester:assertlt(err , precision, 'error on bias [direct update] ') + + for t,err in pairs(jac.testAllUpdate(module, input, 'weight', 'gradWeight')) do + mytester:assertlt(err, precision, string.format( + 'error on weight [%s]', t)) + end + + for t,err in pairs(jac.testAllUpdate(module, input, 'bias', 'gradBias')) do + mytester:assertlt(err, precision, string.format( + 'error on bias [%s]', t)) + end + + --verbose = true + local batch = math.random(2,5) + outi = math.random(4,8) + outj = math.random(4,8) + ini = (outi-1)*si+ki + inj = (outj-1)*sj+kj + module = nn.SpatialSubSampling(from, ki, kj, si, sj) + input = torch.Tensor(batch,from,inj,ini):zero() + +-- print(from, to, ki, kj, si, sj, batch, ini, inj) +-- print(module.weight:size()) +-- print(module.gradWeight:size()) + + local err = jac.testJacobian(module, input) + mytester:assertlt(err, precision, 'batch error on state ') + + local err = jac.testJacobianParameters(module, input, module.weight, module.gradWeight) + mytester:assertlt(err , precision, 'batch error on weight ') + + local err = jac.testJacobianParameters(module, input, module.bias, module.gradBias) + mytester:assertlt(err , precision, 'batch error on bias ') + + local err = jac.testJacobianUpdateParameters(module, input, module.weight) + mytester:assertlt(err , precision, 'batch error on weight [direct update] ') + + local err = 
jac.testJacobianUpdateParameters(module, input, module.bias) + mytester:assertlt(err , precision, 'batch error on bias [direct update] ') + + for t,err in pairs(jac.testAllUpdate(module, input, 'weight', 'gradWeight')) do + mytester:assertlt(err, precision, string.format( + 'batch error on weight [%s]', t)) + end + + for t,err in pairs(jac.testAllUpdate(module, input, 'bias', 'gradBias')) do + mytester:assertlt(err, precision, string.format( + 'batch error on bias [%s]', t)) + end + + local ferr, berr = jac.testIO(module, input) + mytester:asserteq(0, ferr, torch.typename(module) .. ' - i/o forward err ') + mytester:asserteq(0, berr, torch.typename(module) .. ' - i/o backward err ') +end + +function nntest.SpatialMaxPooling() + local fanin = math.random(1,4) + local osizex = math.random(1,20) + local osizey = math.random(1,20) + local mx = math.random(2,4) + local my = math.random(2,4) + local sizex = osizex*mx + local sizey = osizey*my + local module = nn.SpatialMaxPooling(mx,my,mx,my) + local input = torch.rand(fanin,sizey,sizex) + + local err = jac.testJacobian(module, input) + mytester:assertlt(err, precision, 'error on state ') + + local ferr, berr = jac.testIO(module, input) + mytester:asserteq(ferr, 0, torch.typename(module) .. ' - i/o forward err ') + mytester:asserteq(berr, 0, torch.typename(module) .. ' - i/o backward err ') +end + +function nntest.SpatialLPPooling() + local fanin = math.random(1,4) + local osizex = math.random(1,4) + local osizey = math.random(1,4) + local p = math.random(1,4) + local mx = math.random(2,8) + local my = math.random(2,8) + local dx = math.random(2,mx) + local dy = math.random(2,my) + local sizex = osizex*mx + local sizey = osizey*my + local module = nn.SpatialLPPooling(fanin,p,mx,my,dx,dy) + local input = torch.rand(fanin,sizey,sizex) + + local err = jac.testJacobian(module, input) + mytester:assertlt(err, precision, 'error on state ') + + local ferr, berr = jac.testIO(module, input) + mytester:asserteq(ferr, 0, torch.typename(module) .. ' - i/o forward err ') + mytester:asserteq(berr, 0, torch.typename(module) .. ' - i/o backward err ') +end + +function nntest.Sum() + local ini = math.random(10,20) + local inj = math.random(10,20) + local ink = math.random(10,20) + local input = torch.Tensor(ini,inj,ink):zero() + local module = nn.Sum(random.random(1,3)) + + local err = jac.testJacobian(module,input) + mytester:assertlt(err,precision, 'error on state ') + + local ferr,berr = jac.testIO(module,input) + mytester:asserteq(ferr, 0, torch.typename(module) .. ' - i/o forward err ') + mytester:asserteq(berr, 0, torch.typename(module) .. ' - i/o backward err ') +end + +function nntest.Tanh() + local ini = math.random(5,10) + local inj = math.random(5,10) + local ink = math.random(5,10) + local input = torch.Tensor(ink, inj, ini):zero() + + local module = nn.Tanh() + + local err = jac.testJacobian(module, input) + mytester:assertlt(err, precision , 'error on state ') + + local ferr, berr = jac.testIO(module, input) + mytester:asserteq(ferr, 0, torch.typename(module) .. ' - i/o forward err ') + mytester:asserteq(berr, 0, torch.typename(module) .. 
' - i/o backward err ') +end + +function nntest.TemporalConvolution() + local from = math.random(1,10) + local to = math.random(1,10) + local ki = math.random(1,10) + local si = math.random(1,4) + local outi = math.random(10,20) + local ini = (outi-1)*si+ki + local module = nn.TemporalConvolution(from, to, ki,si) + local input = torch.Tensor(ini, from):zero() + + local err = jac.testJacobian(module, input) + mytester:assertlt(err, precision, 'error on state ') + + local err = jac.testJacobianParameters(module, input, module.weight, module.gradWeight) + mytester:assertlt(err , precision, 'error on weight ') + + local err = jac.testJacobianParameters(module, input, module.bias, module.gradBias) + mytester:assertlt(err , precision, 'error on bias ') + + local err = jac.testJacobianUpdateParameters(module, input, module.weight) + mytester:assertlt(err , precision, 'error on weight [direct update]') + + local err = jac.testJacobianUpdateParameters(module, input, module.bias) + mytester:assertlt(err , precision, 'error on bias [direct update]') + + for t,err in pairs(jac.testAllUpdate(module, input, 'weight', 'gradWeight')) do + mytester:assertlt(err, precision, string.format( + 'error on weight [%s]', t)) + end + + for t,err in pairs(jac.testAllUpdate(module, input, 'bias', 'gradBias')) do + mytester:assertlt(err, precision, string.format( + 'error on bias [%s]', t)) + end + + local ferr, berr = jac.testIO(module, input) + mytester:asserteq(0, ferr, torch.typename(module) .. ' - i/o forward err ') + mytester:asserteq(0, berr, torch.typename(module) .. ' - i/o backward err ') +end + +function nntest.TemporalSubSampling() + local from = math.random(1,10) + local ki = math.random(1,10) + local si = math.random(1,4) + local outi = math.random(10,20) + local ini = (outi-1)*si+ki + local module = nn.TemporalSubSampling(from, ki, si) + local input = torch.Tensor(ini, from):zero() + + local err = jac.testJacobian(module, input) + mytester:assertlt(err, precision, 'error on state ') + + local err = jac.testJacobianParameters(module, input, module.weight, module.gradWeight) + mytester:assertlt(err , precision, 'error on weight ') + + local err = jac.testJacobianParameters(module, input, module.bias, module.gradBias) + mytester:assertlt(err , precision, 'error on bias ') + + local err = jac.testJacobianUpdateParameters(module, input, module.weight) + mytester:assertlt(err , precision, 'error on weight [direct update] ') + + local err = jac.testJacobianUpdateParameters(module, input, module.bias) + mytester:assertlt(err , precision, 'error on bias [direct update] ') + + for t,err in pairs(jac.testAllUpdate(module, input, 'weight', 'gradWeight')) do + mytester:assertlt(err, precision, string.format( + 'error on weight [%s]', t)) + end + + for t,err in pairs(jac.testAllUpdate(module, input, 'bias', 'gradBias')) do + mytester:assertlt(err, precision, string.format( + 'error on bias [%s]', t)) + end + + local ferr, berr = jac.testIO(module, input) + mytester:asserteq(0, ferr, torch.typename(module) .. ' - i/o forward err ') + mytester:asserteq(0, berr, torch.typename(module) .. 
' - i/o backward err ')
+end
+
+function nntest.VolumetricConvolution()
+ local from = math.random(2,5)
+ local to = math.random(2,5)
+ local kt = math.random(3,7)
+ local ki = math.random(3,7)
+ local kj = math.random(3,7)
+ local st = math.random(2,4)
+ local si = math.random(2,4)
+ local sj = math.random(2,4)
+ local outt = math.random(3,7)
+ local outi = math.random(3,7)
+ local outj = math.random(3,7)
+ local int = (outt-1)*st+kt
+ local ini = (outi-1)*si+ki
+ local inj = (outj-1)*sj+kj
+ local module = nn.VolumetricConvolution(from, to, kt, ki, kj, st, si, sj)
+ local input = torch.Tensor(from, int, inj, ini):zero()
+
+ local err = jac.testJacobian(module, input)
+ mytester:assertlt(err, precision, 'error on state ')
+
+ local err = jac.testJacobianParameters(module, input, module.weight, module.gradWeight)
+ mytester:assertlt(err , precision, 'error on weight ')
+
+ local err = jac.testJacobianParameters(module, input, module.bias, module.gradBias)
+ mytester:assertlt(err , precision, 'error on bias ')
+
+ local err = jac.testJacobianUpdateParameters(module, input, module.weight)
+ mytester:assertlt(err , precision, 'error on weight [direct update] ')
+
+ local err = jac.testJacobianUpdateParameters(module, input, module.bias)
+ mytester:assertlt(err , precision, 'error on bias [direct update] ')
+
+ for t,err in pairs(jac.testAllUpdate(module, input, 'weight', 'gradWeight')) do
+ mytester:assertlt(err, precision, string.format(
+ 'error on weight [%s]', t))
+ end
+
+ for t,err in pairs(jac.testAllUpdate(module, input, 'bias', 'gradBias')) do
+ mytester:assertlt(err, precision, string.format(
+ 'error on bias [%s]', t))
+ end
+
+ local ferr, berr = jac.testIO(module, input)
+ mytester:asserteq(0, ferr, torch.typename(module) .. ' - i/o forward err ')
+ mytester:asserteq(0, berr, torch.typename(module) .. ' - i/o backward err ')
+end
+
+
+mytester:add(nntest)
+--mytester:add(test_SpatialConvolution)
+--mytester:add(test_AbsCriterion)
+
+if not nn then
+ require 'nn'
+ jac = nn.Jacobian
+ mytester:run()
+else
+ jac = nn.Jacobian
+ function nn.test()
+ -- randomize stuff
+ math.randomseed(os.time())
+ mytester:run()
+ end
+end
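
A minimal usage sketch, assuming the package builds and installs under the name 'nn' (as set up in CMakeLists.txt) so that init.lua pulls in Jacobian.lua and test.lua via torch.include:

-- from an interactive torch session
require 'nn'   -- loading the package also loads test.lua, which defines nn.test()
nn.test()      -- seeds math.random with os.time() and runs every nntest.* entry through torch.Tester

Each nntest.* function above follows the same pattern: build a module with random sizes, compare the analytic Jacobian against finite differences (threshold 1e-5), and round-trip the module through serialization with jac.testIO.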