
github.com/torch/nn.git
author    Zeming Lin <misterabc@devgpu029.prn2.facebook.com>  2016-03-08 00:12:37 +0300
committer Zeming Lin <misterabc@devgpu013.ash5.facebook.com>  2016-03-12 00:06:21 +0300
commit    497e8cdc261d1ca9708722dcf2d723ed5f9f1e14 (patch)
tree      e2fdacf89df4977e03fa82780ef64d09de599e8c /SparseLinear.lua
parent    3656a5762054897eeea52922d24b9f3c08c27819 (diff)
Adding table input support for batched SparseLinear, implementing gradInput correctly, fixing other bugs
Diffstat (limited to 'SparseLinear.lua')
-rw-r--r--  SparseLinear.lua  196
1 file changed, 137 insertions(+), 59 deletions(-)
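A minimal usage sketch (not part of the commit itself) of the table-input format the commit message describes. Module sizes, feature indices, values, and variable names below are illustrative assumptions; each batch entry is an nnz x 2 tensor of {featureIndex, value} rows, matching the convention used in the diff below.

require 'nn'

-- Construct with doGradInput = true (the third argument this commit adds) so
-- updateGradInput actually fills self.gradInput.
local inputSize, outputSize = 10000, 2
local module = nn.SparseLinear(inputSize, outputSize, true)

-- A batch is now a plain Lua table, one sparse sample per entry.
local batch = {
   torch.Tensor{{1, 0.1}, {200, 0.3}, {9999, 2.0}},  -- sample 1: 3 non-zeros
   torch.Tensor{{4, 0.5}, {1500, 1.2}},              -- sample 2: 2 non-zeros
}

local output = module:forward(batch)                 -- #batch x outputSize dense tensor
local gradOutput = torch.ones(#batch, outputSize)
local gradInput = module:backward(batch, gradOutput) -- table of inputSize x 2 {index, grad} tensors
print(output:size(1), output:size(2), #gradInput)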
diff --git a/SparseLinear.lua b/SparseLinear.lua
index 77ef7c3..6185df9 100644
--- a/SparseLinear.lua
+++ b/SparseLinear.lua
@@ -1,19 +1,25 @@
local THNN = require 'nn.THNN'
local SparseLinear, parent = torch.class('nn.SparseLinear', 'nn.Module')
-function SparseLinear:__init(inputSize, outputSize)
+local NO_LAST_INPUT = 0
+local ONE_LAST_INPUT = 1
+local ACC_MULTIPLE_TIMES = 2
+
+function SparseLinear:__init(inputSize, outputSize, doGradInput)
parent.__init(self)
self.weightDecay = 0
+ self.doGradInput = doGradInput or false
self.weight = torch.Tensor(outputSize, inputSize):zero()
self.bias = torch.Tensor(outputSize):zero()
self.gradWeight = torch.Tensor(outputSize, inputSize):zero()
self.gradBias = torch.Tensor(outputSize):zero()
- self.lastInput = nil
- if torch.getnumthreads() > 1 and outputSize >= 128 then
- self.shardBuffer = torch.Tensor(outputSize, torch.getnumthreads())
- end
+ assert(type(self.doGradInput) == type(true))
+
+ self.lastInput = nil
+ self.sparseUpdate = NO_LAST_INPUT
+ self.formatted_input = nil
-- state
self.gradInput:resize(inputSize)
@@ -33,78 +39,148 @@ function SparseLinear:reset(stdv)
end
function SparseLinear:reshapeInput(input)
- if input:dim() == 2 then
- return input:view(1, input:size(1), input:size(2)), false
+ if type(input) == 'table' then
+ return input, true, false
else
- return input, true
+ if input:dim() == 2 then
+ return {input}, false, false
+ else
+ return input, true, true
+ end
end
end
function SparseLinear:updateOutput(input)
- self.cudaBuffer = self.cudaBuffer or input.new()
- local input, batchMode = self:reshapeInput(input)
-
- input.THNN.SparseLinear_updateOutput(
- input:cdata(),
- self.output:cdata(),
- self.weight:cdata(),
- self.bias:cdata(),
- self.cudaBuffer:cdata(),
- THNN.optionalTensor(self.shardBuffer)
- )
-
- -- fix output size for batchSize = 1
- if not batchMode then
- self.output:set(self.output:view(self.output:size(2)))
- end
+ local input, batchMode, legacyMode = self:reshapeInput(input)
+ self.legacyMode = legacyMode
- return self.output
-end
+ if legacyMode then
+ input.THNN.SparseLinear_legacyUpdateOutput(
+ input:cdata(),
+ self.output:cdata(),
+ self.weight:cdata(),
+ self.bias:cdata()
+ )
+ else
+ local nbatches = #input
+ if nbatches == 0 then
+ self.output:copy(self.bias)
+ return self.output
+ end
-function SparseLinear:accGradParameters(input, gradOutput, scale)
- local input, batchMode = self:reshapeInput(input)
+ local size = 0
+ local marker = 1
+ self.formatted_input = self.formatted_input or input[1].new()
+
+ for i,v in ipairs(input) do size = size + input[i]:size(1) end
+ self.formatted_input:resize(size, 3)
+ for i,v in ipairs(input) do
+ local buf = self.formatted_input:narrow(1, marker, input[i]:size(1))
+ buf:narrow(2,2,2):copy(input[i])
+ buf:select(2,1):fill(i)
+ marker = marker + input[i]:size(1)
+ end
- self.lastInput = self.lastInput or input.new()
- self.lastInput:resizeAs(input):copy(input)
- if not batchMode then
- gradOutput = gradOutput:view(1, gradOutput:size(1))
+ self.output:resize(nbatches, self.weight:size(1))
+ input[1].THNN.SparseLinear_updateOutput(
+ self.formatted_input:cdata(),
+ self.output:cdata(),
+ self.weight:cdata(),
+ self.bias:cdata()
+ )
+
+ -- fix output size for batchSize = 1
+ if not batchMode then
+ self.output = self.output[1]
+ end
end
- input.THNN.SparseLinear_accGradParameters(
- input:cdata(),
- gradOutput:cdata(),
- self.gradWeight:cdata(),
- self.gradBias:cdata(),
- self.weight:cdata(),
- self.bias:cdata(),
- self.weightDecay or 0,
- scale or 1
- )
+ return self.output
end
-function SparseLinear:updateGradInput(input, gradOutput)
- if self.gradInput then
- local input, batchMode = self:reshapeInput(input)
- if not batchMode then
- gradOutput = gradOutput:view(1, gradOutput:size(1))
+function SparseLinear:accGradParameters(input, gradOutput, scale)
+ local input, batchMode, legacyMode = self:reshapeInput(input)
+ self.legacyMode = legacyMode
+
+ if legacyMode then
+ self.lastInput = self.lastInput or input.new()
+ if self.sparseUpdate == NO_LAST_INPUT then
+ self.lastInput:resizeAs(input):copy(input)
+ self.sparseUpdate = ONE_LAST_INPUT
+ elseif self.sparseUpdate == ONE_LAST_INPUT then
+ self.sparseUpdate = ACC_MULTIPLE_TIMES
end
- input.THNN.SparseLinear_updateGradInput(
+
+ input.THNN.SparseLinear_legacyAccGradParameters(
input:cdata(),
gradOutput:cdata(),
- self.gradInput:cdata(),
- self.weight:cdata()
+ self.gradWeight:cdata(),
+ self.gradBias:cdata(),
+ self.weight:cdata(),
+ self.bias:cdata(),
+ self.weightDecay or 0,
+ scale or 1
)
- -- fix gradInput size for batchSize = 1
+ else
if not batchMode then
- self.gradInput:set(self.gradInput:view(self.gradInput:size(2), self.gradInput:size(3)))
+ gradOutput:resize(1, gradOutput:size(1))
end
- return self.gradInput
+ input[1].THNN.SparseLinear_accGradParameters(
+ self.formatted_input:cdata(),
+ gradOutput:cdata(),
+ self.gradWeight:cdata(),
+ self.gradBias:cdata(),
+ self.weight:cdata(),
+ self.bias:cdata(),
+ self.weightDecay or 0,
+ scale or 1
+ )
end
end
+function SparseLinear:updateGradInput(input, gradOutput)
+ if self.legacyMode then
+ if type(self.gradInput) ~= type(gradOutput) then self.gradInput = gradOutput.new() end
+ self.gradInput:resizeAs(input)
+ else
+ self.gradInput = {}
+ end
+ if self.doGradInput then
+ -- GradInput should be dense anyway
+ local gi
+ local batchMode = true
+ if gradOutput:dim() == 1 then
+ gi = self.weight:t()*gradOutput
+ batchMode = false
+ elseif gradOutput:dim() == 2 then
+ gi = gradOutput*self.weight
+ end
+ local ini = self.weight:size(2)
+
+ if self.legacyMode then
+ local batches = self.gradInput:size(1)
+ self.gradInput:resize(batches, ini, 2)
+ self.gradInput:select(3,1):copy(torch.repeatTensor(torch.range(1, ini), batches, 1))
+ self.gradInput:select(3,2):copy(gi)
+ else
+ local indices = torch.range(1, ini)
+ if not batchMode then gi:resize(1, ini) end
+ for i = 1,gi:size(1) do
+ self.gradInput[i] = gradOutput.new(ini, 2)
+ self.gradInput[i]:select(2, 2):copy(gi[i])
+ self.gradInput[i]:select(2, 1):range(1, ini)
+ end
+ end
+ end
+ return self.gradInput
+end
+
+-- These functions do sparse updates / zeros. However, if we accumulated
+-- gradients multiple times, we can't depend on the last input to do sparse
+-- updates.
function SparseLinear:updateParameters(learningRate)
- if self.lastInput then
+ if self.lastInput and self.legacyMode and self.sparseUpdate == ONE_LAST_INPUT then
self.lastInput.THNN.SparseLinear_updateParameters(
self.weight:cdata(),
self.bias:cdata(),
@@ -116,22 +192,24 @@ function SparseLinear:updateParameters(learningRate)
else
parent.updateParameters(self, learningRate)
end
+ self.sparseUpdate = NO_LAST_INPUT
end
function SparseLinear:zeroGradParameters()
- if self.lastInput then
+ if self.lastInput and self.legacyMode and self.sparseUpdate == ONE_LAST_INPUT then
self.lastInput.THNN.SparseLinear_zeroGradParameters(
- self.gradWeight:cdata(),
- self.gradBias:cdata(),
- self.lastInput:cdata()
+ self.gradWeight:cdata(),
+ self.gradBias:cdata(),
+ self.lastInput:cdata()
)
else
parent.zeroGradParameters(self)
end
+ self.sparseUpdate = NO_LAST_INPUT
end
function SparseLinear:clearState()
if self.lastInput then self.lastInput:set() end
- if self.cudaBuffer then self.cudaBuffer:set() end
+ if self.lastInput and self.lastInput.THNN.SparseLinear_cudaClearState then self.lastInput.THNN.SparseLinear_cudaClearState() end
return parent.clearState(self)
end
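A second minimal sketch (also not part of the commit), illustrating the comment above about accumulating gradients multiple times: the sparse SparseLinear_updateParameters / SparseLinear_zeroGradParameters path is only taken when exactly one accGradParameters call happened since the last zero (sparseUpdate == ONE_LAST_INPUT); a second accumulation marks ACC_MULTIPLE_TIMES and the module falls back to the dense parent implementation. It assumes the legacy batches x nnz x 2 tensor input; sizes, indices, and the learning rate are illustrative.

require 'nn'

-- Legacy batched input: a batches x nnz x 2 tensor of {featureIndex, value} rows.
local module = nn.SparseLinear(1000, 4)
local x = torch.Tensor{{{3, 1.0}, {42, 0.5}}}   -- 1 sample with 2 non-zeros

module:zeroGradParameters()
module:forward(x)
module:backward(x, torch.ones(1, 4))   -- one accumulation: sparse update allowed
module:updateParameters(0.01)          -- takes the sparse THNN path

module:zeroGradParameters()
module:forward(x)
module:backward(x, torch.ones(1, 4))
module:backward(x, torch.ones(1, 4))   -- accumulated twice: ACC_MULTIPLE_TIMES
module:updateParameters(0.01)          -- falls back to the dense parent update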