diff options
-rw-r--r--   CMulTable.lua              | 16
-rw-r--r--   Module.lua                 |  8
-rw-r--r--   SpatialConvolutionCUDA.lua | 63
-rw-r--r--   SpatialMaxPoolingCUDA.lua  | 24
-rw-r--r--   Transpose.lua              | 29
-rw-r--r--   init.lua                   |  3
6 files changed, 140 insertions, 3 deletions
diff --git a/CMulTable.lua b/CMulTable.lua
index f82776b..0e327be 100644
--- a/CMulTable.lua
+++ b/CMulTable.lua
@@ -14,7 +14,7 @@ function CMulTable:updateOutput(input)
    return self.output
 end
 
-function CMulTable:updateGradInput(input, gradOutput)
+function CMulTable:updateGradInput_efficient(input, gradOutput)
    self.tout = self.tout or input[1].new()
    self.tout:resizeAs(self.output)
    for i=1,#input do
@@ -25,3 +25,17 @@ function CMulTable:updateGradInput(input, gradOutput)
    end
    return self.gradInput
 end
+
+function CMulTable:updateGradInput(input, gradOutput)
+   for i=1,#input do
+      self.gradInput[i] = self.gradInput[i] or input[1].new()
+      self.gradInput[i]:resizeAs(input[i]):copy(gradOutput)
+      for j=1,#input do
+         if i~=j then
+            self.gradInput[i]:cmul(input[j])
+         end
+      end
+   end
+   return self.gradInput
+end
+
diff --git a/Module.lua b/Module.lua
--- a/Module.lua
+++ b/Module.lua
@@ -192,8 +192,12 @@ function Module:getParameters()
       local k, v = unpack(storageAndOffset)
       flatParameters[{{v+1,v+k:size()}}]:copy(Tensor():set(k))
    end
-   for k = 1,flatUsedParameters:nElement() do
-      flatUsedParameters[k] = flatParameters[k+cumSumOfHoles[k] ]
-   end
+   if cumSumOfHoles:sum() == 0 then
+      flatUsedParameters:copy(flatParameters)
+   else
+      for k = 1,flatUsedParameters:nElement() do
+         flatUsedParameters[k] = flatParameters[k+cumSumOfHoles[k]]
+      end
+   end
    return flatUsedParameters
 end
diff --git a/SpatialConvolutionCUDA.lua b/SpatialConvolutionCUDA.lua
new file mode 100644
index 0000000..30cb557
--- /dev/null
+++ b/SpatialConvolutionCUDA.lua
@@ -0,0 +1,63 @@
+local SpatialConvolutionCUDA, parent = torch.class('nn.SpatialConvolutionCUDA', 'nn.Module')
+
+function SpatialConvolutionCUDA:__init(nInputPlane, nOutputPlane, kW, kH, dW, dH)
+   parent.__init(self)
+
+   dW = dW or 1
+   dH = dH or 1
+
+   self.nInputPlane = nInputPlane
+   self.nOutputPlane = nOutputPlane
+   self.kW = kW
+   self.kH = kH
+   self.dW = dW
+   self.dH = dH
+
+   self.weight = torch.Tensor(nInputPlane, kH, kW, nOutputPlane)
+   self.bias = torch.Tensor(nOutputPlane)
+   self.gradWeight = torch.Tensor(nInputPlane, kH, kW, nOutputPlane)
+   self.gradBias = torch.Tensor(nOutputPlane)
+
+   self:reset()
+end
+
+function SpatialConvolutionCUDA:reset(stdv)
+   if stdv then
+      stdv = stdv * math.sqrt(3)
+   else
+      stdv = 1/math.sqrt(self.kW*self.kH*self.nInputPlane)
+   end
+   self.weight:uniform(-stdv, stdv)
+   self.bias:uniform(-stdv, stdv)
+end
+
+function SpatialConvolutionCUDA:updateOutput(input)
+   input.nn.SpatialConvolutionCUDA_updateOutput(self, input)
+   for i = 1,self.nOutputPlane do
+      self.output[i]:add(self.bias[i])
+   end
+   return self.output
+end
+
+function SpatialConvolutionCUDA:updateGradInput(input, gradOutput)
+   input.nn.SpatialConvolutionCUDA_updateGradInput(self, input, gradOutput)
+   return self.gradInput
+end
+
+function SpatialConvolutionCUDA:accGradParameters(input, gradOutput, scale)
+   input.nn.SpatialConvolutionCUDA_accGradParameters(self, input, gradOutput, scale)
+   for i = 1,self.nOutputPlane do
+      self.gradBias:narrow(1,i,1):add(scale * gradOutput[i]:sum() )
+   end
+end
+
+-- this routine copies weight+bias from a regular SpatialConvolution module
+function SpatialConvolutionCUDA:copy(sc)
+   local weight = sc.weight:clone()
+   weight:resize(sc.nOutputPlane, sc.nInputPlane * sc.kH * sc.kW)
+   weight = weight:t():contiguous()
+   weight:resize(sc.nInputPlane, sc.kH, sc.kW, sc.nOutputPlane)
+   self.weight:copy(weight)
+   self.bias:copy(sc.bias)
+end
+
diff --git a/SpatialMaxPoolingCUDA.lua b/SpatialMaxPoolingCUDA.lua
new file mode 100644
index 0000000..f40dcc8
--- /dev/null
+++ b/SpatialMaxPoolingCUDA.lua
@@ -0,0 +1,24 @@
+local SpatialMaxPoolingCUDA, parent = torch.class('nn.SpatialMaxPoolingCUDA', 'nn.Module')
+
+function SpatialMaxPoolingCUDA:__init(kW, kH, dW, dH)
+   parent.__init(self)
+
+   dW = dW or kW
+   dH = dH or kH
+
+   self.kW = kW
+   self.kH = kH
+   self.dW = dW
+   self.dH = dH
+end
+
+function SpatialMaxPoolingCUDA:updateOutput(input)
+   input.nn.SpatialMaxPoolingCUDA_updateOutput(self, input)
+   return self.output
+end
+
+function SpatialMaxPoolingCUDA:updateGradInput(input, gradOutput)
+   input.nn.SpatialMaxPoolingCUDA_updateGradInput(self, input, gradOutput)
+   return self.gradInput
+end
+
diff --git a/Transpose.lua b/Transpose.lua
new file mode 100644
index 0000000..a43729b
--- /dev/null
+++ b/Transpose.lua
@@ -0,0 +1,29 @@
+local Transpose, parent = torch.class('nn.Transpose', 'nn.Module')
+
+-- transpose dimensions:
+-- n = nn.Transpose({1,4},{1,3})
+-- will transpose dims 1 and 4, then 1 and 3...
+
+function Transpose:__init(...)
+   parent.__init(self)
+   self.permutations = {...}
+end
+
+function Transpose:updateOutput(input)
+   for _,perm in ipairs(self.permutations) do
+      input = input:transpose(perm[1],perm[2])
+   end
+   self.output:resizeAs(input):copy(input)
+   return self.output
+end
+
+function Transpose:updateGradInput(input, gradOutput)
+   local ndim = gradOutput:nDimension()
+   for i = #self.permutations,1,-1 do
+      local perm = self.permutations[i]
+      gradOutput = gradOutput:transpose(perm[1],perm[2])
+   end
+   self.gradInput:resizeAs(gradOutput):copy(gradOutput)
+   return self.gradInput
+end
+
diff --git a/init.lua b/init.lua
--- a/init.lua
+++ b/init.lua
@@ -13,6 +13,7 @@
 include('Reshape.lua')
 include('Select.lua')
 include('Narrow.lua')
 include('Replicate.lua')
+include('Transpose.lua')
 include('Copy.lua')
 include('Min.lua')
@@ -59,9 +60,11 @@ include('SpatialConvolution.lua')
 include('SpatialFullConvolution.lua')
 include('SpatialFullConvolutionMap.lua')
 include('SpatialConvolutionMM.lua')
+include('SpatialConvolutionCUDA.lua')
 include('SpatialConvolutionMap.lua')
 include('SpatialSubSampling.lua')
 include('SpatialMaxPooling.lua')
+include('SpatialMaxPoolingCUDA.lua')
 include('SpatialLPPooling.lua')
 include('TemporalConvolution.lua')
 include('TemporalSubSampling.lua')