github.com/torch/nn.git

author     koray kavukcuoglu <koray@kavukcuoglu.org>  2013-01-03 18:11:51 +0400
committer  koray kavukcuoglu <koray@kavukcuoglu.org>  2013-01-03 18:11:51 +0400
commit     1ee64da4abbafd92c6bea5121e9a11e55ef1cbcb (patch)
tree       0ac888ab1f86d49439d9a720df6a3c5329f2ff58
parent     9e6f3d322212a4c62d869de6733701be8e676754 (diff)
New NN classes
extra/nn/L1Cost.lua                    : L1 penalty
extra/nn/SpatialFullConvolution.lua    : full convolution
extra/nn/SpatialFullConvolutionMap.lua : full convolution with connection table
extra/nn/TanhShrink.lua                : shrinkage with x - tanh(x)
extra/nn/WeightedMSECriterion.lua      : mean squared error with a weighting mask on the target

Add new nn classes that are commonly used for unsupervised training of convolutional auto-encoders.
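
All five classes follow the stock nn.Module/nn.Criterion API, so they drop into existing containers and training loops. A minimal usage sketch (a sketch only, assuming a torch/nn build that includes this commit; all shapes are illustrative):

    require 'nn'

    -- L1 penalty: forward returns sum(|x|), backward the sign of x
    local l1 = nn.L1Cost()
    local x = torch.randn(10)
    local cost = l1:forward(x)
    local grad = l1:backward(x)

    -- full (transposed) convolution: upsamples a 4-plane 8x8 map
    local fconv = nn.SpatialFullConvolution(4, 2, 5, 5, 2, 2)
    local up = fconv:forward(torch.randn(4, 8, 8))   -- 2 x 19 x 19

    -- x - tanh(x) nonlinearity
    local shrink = nn.TanhShrink()
    local y = shrink:forward(torch.randn(3, 3))

    -- MSE against a target masked elementwise by w
    local w = torch.rand(10)
    local crit = nn.WeightedMSECriterion(w)
    local loss = crit:forward(torch.randn(10), torch.randn(10))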
-rw-r--r--  L1Cost.lua                           |  14
-rw-r--r--  SpatialFullConvolution.lua           |  53
-rw-r--r--  SpatialFullConvolutionMap.lua        |  62
-rw-r--r--  TanhShrink.lua                       |  20
-rw-r--r--  WeightedMSECriterion.lua             |  31
-rw-r--r--  generic/L1Cost.c                     |  49
-rw-r--r--  generic/SpatialConvolutionMap.c      |   2
-rw-r--r--  generic/SpatialFullConvolution.c     | 191
-rw-r--r--  generic/SpatialFullConvolutionMap.c  | 225
-rw-r--r--  hessian.lua                          |  62
-rw-r--r--  init.c                               |  15
-rw-r--r--  init.lua                             |   5
-rw-r--r--  test/test.lua                        | 198
13 files changed, 923 insertions(+), 4 deletions(-)
diff --git a/L1Cost.lua b/L1Cost.lua
new file mode 100644
index 0000000..83526c9
--- /dev/null
+++ b/L1Cost.lua
@@ -0,0 +1,14 @@
+local L1Cost, parent = torch.class('nn.L1Cost','nn.Criterion')
+
+function L1Cost:__init()
+ parent.__init(self)
+end
+
+function L1Cost:updateOutput(input)
+ return input.nn.L1Cost_updateOutput(self,input)
+end
+
+function L1Cost:updateGradInput(input)
+ return input.nn.L1Cost_updateGradInput(self,input)
+end
+
diff --git a/SpatialFullConvolution.lua b/SpatialFullConvolution.lua
new file mode 100644
index 0000000..96517ab
--- /dev/null
+++ b/SpatialFullConvolution.lua
@@ -0,0 +1,53 @@
+local SpatialFullConvolution, parent = torch.class('nn.SpatialFullConvolution','nn.Module')
+
+function SpatialFullConvolution:__init(nInputPlane, nOutputPlane, kW, kH, dW, dH)
+ parent.__init(self)
+
+ dW = dW or 1
+ dH = dH or 1
+
+ self.nInputPlane = nInputPlane
+ self.nOutputPlane = nOutputPlane
+ self.kW = kW
+ self.kH = kH
+ self.dW = dW
+ self.dH = dH
+
+ self.weight = torch.Tensor(nInputPlane, nOutputPlane, kH, kW)
+ self.gradWeight = torch.Tensor(nInputPlane, nOutputPlane, kH, kW)
+ self.bias = torch.Tensor(self.nOutputPlane)
+ self.gradBias = torch.Tensor(self.nOutputPlane)
+
+ self:reset()
+end
+
+function SpatialFullConvolution:reset(stdv)
+ if stdv then
+ stdv = stdv * math.sqrt(3)
+ else
+ local nInputPlane = self.nInputPlane
+ local kH = self.kH
+ local kW = self.kW
+ stdv = 1/math.sqrt(kW*kH*nInputPlane)
+ end
+ self.weight:apply(function()
+ return torch.uniform(-stdv, stdv)
+ end)
+ self.bias:apply(function()
+ return torch.uniform(-stdv, stdv)
+ end)
+end
+
+function SpatialFullConvolution:updateOutput(input)
+ return input.nn.SpatialFullConvolution_updateOutput(self, input)
+end
+
+function SpatialFullConvolution:updateGradInput(input, gradOutput)
+ if self.gradInput then
+ return input.nn.SpatialFullConvolution_updateGradInput(self, input, gradOutput)
+ end
+end
+function SpatialFullConvolution:accGradParameters(input, gradOutput, scale)
+ return input.nn.SpatialFullConvolution_accGradParameters(self, input, gradOutput, scale)
+end
+
diff --git a/SpatialFullConvolutionMap.lua b/SpatialFullConvolutionMap.lua
new file mode 100644
index 0000000..4ae8293
--- /dev/null
+++ b/SpatialFullConvolutionMap.lua
@@ -0,0 +1,62 @@
+local SpatialFullConvolutionMap, parent = torch.class('nn.SpatialFullConvolutionMap', 'nn.Module')
+
+function SpatialFullConvolutionMap:__init(conMatrix, kW, kH, dW, dH)
+ parent.__init(self)
+
+ dW = dW or 1
+ dH = dH or 1
+
+ self.kW = kW
+ self.kH = kH
+ self.dW = dW
+ self.dH = dH
+ self.connTable = conMatrix
+ self.nInputPlane = self.connTable:select(2,1):max()
+ self.nOutputPlane = self.connTable:select(2,2):max()
+
+ self.weight = torch.Tensor(self.connTable:size(1), kH, kW)
+ self.gradWeight = torch.Tensor(self.connTable:size(1), kH, kW)
+
+ self.bias = torch.Tensor(self.nOutputPlane)
+ self.gradBias = torch.Tensor(self.nOutputPlane)
+
+ self:reset()
+end
+
+function SpatialFullConvolutionMap:reset(stdv)
+ if stdv then
+ stdv = stdv * math.sqrt(3)
+ self.weight:apply(function()
+ return torch.uniform(-stdv, stdv)
+ end)
+ self.bias:apply(function()
+ return torch.uniform(-stdv, stdv)
+ end)
+ else
+ local ninp = torch.Tensor(self.nOutputPlane):zero()
+ for i=1,self.connTable:size(1) do ninp[self.connTable[i][2]] = ninp[self.connTable[i][2]]+1 end
+ for k=1,self.connTable:size(1) do
+ stdv = 1/math.sqrt(self.kW*self.kH*ninp[self.connTable[k][2]])
+ self.weight:select(1,k):apply(function() return torch.uniform(-stdv,stdv) end)
+ end
+ for k=1,self.bias:size(1) do
+ stdv = 1/math.sqrt(self.kW*self.kH*ninp[k])
+ self.bias[k] = torch.uniform(-stdv,stdv)
+ end
+
+ end
+end
+
+function SpatialFullConvolutionMap:updateOutput(input)
+ input.nn.SpatialFullConvolutionMap_updateOutput(self, input)
+ return self.output
+end
+
+function SpatialFullConvolutionMap:updateGradInput(input, gradOutput)
+ input.nn.SpatialFullConvolutionMap_updateGradInput(self, input, gradOutput)
+ return self.gradInput
+end
+
+function SpatialFullConvolutionMap:accGradParameters(input, gradOutput, scale)
+ return input.nn.SpatialFullConvolutionMap_accGradParameters(self, input, gradOutput, scale)
+end
diff --git a/TanhShrink.lua b/TanhShrink.lua
new file mode 100644
index 0000000..96df6c5
--- /dev/null
+++ b/TanhShrink.lua
@@ -0,0 +1,20 @@
+local TanhShrink, parent = torch.class('nn.TanhShrink','nn.Module')
+
+function TanhShrink:__init()
+ parent.__init(self)
+ self.tanh = nn.Tanh()
+end
+
+function TanhShrink:updateOutput(input)
+ local th = self.tanh:updateOutput(input)
+ self.output:resizeAs(input):copy(input)
+ self.output:add(-1,th)
+ return self.output
+end
+
+function TanhShrink:updateGradInput(input, gradOutput)
+ local dth = self.tanh:updateGradInput(input,gradOutput)
+ self.gradInput:resizeAs(input):copy(gradOutput)
+ self.gradInput:add(-1,dth)
+ return self.gradInput
+end
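
TanhShrink reuses nn.Tanh for both passes, so its gradient falls straight out of the chain rule: y = x - tanh(x) gives dy/dx = 1 - (1 - tanh(x)^2) = tanh(x)^2. A quick numeric sanity check (a sketch, not part of the test suite below):

    local m = nn.TanhShrink()
    local x = torch.randn(5)
    m:forward(x)
    local gi = m:backward(x, torch.ones(5))
    -- gi should equal tanh(x)^2 up to float precision
    print((gi - torch.tanh(x):pow(2)):abs():max())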
diff --git a/WeightedMSECriterion.lua b/WeightedMSECriterion.lua
new file mode 100644
index 0000000..183b343
--- /dev/null
+++ b/WeightedMSECriterion.lua
@@ -0,0 +1,31 @@
+local WeightedMSECriterion, parent = torch.class('nn.WeightedMSECriterion','nn.MSECriterion')
+
+function WeightedMSECriterion:__init(w)
+ parent.__init(self)
+ self.weight = w:clone()
+ self.buffer = torch.Tensor()
+end
+
+function WeightedMSECriterion:updateOutput(input,target)
+ self.buffer:resizeAs(input):copy(target)
+ if input:dim() - 1 == self.weight:dim() then
+ for i=1,input:size(1) do
+ self.buffer[i]:cmul(self.weight)
+ end
+ else
+ self.buffer:cmul(self.weight)
+ end
+ return input.nn.MSECriterion_updateOutput(self, input, self.buffer)
+end
+
+function WeightedMSECriterion:updateGradInput(input, target)
+ self.buffer:resizeAs(input):copy(target)
+ if input:dim() - 1 == self.weight:dim() then
+ for i=1,input:size(1) do
+ self.buffer[i]:cmul(self.weight)
+ end
+ else
+ self.buffer:cmul(self.weight)
+ end
+ return input.nn.MSECriterion_updateGradInput(self, input, self.buffer)
+end
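
The dim() check lets a single mask serve both modes: when the weight has one dimension fewer than the input, it is treated as a per-sample mask and multiplied into every row of the buffered target copy; otherwise it is applied elementwise to the whole tensor. Both cases, with illustrative shapes:

    local w = torch.rand(10)
    local crit = nn.WeightedMSECriterion(w)
    crit:forward(torch.randn(10), torch.randn(10))        -- w applied elementwise
    crit:forward(torch.randn(4, 10), torch.randn(4, 10))  -- w re-applied to each of the 4 rows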
diff --git a/generic/L1Cost.c b/generic/L1Cost.c
new file mode 100644
index 0000000..a450e06
--- /dev/null
+++ b/generic/L1Cost.c
@@ -0,0 +1,49 @@
+#ifndef TH_GENERIC_FILE
+#define TH_GENERIC_FILE "generic/L1Cost.c"
+#else
+
+static int nn_(L1Cost_updateOutput)(lua_State *L)
+{
+ THTensor *input = luaT_checkudata(L, 2, torch_Tensor);
+ accreal sum;
+
+ sum = 0;
+ TH_TENSOR_APPLY(real, input, sum += fabs(*input_data););
+
+ lua_pushnumber(L, sum);
+ lua_setfield(L, 1, "output");
+
+ lua_pushnumber(L, sum);
+ return 1;
+}
+
+static int nn_(L1Cost_updateGradInput)(lua_State *L)
+{
+ THTensor *input = luaT_checkudata(L, 2, torch_Tensor);
+ THTensor *gradInput = luaT_getfieldcheckudata(L, 1, "gradInput", torch_Tensor);
+
+ THTensor_(resizeAs)(gradInput, input);
+ TH_TENSOR_APPLY2(real, gradInput, real, input,
+ if (*input_data > 0)
+ *gradInput_data = 1;
+ else if (*input_data < 0)
+ *gradInput_data = -1;
+ else
+ *gradInput_data = 0;);
+ return 1;
+}
+
+static const struct luaL_Reg nn_(L1Cost__) [] = {
+ {"L1Cost_updateOutput", nn_(L1Cost_updateOutput)},
+ {"L1Cost_updateGradInput", nn_(L1Cost_updateGradInput)},
+ {NULL, NULL}
+};
+
+static void nn_(L1Cost_init)(lua_State *L)
+{
+ luaT_pushmetatable(L, torch_Tensor);
+ luaT_registeratname(L, nn_(L1Cost__), "nn");
+ lua_pop(L,1);
+}
+
+#endif
diff --git a/generic/SpatialConvolutionMap.c b/generic/SpatialConvolutionMap.c
index 4c289fb..a1d20bc 100644
--- a/generic/SpatialConvolutionMap.c
+++ b/generic/SpatialConvolutionMap.c
@@ -18,7 +18,7 @@ static int nn_(SpatialConvolutionMap_updateOutput)(lua_State *L)
THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_Tensor);

luaL_argcheck(L, input->nDimension == 3, 2, "3D tensor expected");
- luaL_argcheck(L, input->size[0] == nInputPlane, 2, "invalid number of input planes");
+ luaL_argcheck(L, input->size[0] >= nInputPlane, 2, "invalid number of input planes");
luaL_argcheck(L, input->size[2] >= kW && input->size[1] >= kH, 2, "input image smaller than kernel size");

THTensor_(resize3d)(output, nOutputPlane,
diff --git a/generic/SpatialFullConvolution.c b/generic/SpatialFullConvolution.c
new file mode 100644
index 0000000..cb2e340
--- /dev/null
+++ b/generic/SpatialFullConvolution.c
@@ -0,0 +1,191 @@
+#ifndef TH_GENERIC_FILE
+#define TH_GENERIC_FILE "generic/SpatialFullConvolution.c"
+#else
+
+static int nn_(SpatialFullConvolution_updateOutput)(lua_State *L)
+{
+ THTensor *input = luaT_checkudata(L, 2, torch_Tensor);
+ int dW = luaT_getfieldcheckint(L, 1, "dW");
+ int dH = luaT_getfieldcheckint(L, 1, "dH");
+
+ THTensor *weight = luaT_getfieldcheckudata(L, 1, "weight", torch_Tensor);
+ THTensor *bias = luaT_getfieldcheckudata(L, 1, "bias", torch_Tensor);
+ THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_Tensor);
+
+ luaL_argcheck(L, input->nDimension == 3 || input->nDimension == 4, 2, "3D or 4D(batch mode) tensor expected");
+ int dimw = 2;
+ int dimh = 1;
+ if (input->nDimension == 4) {
+ dimw++;
+ dimh++;
+ }
+
+ long nOutputPlane = weight->size[1];
+ long kW = weight->size[3];
+ long kH = weight->size[2];
+ long inputWidth = input->size[dimw];
+ long inputHeight = input->size[dimh];
+ long outputWidth = (inputWidth - 1) * dW + kW;
+ long outputHeight = (inputHeight - 1) * dH + kH;
+
+ if (input->nDimension == 3)
+ {
+ THTensor_(resize3d)(output, nOutputPlane, outputHeight, outputWidth);
+ /* add bias */
+ long i;
+ real* bias_data = THTensor_(data)(bias);
+ real* output_data = THTensor_(data)(output);
+#pragma omp parallel for private(i)
+ for (i=0; i<bias->size[0]; i++)
+ {
+ /*THTensor_(select)(outn,output,0,i);*/
+ /*TH_TENSOR_APPLY(real,outn, *outn_data = bias_data[i];);*/
+ real *ptr_output = output_data + i*outputWidth*outputHeight;
+ long j;
+ for(j = 0; j < outputWidth*outputHeight; j++)
+ ptr_output[j] = bias_data[i];
+ }
+
+ /* do convolutions */
+ THTensor *tweight = THTensor_(newTranspose)(weight,0,1);
+ THTensor_(conv2Dmv)(output, 1.0, 1.0, input, tweight, dH, dW, "F", "C");
+ THTensor_(free)(tweight);
+ }
+ else
+ {
+ THTensor_(resize4d)(output, input->size[0], nOutputPlane, outputHeight, outputWidth);
+ real* bias_data = THTensor_(data)(bias);
+ real* output_data = THTensor_(data)(output);
+
+ long p;
+#pragma omp parallel for private(p)
+ for (p=0; p<input->size[0]; p++)
+ {
+ /* BIAS */
+ long i;
+ for (i=0; i<bias->size[0]; i++)
+ {
+ real *ptr_output = output_data + p*nOutputPlane*outputWidth*outputHeight + i*outputWidth*outputHeight;
+ long j;
+ for(j = 0; j < outputWidth*outputHeight; j++)
+ ptr_output[j] = bias_data[i];
+ }
+ }
+ /* do convolutions */
+ THTensor *tweight = THTensor_(newTranspose)(weight,0,1);
+ THTensor_(conv2Dmm)(output, 1.0, 1.0, input, tweight, dH, dW, "F", "C");
+ THTensor_(free)(tweight);
+ }
+ return 1;
+}
+
+
+static int nn_(SpatialFullConvolution_updateGradInput)(lua_State *L)
+{
+ THTensor *input = luaT_checkudata(L, 2, torch_Tensor);
+ THTensor *gradOutput = luaT_checkudata(L, 3, torch_Tensor);
+ int dW = luaT_getfieldcheckint(L, 1, "dW");
+ int dH = luaT_getfieldcheckint(L, 1, "dH");
+
+ THTensor *weight = luaT_getfieldcheckudata(L, 1, "weight", torch_Tensor);
+ THTensor *gradInput = luaT_getfieldcheckudata(L, 1, "gradInput", torch_Tensor);
+
+ long nOutputPlane = weight->size[1];
+ THArgCheck( nOutputPlane == gradOutput->size[input->nDimension == 4 ? 1 : 0], 1, "Number of output features is not equal to nOutputPlane" );
+
+ if (input->nDimension == 3)
+ {
+ /* gradient to input */
+ THTensor_(conv2Dmv)(gradInput, 0.0, 1.0, gradOutput, weight, dH, dW, "V", "X");
+ }
+ else
+ {
+ /* gradient to input */
+ THTensor_(conv2Dmm)(gradInput, 0.0, 1.0, gradOutput, weight, dH, dW, "V", "X");
+ }
+
+ return 1;
+}
+
+static int nn_(SpatialFullConvolution_accGradParameters)(lua_State *L)
+{
+ THTensor *input = luaT_checkudata(L, 2, torch_Tensor);
+ THTensor *gradOutput = luaT_checkudata(L, 3, torch_Tensor);
+ real scale = luaL_optnumber(L, 4, 1);
+ int dW = luaT_getfieldcheckint(L, 1, "dW");
+ int dH = luaT_getfieldcheckint(L, 1, "dH");
+
+ THTensor *weight = luaT_getfieldcheckudata(L, 1, "weight", torch_Tensor);
+ THTensor *gradWeight = luaT_getfieldcheckudata(L, 1, "gradWeight", torch_Tensor);
+ THTensor *gradBias = luaT_getfieldcheckudata(L, 1, "gradBias", torch_Tensor);
+
+ long nOutputPlane = weight->size[1];
+ THArgCheck( nOutputPlane == gradOutput->size[input->nDimension == 4 ? 1 : 0], 1, "Number of output features is not equal to nOutputPlane" );
+
+ int dimw = 2;
+ int dimh = 1;
+
+ if (input->nDimension == 4)
+ {
+ dimw++;
+ dimh++;
+ }
+ /* gradient to bias */
+ real *gradBias_data = THTensor_(data)(gradBias);
+ real *gradOutput_data = THTensor_(data)(gradOutput);
+ long noutSlice = gradOutput->size[dimh]*gradOutput->size[dimw];
+ /*THTensor* gradOutSlice = THTensor_(new)();*/
+
+ if (input->nDimension == 3)
+ {
+ long k;
+#pragma omp parallel for private(k)
+ for(k = 0; k < nOutputPlane; k++)
+ {
+ /*THTensor_(select)(gradOutSlice, gradOutput, 0, k);*/
+ real *ptr_gradOutput = gradOutput_data + k*noutSlice;
+ long l;
+ for(l = 0; l < noutSlice; l++)
+ gradBias_data[k] += scale*ptr_gradOutput[l];
+ }
+
+ /* gradient to kernels */
+ THTensor_(conv2DRevger)(gradWeight, 1.0, scale, gradOutput, input, dH, dW);
+ }
+ else
+ {
+ long k;
+#pragma omp parallel for private(k)
+ for(k = 0; k < nOutputPlane; k++)
+ {
+ long p;
+ for(p = 0; p < input->size[0]; p++)
+ {
+ /* BIAS */
+ real *ptr_gradOutput = gradOutput_data + p*nOutputPlane*noutSlice + k*noutSlice;
+ long l;
+ for(l = 0; l < noutSlice; l++)
+ gradBias_data[k] += scale*ptr_gradOutput[l];
+ }
+ }
+ /* gradient to kernels */
+ THTensor_(conv2DRevgerm)(gradWeight, 1.0, scale, gradOutput, input, dH, dW);
+ }
+ return 0;
+}
+
+static const struct luaL_Reg nn_(SpatialFullConvolution__) [] = {
+ {"SpatialFullConvolution_updateOutput", nn_(SpatialFullConvolution_updateOutput)},
+ {"SpatialFullConvolution_updateGradInput", nn_(SpatialFullConvolution_updateGradInput)},
+ {"SpatialFullConvolution_accGradParameters", nn_(SpatialFullConvolution_accGradParameters)},
+ {NULL, NULL}
+};
+
+static void nn_(SpatialFullConvolution_init)(lua_State *L)
+{
+ luaT_pushmetatable(L, torch_Tensor);
+ luaT_registeratname(L, nn_(SpatialFullConvolution__), "nn");
+ lua_pop(L,1);
+}
+
+#endif
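
The output size computed above, (inputHeight - 1) * dH + kH by (inputWidth - 1) * dW + kW, is exactly the input size that a valid convolution with the same kernel and stride would consume to produce this layer's input; the full convolution is its shape inverse, which is what makes it suitable as the decoder of a convolutional auto-encoder. A quick check with hypothetical sizes:

    -- (10-1)*2 + 5 = 23, and a valid 5x5 stride-2 conv maps 23x23 back to 10x10
    local fconv = nn.SpatialFullConvolution(1, 1, 5, 5, 2, 2)
    print(fconv:forward(torch.randn(1, 10, 10)):size())   -- 1 x 23 x 23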
diff --git a/generic/SpatialFullConvolutionMap.c b/generic/SpatialFullConvolutionMap.c
new file mode 100644
index 0000000..8a5d9df
--- /dev/null
+++ b/generic/SpatialFullConvolutionMap.c
@@ -0,0 +1,225 @@
+#ifndef TH_GENERIC_FILE
+#define TH_GENERIC_FILE "generic/SpatialFullConvolutionMap.c"
+#else
+
+static int nn_(SpatialFullConvolutionMap_updateOutput)(lua_State *L)
+{
+ THTensor *input = luaT_checkudata(L, 2, torch_Tensor);
+ int kW = luaT_getfieldcheckint(L, 1, "kW");
+ int kH = luaT_getfieldcheckint(L, 1, "kH");
+ int dW = luaT_getfieldcheckint(L, 1, "dW");
+ int dH = luaT_getfieldcheckint(L, 1, "dH");
+ int nInputPlane = luaT_getfieldcheckint(L, 1, "nInputPlane");
+ int nOutputPlane = luaT_getfieldcheckint(L, 1, "nOutputPlane");
+
+
+ THTensor *connTable = luaT_getfieldcheckudata(L, 1, "connTable", torch_Tensor);
+ THTensor *weight = luaT_getfieldcheckudata(L, 1, "weight", torch_Tensor);
+ THTensor *bias = luaT_getfieldcheckudata(L, 1, "bias", torch_Tensor);
+ THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_Tensor);
+
+ luaL_argcheck(L, input->nDimension == 3, 2, "3D tensor expected");
+ luaL_argcheck(L, input->size[0] >= nInputPlane, 2, "invalid number of input planes");
+
+
+ THTensor_(resize3d)(output, nOutputPlane,
+ (input->size[1] - 1) * dH + kH,
+ (input->size[2] - 1) * dW + kW);
+
+ // contiguous
+ input = THTensor_(newContiguous)(input);
+ output = THTensor_(newContiguous)(output);
+
+ // get raw pointers
+ real *input_data = THTensor_(data)(input);
+ real *output_data = THTensor_(data)(output);
+ real *weight_data = THTensor_(data)(weight);
+ real *bias_data = THTensor_(data)(bias);
+ real *connTable_data = THTensor_(data)(connTable);
+
+ // and dims
+ long input_h = input->size[1];
+ long input_w = input->size[2];
+ long output_h = output->size[1];
+ long output_w = output->size[2];
+ long weight_h = weight->size[1];
+ long weight_w = weight->size[2];
+
+ long p;
+#pragma omp parallel for private(p)
+ for (p = 0; p < nOutputPlane; p++) {
+ // add bias
+ real *ptr_output = output_data + p*output_w*output_h;
+ long j;
+ for(j = 0; j < output_h*output_w; j++)
+ ptr_output[j] = bias_data[p];
+
+ // convolve all maps
+ int nweight = connTable->size[0];
+ long k;
+ for (k = 0; k < nweight; k++) {
+ // get offsets for input/output
+ int o = (int)connTable_data[k*2+1]-1;
+ int i = (int)connTable_data[k*2+0]-1;
+
+ if (o == p)
+ {
+ THTensor_(fullConv2Dptr)(output_data + o*output_w*output_h,
+ 1.0,
+ input_data + i*input_w*input_h, input_h, input_w,
+ weight_data + k*weight_w*weight_h, weight_h, weight_w,
+ dH, dW);
+ }
+ }
+ }
+
+ // clean up
+ THTensor_(free)(input);
+ THTensor_(free)(output);
+
+ return 1;
+}
+
+static int nn_(SpatialFullConvolutionMap_updateGradInput)(lua_State *L)
+{
+ THTensor *input = luaT_checkudata(L, 2, torch_Tensor);
+ THTensor *gradOutput = luaT_checkudata(L, 3, torch_Tensor);
+ int dW = luaT_getfieldcheckint(L, 1, "dW");
+ int dH = luaT_getfieldcheckint(L, 1, "dH");
+ int nInputPlane = luaT_getfieldcheckint(L, 1, "nInputPlane");
+
+ THTensor *connTable = luaT_getfieldcheckudata(L, 1, "connTable", torch_Tensor);
+ THTensor *weight = luaT_getfieldcheckudata(L, 1, "weight", torch_Tensor);
+ THTensor *gradInput = luaT_getfieldcheckudata(L, 1, "gradInput", torch_Tensor);
+
+ // contiguous
+ gradInput = THTensor_(newContiguous)(gradInput);
+ gradOutput = THTensor_(newContiguous)(gradOutput);
+
+ // Resize/Zero
+ THTensor_(resizeAs)(gradInput, input);
+ THTensor_(zero)(gradInput);
+
+ // get raw pointers
+ real *gradInput_data = THTensor_(data)(gradInput);
+ real *gradOutput_data = THTensor_(data)(gradOutput);
+ real *weight_data = THTensor_(data)(weight);
+ real *connTable_data = THTensor_(data)(connTable);
+
+ // and dims
+ long input_h = input->size[1];
+ long input_w = input->size[2];
+ long output_h = gradOutput->size[1];
+ long output_w = gradOutput->size[2];
+ long weight_h = weight->size[1];
+ long weight_w = weight->size[2];
+
+ long p;
+#pragma omp parallel for private(p)
+ for(p = 0; p < nInputPlane; p++)
+ {
+ long k;
+ // backward all
+ int nkernel = connTable->size[0];
+ for(k = 0; k < nkernel; k++)
+ {
+ int o = (int)connTable_data[k*2+1]-1;
+ int i = (int)connTable_data[k*2+0]-1;
+ if (i == p)
+ {
+ // gradient to input
+ THTensor_(validXCorr2Dptr)(gradInput_data + i*input_w*input_h,
+ 1.0,
+ gradOutput_data + o*output_w*output_h, output_h, output_w,
+ weight_data + k*weight_w*weight_h, weight_h, weight_w,
+ dH, dW);
+ }
+ }
+ }
+
+ // clean up
+ THTensor_(free)(gradInput);
+ THTensor_(free)(gradOutput);
+
+ return 1;
+}
+
+static int nn_(SpatialFullConvolutionMap_accGradParameters)(lua_State *L)
+{
+ THTensor *input = luaT_checkudata(L, 2, torch_Tensor);
+ THTensor *gradOutput = luaT_checkudata(L, 3, torch_Tensor);
+ int dW = luaT_getfieldcheckint(L, 1, "dW");
+ int dH = luaT_getfieldcheckint(L, 1, "dH");
+ int nOutputPlane = luaT_getfieldcheckint(L, 1, "nOutputPlane");
+ real scale = luaL_optnumber(L, 4, 1);
+
+ THTensor *connTable = luaT_getfieldcheckudata(L, 1, "connTable", torch_Tensor);
+ THTensor *weight = luaT_getfieldcheckudata(L, 1, "weight", torch_Tensor);
+ THTensor *gradWeight = luaT_getfieldcheckudata(L, 1, "gradWeight", torch_Tensor);
+ THTensor *gradBias = luaT_getfieldcheckudata(L, 1, "gradBias", torch_Tensor);
+
+ // contiguous
+ input = THTensor_(newContiguous)(input);
+ gradOutput = THTensor_(newContiguous)(gradOutput);
+
+ // get raw pointers
+ real *input_data = THTensor_(data)(input);
+ real *gradOutput_data = THTensor_(data)(gradOutput);
+ real *gradWeight_data = THTensor_(data)(gradWeight);
+ real *gradBias_data = THTensor_(data)(gradBias);
+
+ // and dims
+ long input_h = input->size[1];
+ long input_w = input->size[2];
+ long output_h = gradOutput->size[1];
+ long output_w = gradOutput->size[2];
+ long weight_h = weight->size[1];
+ long weight_w = weight->size[2];
+
+ // gradients wrt bias
+ long k;
+#pragma omp parallel for private(k)
+ for(k = 0; k < nOutputPlane; k++) {
+ real *ptr_gradOutput = gradOutput_data + k*output_w*output_h;
+ long l;
+ for(l = 0; l < output_h*output_w; l++)
+ gradBias_data[k] += scale*ptr_gradOutput[l];
+ }
+
+ // gradients wrt weight
+ int nkernel = connTable->size[0];
+#pragma omp parallel for private(k)
+ for(k = 0; k < nkernel; k++)
+ {
+ int o = (int)THTensor_(get2d)(connTable,k,1)-1;
+ int i = (int)THTensor_(get2d)(connTable,k,0)-1;
+
+ // gradient to kernel
+ THTensor_(validXCorr2DRevptr)(gradWeight_data + k*weight_w*weight_h,
+ scale,
+ gradOutput_data + o*output_w*output_h, output_h, output_w,
+ input_data + i*input_w*input_h, input_h, input_w,
+ dH, dW);
+ }
+
+ // clean up
+ THTensor_(free)(input);
+ THTensor_(free)(gradOutput);
+ return 0;
+}
+
+static const struct luaL_Reg nn_(SpatialFullConvolutionMapStuff__) [] = {
+ {"SpatialFullConvolutionMap_updateOutput", nn_(SpatialFullConvolutionMap_updateOutput)},
+ {"SpatialFullConvolutionMap_updateGradInput", nn_(SpatialFullConvolutionMap_updateGradInput)},
+ {"SpatialFullConvolutionMap_accGradParameters", nn_(SpatialFullConvolutionMap_accGradParameters)},
+ {NULL, NULL}
+};
+
+static void nn_(SpatialFullConvolutionMap_init)(lua_State *L)
+{
+ luaT_pushmetatable(L, torch_Tensor);
+ luaT_registeratname(L, nn_(SpatialFullConvolutionMapStuff__), "nn");
+ lua_pop(L,1);
+}
+
+#endif
diff --git a/hessian.lua b/hessian.lua
index c55e066..3d336fe 100644
--- a/hessian.lua
+++ b/hessian.lua
@@ -164,6 +164,24 @@ function nn.hessian.enable()
end
----------------------------------------------------------------------
+ -- WeightedMSECriterion
+ ----------------------------------------------------------------------
+ function nn.WeightedMSECriterion.updateDiagHessianInput(self,input,target)
+ return nn.MSECriterion.updateDiagHessianInput(self,input,target)
+ end
+
+ ----------------------------------------------------------------------
+ -- L1Cost
+ ----------------------------------------------------------------------
+ function nn.L1Cost.updateDiagHessianInput(self,input)
+ self.diagHessianInput = self.diagHessianInput or input.new()
+ self.diagHessianInput:resizeAs(input)
+ self.diagHessianInput:fill(1)
+ self.diagHessianInput[torch.eq(input,0)] = 0
+ return self.diagHessianInput
+ end
+
+ ----------------------------------------------------------------------
-- Linear
----------------------------------------------------------------------
function nn.Linear.updateDiagHessianInput(self, input, diagHessianOutput)
@@ -188,7 +206,7 @@ function nn.hessian.enable()
end
function nn.SpatialConvolution.accDiagHessianParameters(self, input, diagHessianOutput)
- accDiagHessianParameters(self,input, diagHessianOutput, {'gradWeight'}, {'diagHessianWeight'})
+ accDiagHessianParameters(self,input, diagHessianOutput, {'gradWeight','gradBias'}, {'diagHessianWeight','diagHessianBias'})
end
function nn.SpatialConvolution.initDiagHessianParameters(self)
@@ -196,6 +214,22 @@ function nn.hessian.enable()
end
----------------------------------------------------------------------
+ -- SpatialFullConvolution
+ ----------------------------------------------------------------------
+ function nn.SpatialFullConvolution.updateDiagHessianInput(self, input, diagHessianOutput)
+ updateDiagHessianInput(self, input, diagHessianOutput, {'weight'}, {'weightSq'})
+ return self.diagHessianInput
+ end
+
+ function nn.SpatialFullConvolution.accDiagHessianParameters(self, input, diagHessianOutput)
+ accDiagHessianParameters(self,input, diagHessianOutput, {'gradWeight'}, {'diagHessianWeight'})
+ end
+
+ function nn.SpatialFullConvolution.initDiagHessianParameters(self)
+ initDiagHessianParameters(self,{'gradWeight','gradBias'},{'diagHessianWeight','diagHessianBias'})
+ end
+
+ ----------------------------------------------------------------------
-- SpatialConvolutionMap
----------------------------------------------------------------------
function nn.SpatialConvolutionMap.updateDiagHessianInput(self, input, diagHessianOutput)
@@ -212,6 +246,22 @@ function nn.hessian.enable()
end
----------------------------------------------------------------------
+ -- SpatialFullConvolutionMap
+ ----------------------------------------------------------------------
+ function nn.SpatialFullConvolutionMap.updateDiagHessianInput(self, input, diagHessianOutput)
+ updateDiagHessianInput(self, input, diagHessianOutput, {'weight'}, {'weightSq'})
+ return self.diagHessianInput
+ end
+
+ function nn.SpatialFullConvolutionMap.accDiagHessianParameters(self, input, diagHessianOutput)
+ accDiagHessianParameters(self,input, diagHessianOutput, {'gradWeight','gradBias'}, {'diagHessianWeight','diagHessianBias'})
+ end
+
+ function nn.SpatialFullConvolutionMap.initDiagHessianParameters(self)
+ initDiagHessianParameters(self,{'gradWeight','gradBias'},{'diagHessianWeight','diagHessianBias'})
+ end
+
+----------------------------------------------------------------------
-- Tanh
----------------------------------------------------------------------
function nn.Tanh.updateDiagHessianInput(self, input, diagHessianOutput)
@@ -220,6 +270,16 @@ function nn.hessian.enable()
end
----------------------------------------------------------------------
+ -- TanhShrink
+ ----------------------------------------------------------------------
+ function nn.TanhShrink.updateDiagHessianInput(self, input, diagHessianOutput)
+ updateDiagHessianInputPointWise(self.tanh, input, diagHessianOutput)
+ self.diagHessianInput = self.diagHessianInput or input.new():resizeAs(input)
+ torch.add(self.diagHessianInput, self.tanh.diagHessianInput, diagHessianOutput)
+ return self.diagHessianInput
+ end
+
+ ----------------------------------------------------------------------
-- Square
----------------------------------------------------------------------
function nn.Square.updateDiagHessianInput(self, input, diagHessianOutput)
diff --git a/init.c b/init.c
index 9082b83..2142a8a 100644
--- a/init.c
+++ b/init.c
@@ -74,6 +74,12 @@
#include "generic/SpatialConvolution.c"
#include "THGenerateFloatTypes.h"
+#include "generic/SpatialFullConvolution.c"
+#include "THGenerateFloatTypes.h"
+
+#include "generic/SpatialFullConvolutionMap.c"
+#include "THGenerateFloatTypes.h"
+
#include "generic/SpatialConvolutionMM.c"
#include "THGenerateFloatTypes.h"
@@ -95,6 +101,9 @@
#include "generic/MultiLabelMarginCriterion.c"
#include "THGenerateFloatTypes.h"
+#include "generic/L1Cost.c"
+#include "THGenerateFloatTypes.h"
+
DLL_EXPORT int luaopen_libnn(lua_State *L)
{
lua_newtable(L);
@@ -124,6 +133,8 @@ DLL_EXPORT int luaopen_libnn(lua_State *L)
nn_FloatTemporalSubSampling_init(L);
nn_FloatTemporalMaxPooling_init(L);
nn_FloatSpatialConvolution_init(L);
+ nn_FloatSpatialFullConvolution_init(L);
+ nn_FloatSpatialFullConvolutionMap_init(L);
nn_FloatSpatialConvolutionMM_init(L);
nn_FloatSpatialConvolutionMap_init(L);
nn_FloatSpatialSubSampling_init(L);
@@ -131,6 +142,7 @@ DLL_EXPORT int luaopen_libnn(lua_State *L)
nn_FloatVolumetricConvolution_init(L);
nn_FloatMultiMarginCriterion_init(L);
nn_FloatMultiLabelMarginCriterion_init(L);
+ nn_FloatL1Cost_init(L);
nn_DoubleMin_init(L);
nn_DoubleMax_init(L);
@@ -155,6 +167,8 @@ DLL_EXPORT int luaopen_libnn(lua_State *L)
nn_DoubleTemporalSubSampling_init(L);
nn_DoubleTemporalMaxPooling_init(L);
nn_DoubleSpatialConvolution_init(L);
+ nn_DoubleSpatialFullConvolution_init(L);
+ nn_DoubleSpatialFullConvolutionMap_init(L);
nn_DoubleSpatialConvolutionMM_init(L);
nn_DoubleSpatialConvolutionMap_init(L);
nn_DoubleSpatialSubSampling_init(L);
@@ -162,6 +176,7 @@ DLL_EXPORT int luaopen_libnn(lua_State *L)
nn_DoubleVolumetricConvolution_init(L);
nn_DoubleMultiMarginCriterion_init(L);
nn_DoubleMultiLabelMarginCriterion_init(L);
+ nn_DoubleL1Cost_init(L);
return 1;
}
diff --git a/init.lua b/init.lua
index d121be1..587412f 100644
--- a/init.lua
+++ b/init.lua
@@ -45,6 +45,7 @@ include('SoftMin.lua')
include('SoftPlus.lua')
include('SoftSign.lua')
include('Tanh.lua')
+include('TanhShrink.lua')
include('Abs.lua')
include('Power.lua')
include('Square.lua')
@@ -55,6 +56,8 @@ include('Threshold.lua')
include('LookupTable.lua')
include('SpatialConvolution.lua')
+include('SpatialFullConvolution.lua')
+include('SpatialFullConvolutionMap.lua')
include('SpatialConvolutionMM.lua')
include('SpatialConvolutionMap.lua')
include('SpatialSubSampling.lua')
@@ -90,6 +93,8 @@ include('CosineEmbeddingCriterion.lua')
include('MarginRankingCriterion.lua')
include('MultiMarginCriterion.lua')
include('MultiLabelMarginCriterion.lua')
+include('L1Cost.lua')
+include('WeightedMSECriterion.lua')
include('StochasticGradient.lua')
diff --git a/test/test.lua b/test/test.lua
index 3097431..58a9bd7 100644
--- a/test/test.lua
+++ b/test/test.lua
@@ -7,7 +7,6 @@ local precision = 1e-5
local expprecision = 1e-4
local nntest = {}
-local nntestx = {}
function nntest.Add()
local ini = math.random(10,20)
@@ -341,6 +340,22 @@ function nntest.WeightedEuclidean()
mytester:asserteq(berr, 0, torch.typename(module) .. ' - i/o backward err ')
end
+function nntest.WeightedMSECriterion()
+ local from = math.random(100,200)
+ local input = torch.Tensor(from):zero()
+ local target = torch.randn(from)
+ local weight = torch.randn(from)
+ local cri = nn.WeightedMSECriterion(weight)
+ local module = nn.CriterionModule(cri,target)
+
+ local err = jac.testJacobian(module, input)
+ mytester:assertlt(err, precision, 'error on state ')
+
+ local ferr, berr = jac.testIO(module, input)
+ mytester:asserteq(0, ferr, torch.typename(module) .. ' - i/o forward err ')
+ mytester:asserteq(0, berr, torch.typename(module) .. ' - i/o backward err ')
+end
+
function nntest.LogSigmoid()
local ini = math.random(10,20)
local inj = math.random(10,20)
@@ -795,6 +810,167 @@ function nntest.SpatialConvolutionMap()
mytester:asserteq(0, berr, torch.typename(module) .. ' - i/o backward err ')
end
+
+function nntest.SpatialFullConvolution()
+ local from = math.random(1,10)
+ local to = math.random(1,10)
+ local ki = math.random(1,10)
+ local kj = math.random(1,10)
+ local si = math.random(1,4)
+ local sj = math.random(1,4)
+ local ini = math.random(10,18)
+ local inj = math.random(10,18)
+ local module = nn.SpatialFullConvolution(from, to, ki, kj, si, sj)
+ local input = torch.Tensor(from, inj, ini):zero()
+
+ -- stochastic
+ local err = jac.testJacobian(module, input)
+ mytester:assertlt(err, precision, 'error on state ')
+
+ local err = jac.testJacobianParameters(module, input, module.weight, module.gradWeight)
+ mytester:assertlt(err , precision, 'error on weight ')
+
+ local err = jac.testJacobianParameters(module, input, module.bias, module.gradBias)
+ mytester:assertlt(err , precision, 'error on bias ')
+
+ local err = jac.testJacobianUpdateParameters(module, input, module.weight)
+ mytester:assertlt(err , precision, 'error on weight [direct update] ')
+
+ local err = jac.testJacobianUpdateParameters(module, input, module.bias)
+ mytester:assertlt(err , precision, 'error on bias [direct update] ')
+
+ for t,err in pairs(jac.testAllUpdate(module, input, 'weight', 'gradWeight')) do
+ mytester:assertlt(err, precision, string.format(
+ 'error on weight [%s]', t))
+ end
+
+ for t,err in pairs(jac.testAllUpdate(module, input, 'bias', 'gradBias')) do
+ mytester:assertlt(err, precision, string.format(
+ 'error on bias [%s]', t))
+ end
+
+ -- batch
+ local batch = math.random(2,5)
+ ini = math.random(4,8)
+ inj = math.random(4,8)
+ module = nn.SpatialFullConvolution(from, to, ki, kj, si, sj)
+ input = torch.Tensor(batch,from,inj,ini):zero()
+
+ local err = jac.testJacobian(module, input)
+ mytester:assertlt(err, precision, 'batch error on state ')
+
+ local err = jac.testJacobianParameters(module, input, module.weight, module.gradWeight)
+ mytester:assertlt(err , precision, 'batch error on weight ')
+
+ local err = jac.testJacobianParameters(module, input, module.bias, module.gradBias)
+ mytester:assertlt(err , precision, 'batch error on bias ')
+
+ local err = jac.testJacobianUpdateParameters(module, input, module.weight)
+ mytester:assertlt(err , precision, 'batch error on weight [direct update] ')
+
+ local err = jac.testJacobianUpdateParameters(module, input, module.bias)
+ mytester:assertlt(err , precision, 'batch error on bias [direct update] ')
+
+ for t,err in pairs(jac.testAllUpdate(module, input, 'weight', 'gradWeight')) do
+ mytester:assertlt(err, precision, string.format(
+ 'error on weight [%s]', t))
+ end
+
+ for t,err in pairs(jac.testAllUpdate(module, input, 'bias', 'gradBias')) do
+ mytester:assertlt(err, precision, string.format(
+ 'batch error on bias [%s]', t))
+ end
+
+ local ferr, berr = jac.testIO(module, input)
+ mytester:asserteq(0, ferr, torch.typename(module) .. ' - i/o forward err ')
+ mytester:asserteq(0, berr, torch.typename(module) .. ' - i/o backward err ')
+end
+
+function nntest.SpatialFullConvolutionMap()
+ local from = math.ceil(torch.uniform(2,5))
+ local to = math.ceil(torch.uniform(2,7))
+ local fanin = math.ceil(torch.uniform(1, from))
+ local tt = nn.tables.random(from, to, fanin)
+ local ki = math.ceil(torch.uniform(2,7))
+ local kj = math.ceil(torch.uniform(2,7))
+ local si = math.ceil(torch.uniform(1,3))
+ local sj = math.ceil(torch.uniform(1,3))
+ local ini = math.ceil(torch.uniform(10,18))
+ local inj = math.ceil(torch.uniform(10,18))
+ local module = nn.SpatialFullConvolutionMap(tt, ki, kj, si, sj)
+ local input = torch.Tensor(from, inj, ini):zero()
+
+ -- stochastic
+ local err = jac.testJacobian(module, input)
+ mytester:assertlt(err, precision, 'error on state ')
+
+ local err = jac.testJacobianParameters(module, input, module.weight, module.gradWeight)
+ mytester:assertlt(err , precision, 'error on weight ')
+
+ local err = jac.testJacobianParameters(module, input, module.bias, module.gradBias)
+ mytester:assertlt(err , precision, 'error on bias ')
+
+ local err = jac.testJacobianUpdateParameters(module, input, module.weight)
+ mytester:assertlt(err , precision, 'error on weight [direct update] ')
+
+ local err = jac.testJacobianUpdateParameters(module, input, module.bias)
+ mytester:assertlt(err , precision, 'error on bias [direct update] ')
+
+ for t,err in pairs(jac.testAllUpdate(module, input, 'weight', 'gradWeight')) do
+ mytester:assertlt(err, precision, string.format(
+ 'error on weight [%s]', t))
+ end
+
+ for t,err in pairs(jac.testAllUpdate(module, input, 'bias', 'gradBias')) do
+ mytester:assertlt(err, precision, string.format(
+ 'error on bias [%s]', t))
+ end
+
+ local ferr, berr = jac.testIO(module, input)
+ mytester:asserteq(0, ferr, torch.typename(module) .. ' - i/o forward err ')
+ mytester:asserteq(0, berr, torch.typename(module) .. ' - i/o backward err ')
+end
+
+function nntest.SpatialFullConvolutionCompare()
+ local from = math.ceil(torch.uniform(2,5))
+ local to = math.ceil(torch.uniform(2,7))
+ local tt = nn.tables.full(from, to)
+ local ki = math.ceil(torch.uniform(2,7))
+ local kj = math.ceil(torch.uniform(2,7))
+ local si = math.ceil(torch.uniform(1,3))
+ local sj = math.ceil(torch.uniform(1,3))
+ local ini = math.ceil(torch.uniform(10,18))
+ local inj = math.ceil(torch.uniform(10,18))
+ local module1 = nn.SpatialFullConvolutionMap(tt, ki, kj, si, sj)
+ local module2 = nn.SpatialFullConvolution(from, to, ki, kj, si, sj)
+ local input = torch.rand(from, inj, ini)
+ for k=1,tt:size(1) do
+ module1.weight[k]:copy(module2.weight[tt[k][1]][tt[k][2]])
+ module1.bias:copy(module2.bias)
+ end
+
+ local o1 = module1:updateOutput(input)
+ local o2 = module2:updateOutput(input)
+ mytester:assertlt(o1:dist(o2), precision, 'error on output')
+
+ local go1 = torch.rand(o1:size())
+ local go2 = go1:clone()
+
+ local gi1= module1:updateGradInput(input,go1)
+ local gi2 = module2:updateGradInput(input,go2)
+ mytester:assertlt(gi1:dist(gi2), precision, 'error on gradInput')
+
+ module1:zeroGradParameters()
+ module2:zeroGradParameters()
+
+ module1:accGradParameters(input,go1)
+ module2:accGradParameters(input,go2)
+ for k=1,tt:size(1) do
+ mytester:assertlt(module1.gradWeight[k]:dist(module2.gradWeight[tt[k][1]][tt[k][2]]),precision,'error on gradWeight ' .. k)
+ end
+ mytester:assertlt(module1.gradBias:dist(module2.gradBias),precision,'error on gradBias ')
+end
+
local function batchcompare(smod, sin, plist)
local bs = torch.LongStorage(sin:size():size()+1)
bs[1] = 1
@@ -841,6 +1017,24 @@ function nntest.SpatialConvolutionBatchCompare()
batchcompare(module,input, {'weight','bias','gradWeight','gradBias'})
end
+function nntest.SpatialFullConvolutionBatchCompare()
+ local from = math.random(1,10)
+ local to = math.random(1,10)
+ local ki = math.random(1,10)
+ local kj = math.random(1,10)
+ local si = math.random(1,4)
+ local sj = math.random(1,4)
+ local ini = math.random(10,18)
+ local inj = math.random(10,18)
+
+ local module = nn.SpatialFullConvolution(from, to, ki, kj, si, sj)
+ local input = torch.randn(from, inj, ini)
+
+ batchcompare(module,input, {'weight','bias','gradWeight','gradBias'})
+end
+
+
+
function nntest.SpatialSubSamplingBatchCompare()
local from = math.random(1,10)
local ki = math.random(1,5)
@@ -1105,7 +1299,7 @@ function nntest.TemporalSubSampling()
mytester:asserteq(0, berr, torch.typename(module) .. ' - i/o backward err ')
end
-function nntestx.TemporalMaxPooling()
+function nntest.TemporalMaxPooling()
local from = math.random(1,10)
local ki = math.random(1,10)
local si = math.random(1,4)