Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/torch/nn.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPáidí Creed <paidi@swiftkey.net>2013-11-06 19:15:28 +0400
committerPáidí Creed <paidi@swiftkey.net>2013-11-06 19:15:28 +0400
commitaa60b6e2be23beb899b3eca28c762793afea52a6 (patch)
tree17584525dd35143c4aec254269da571ff1def87b
parentd1c7da6a9f760c31a1b27477e27b124e84d556ab (diff)
parentb8f17d4136410b80bf621ff660abb8ab1ed7bf5e (diff)
Merge remote-tracking branch 'upstream/master'
Conflicts: extra/nn/test/test.lua
-rw-r--r--CMakeLists.txt4
-rw-r--r--MarginRankingCriterion.lua66
-rw-r--r--PairwiseDistance.lua50
-rw-r--r--VolumetricMaxPooling.lua37
-rw-r--r--dok/index.dok50
-rw-r--r--generic/SoftMax.c4
-rw-r--r--generic/SpatialConvolution.c117
-rw-r--r--generic/SpatialConvolutionMM.c38
-rw-r--r--generic/SpatialConvolutionMap.c168
-rw-r--r--generic/SpatialFullConvolution.c117
-rw-r--r--generic/SpatialFullConvolutionMap.c165
-rw-r--r--generic/SpatialMaxPooling.c109
-rw-r--r--generic/SpatialSubSampling.c84
-rw-r--r--generic/TemporalMaxPooling.c72
-rw-r--r--generic/VolumetricConvolution.c62
-rw-r--r--generic/VolumetricMaxPooling.c234
-rw-r--r--init.c5
-rw-r--r--init.lua1
-rw-r--r--test/test.lua43
19 files changed, 1029 insertions, 397 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 93c5dc6..2bfd582 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -2,6 +2,10 @@ CMAKE_MINIMUM_REQUIRED(VERSION 2.6 FATAL_ERROR)
CMAKE_POLICY(VERSION 2.6)
FIND_PACKAGE(Torch REQUIRED)
+IF("${CMAKE_C_COMPILER_ID}" STREQUAL "Clang" OR "${CMAKE_C_COMPILER_ID}" STREQUAL "GNU")
+ SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -std=c89 -pedantic")
+ENDIF()
+
SET(src init.c)
FILE(GLOB luasrc *.lua)
diff --git a/MarginRankingCriterion.lua b/MarginRankingCriterion.lua
index ec85fb9..30c6855 100644
--- a/MarginRankingCriterion.lua
+++ b/MarginRankingCriterion.lua
@@ -8,18 +8,64 @@ function MarginRankingCriterion:__init(margin)
end
function MarginRankingCriterion:updateOutput(input,y)
- self.output=math.max(0, -y*(input[1][1]-input[2][1]) + self.margin )
+ if input[1]:size(1) == 1 then
+ self.output=math.max(0, -y*(input[1][1]-input[2][1]) + self.margin )
+ else
+ if type(self.output) == "number" then
+ self.output = input[1]:clone()
+ end
+ self.output = self.output or input[1]:clone()
+ self.output:resizeAs(input[1])
+ self.output:copy(input[1])
+
+ self.output:add(-1, input[2])
+ self.output:mul(-y)
+ self.output:add(self.margin)
+
+ self.mask = self.mask or self.output:clone()
+ self.mask:resizeAs(self.output)
+ self.mask:copy(self.output)
+
+ self.mask:ge(self.output, 0.0)
+ self.output:cmul(self.mask)
+ end
+
return self.output
end
function MarginRankingCriterion:updateGradInput(input, y)
- local dist = -y*(input[1][1]-input[2][1]) + self.margin
- if dist < 0 then
- self.gradInput[1][1]=0;
- self.gradInput[2][1]=0;
- else
- self.gradInput[1][1]=-y
- self.gradInput[2][1]=y
- end
- return self.gradInput
+ if input[1]:size(1) == 1 then
+ local dist = -y*(input[1][1]-input[2][1]) + self.margin
+ if dist < 0 then
+ self.gradInput[1][1]=0;
+ self.gradInput[2][1]=0;
+ else
+ self.gradInput[1][1]=-y
+ self.gradInput[2][1]=y
+ end
+ else
+ self.dist = self.dist or input[1].new()
+ self.dist = self.dist:resizeAs(input[1]):copy(input[1])
+ local dist = self.dist
+
+ dist:add(-1, input[2])
+ dist:mul(-y)
+ dist:add(self.margin)
+
+ self.mask = self.mask or input[1].new()
+ self.mask = self.mask:resizeAs(input[1]):copy(dist)
+ local mask = self.mask
+
+ mask:ge(dist, 0)
+
+ self.gradInput[1]:resize(dist:size())
+ self.gradInput[2]:resize(dist:size())
+
+ self.gradInput[1]:copy(mask)
+ self.gradInput[1]:mul(-y)
+ self.gradInput[2]:copy(mask)
+ self.gradInput[2]:mul(y)
+
+ end
+ return self.gradInput
end
diff --git a/PairwiseDistance.lua b/PairwiseDistance.lua
index 638c58f..d9e6f81 100644
--- a/PairwiseDistance.lua
+++ b/PairwiseDistance.lua
@@ -10,7 +10,24 @@ function PairwiseDistance:__init(p)
end
function PairwiseDistance:updateOutput(input)
- self.output[1]=input[1]:dist(input[2],self.norm);
+ if input[1]:dim() == 1 then
+ self.output[1]=input[1]:dist(input[2],self.norm)
+ elseif input[1]:dim() == 2 then
+ self.diff = self.diff or input[1].new()
+ self.diff:resizeAs(input[1])
+
+ local diff = self.diff:zero()
+ --local diff = torch.add(input[1], -1, input[2])
+ diff:add(input[1], -1, input[2])
+
+ self.output:resize(input[1]:size(1))
+ self.output:zero()
+ self.output:add(diff:pow(self.norm):sum(2))
+ self.output:pow(1./self.norm)
+ else
+ error('input must be vector or matrix')
+ end
+
return self.output
end
@@ -20,14 +37,27 @@ local function mathsign(x)
end
function PairwiseDistance:updateGradInput(input, gradOutput)
- self.gradInput[1]:resizeAs(input[1])
- self.gradInput[2]:resizeAs(input[2])
- self.gradInput[1]:copy(input[1])
- self.gradInput[1]:add(-1, input[2])
- if self.norm==1 then
+ self.gradInput[1]:resize(input[1]:size())
+ self.gradInput[2]:resize(input[2]:size())
+ self.gradInput[1]:copy(input[1])
+ self.gradInput[1]:add(-1, input[2])
+ if self.norm==1 then
self.gradInput[1]:apply(mathsign)
- end
- self.gradInput[1]:mul(gradOutput[1]);
- self.gradInput[2]:zero():add(-1, self.gradInput[1])
- return self.gradInput
+ end
+ if input[1]:dim() == 1 then
+ self.gradInput[1]:mul(gradOutput[1])
+ elseif input[1]:dim() == 2 then
+ self.grad = self.grad or gradOutput.new()
+ self.ones = self.ones or gradOutput.new()
+
+ self.grad:resizeAs(input[1]):zero()
+ self.ones:resize(input[1]:size(2)):fill(1)
+
+ self.grad:addr(gradOutput, self.ones)
+ self.gradInput[1]:cmul(self.grad)
+ else
+ error('input must be vector or matrix')
+ end
+ self.gradInput[2]:zero():add(-1, self.gradInput[1])
+ return self.gradInput
end
diff --git a/VolumetricMaxPooling.lua b/VolumetricMaxPooling.lua
new file mode 100644
index 0000000..2bc7a76
--- /dev/null
+++ b/VolumetricMaxPooling.lua
@@ -0,0 +1,37 @@
+local VolumetricMaxPooling, parent = torch.class('nn.VolumetricMaxPooling', 'nn.Module')
+
+function VolumetricMaxPooling:__init(kT, kW, kH, dT, dW, dH)
+ parent.__init(self)
+
+ dT = dT or kT
+ dW = dW or kW
+ dH = dH or kH
+
+ self.kT = kT
+ self.kH = kH
+ self.kW = kW
+ self.dT = dT
+ self.dW = dW
+ self.dH = dH
+
+ self.indices = torch.Tensor()
+end
+
+function VolumetricMaxPooling:updateOutput(input)
+ input.nn.VolumetricMaxPooling_updateOutput(self, input)
+ return self.output
+end
+
+function VolumetricMaxPooling:updateGradInput(input, gradOutput)
+ input.nn.VolumetricMaxPooling_updateGradInput(self, input, gradOutput)
+ return self.gradInput
+end
+
+function VolumetricMaxPooling:empty()
+ self.gradInput:resize()
+ self.gradInput:storage():resize(0)
+ self.output:resize()
+ self.output:storage():resize(0)
+ self.indices:resize()
+ self.indices:storage():resize(0)
+end
diff --git a/dok/index.dok b/dok/index.dok
index d4a0160..baac983 100644
--- a/dok/index.dok
+++ b/dok/index.dok
@@ -1377,6 +1377,45 @@ output[i][j][k] = bias[k]
* input[dW*(i-1)+s)][dH*(j-1)+t][l]
</file>
+==== VolumetricConvolution ====
+{{anchor:nn.VolumetricConvolution}}
+
+<file lua>
+module = nn.VolumetricConvolution(nInputPlane, nOutputPlane, kT, kW, kH [, dT, dW, dH])
+</file>
+
+Applies a 3D convolution over an input image composed of several input planes. The ''input'' tensor in
+''forward(input)'' is expected to be a 4D tensor (''nInputPlane x time x height x width'').
+
+The parameters are the following:
+ * ''nInputPlane'': The number of expected input planes in the image given into ''forward()''.
+ * ''nOutputPlane'': The number of output planes the convolution layer will produce.
+ * ''kT'': The kernel size of the convolution in time
+ * ''kW'': The kernel width of the convolution
+ * ''kH'': The kernel height of the convolution
+ * ''dT'': The step of the convolution in the time dimension. Default is ''1''.
+ * ''dW'': The step of the convolution in the width dimension. Default is ''1''.
+ * ''dH'': The step of the convolution in the height dimension. Default is ''1''.
+
+Note that depending of the size of your kernel, several (of the last)
+columns or rows of the input image might be lost. It is up to the user to
+add proper padding in images.
+
+If the input image is a 4D tensor ''nInputPlane x time x height x width'', the output image size
+will be ''nOutputPlane x otime x owidth x oheight'' where
+<file lua>
+otime = (time - kT) / dT + 1
+owidth = (width - kW) / dW + 1
+oheight = (height - kH) / dH + 1 .
+</file>
+
+The parameters of the convolution can be found in ''self.weight'' (Tensor of
+size ''nOutputPlane x nInputPlane x kT x kH x kW'') and ''self.bias'' (Tensor of
+size ''nOutputPlane''). The corresponding gradients can be found in
+''self.gradWeight'' and ''self.gradBias''.
+
+</file>
+
==== SpatialConvolutionMap ====
{{anchor:nn.SpatialConvolutionMap}}
@@ -1436,6 +1475,17 @@ Applies 2D max-pooling operation in ''kWxkH'' regions by step size
''dWxdH'' steps. The number of output features is equal to the number of
input planes.
+==== VolumetricMaxPooling ====
+{{anchor:nn.VolumetricMaxPooling}}
+
+<file lua>
+module = nn.VolumetricMaxPooling(kT, kW, kH [, dT, dW, dH])
+</file>
+
+Applies 3D max-pooling operation in ''kTxkWxkH'' regions by step size
+''dTxdWxdH'' steps. The number of output features is equal to the number of
+input planes.
+
==== SpatialSubSampling ====
{{anchor:nn.SpatialSubSampling}}
diff --git a/generic/SoftMax.c b/generic/SoftMax.c
index fd73b3e..bddb70d 100644
--- a/generic/SoftMax.c
+++ b/generic/SoftMax.c
@@ -31,11 +31,13 @@ static int nn_(SoftMax_updateOutput)(lua_State *L)
for(t = 0; t < nframe; t++)
{
real inputMax = -THInf;
+ accreal sum;
+
for(d = 0; d < dim; d++) {
if (input_data[d] >= inputMax) inputMax = input_data[d];
}
- accreal sum = 0;
+ sum = 0;
for(d = 0; d < dim; d++) {
real z = THExpMinusApprox(inputMax - input_data[d]);
output_data[d] = z;
diff --git a/generic/SpatialConvolution.c b/generic/SpatialConvolution.c
index bfe5698..bcadf38 100644
--- a/generic/SpatialConvolution.c
+++ b/generic/SpatialConvolution.c
@@ -12,70 +12,79 @@ static int nn_(SpatialConvolution_updateOutput)(lua_State *L)
THTensor *bias = luaT_getfieldcheckudata(L, 1, "bias", torch_Tensor);
THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_Tensor);
- luaL_argcheck(L, input->nDimension == 3 || input->nDimension == 4, 2, "3D or 4D(batch mode) tensor expected");
-
int dimw = 2;
int dimh = 1;
+
+ luaL_argcheck(L, input->nDimension == 3 || input->nDimension == 4, 2, "3D or 4D(batch mode) tensor expected");
+
if (input->nDimension == 4) {
dimw++;
dimh++;
}
- long nOutputPlane = weight->size[0];
- long kW = weight->size[3];
- long kH = weight->size[2];
- long inputWidth = input->size[dimw];
- long inputHeight = input->size[dimh];
- long outputWidth = (inputWidth - kW) / dW + 1;
- long outputHeight = (inputHeight - kH) / dH + 1;
-
- if (input->nDimension == 3)
{
- THTensor_(resize3d)(output, nOutputPlane, outputHeight, outputWidth);
- /* add bias */
- long i;
- /*THTensor *outn = THTensor_(new)();*/
- real* bias_data = THTensor_(data)(bias);
- real* output_data = THTensor_(data)(output);
-#pragma omp parallel for private(i)
- for (i=0; i<bias->size[0]; i++)
+ long nOutputPlane = weight->size[0];
+ long kW = weight->size[3];
+ long kH = weight->size[2];
+ long inputWidth = input->size[dimw];
+ long inputHeight = input->size[dimh];
+ long outputWidth = (inputWidth - kW) / dW + 1;
+ long outputHeight = (inputHeight - kH) / dH + 1;
+
+ if (input->nDimension == 3)
{
- /*THTensor_(select)(outn,output,0,i);*/
- /*TH_TENSOR_APPLY(real,outn, *outn_data = bias_data[i];);*/
- real *ptr_output = output_data + i*outputWidth*outputHeight;
- long j;
- for(j = 0; j < outputWidth*outputHeight; j++)
- ptr_output[j] = bias_data[i];
- }
- /*THTensor_(free)(outn);*/
-
- /* do convolutions */
- THTensor_(conv2Dmv)(output, 1.0, 1.0, input, weight, dH, dW, "V","X");
- }
- else
- {
- THTensor_(resize4d)(output, input->size[0], nOutputPlane, outputHeight, outputWidth);
+ long i;
+ real* bias_data;
+ real* output_data;
- real* bias_data = THTensor_(data)(bias);
- real* output_data = THTensor_(data)(output);
+ THTensor_(resize3d)(output, nOutputPlane, outputHeight, outputWidth);
+ /* add bias */
+ bias_data = THTensor_(data)(bias);
+ output_data = THTensor_(data)(output);
- long p;
-#pragma omp parallel for private(p)
- for (p=0; p<input->size[0]; p++)
- {
- /* BIAS */
- long i;
+#pragma omp parallel for private(i)
for (i=0; i<bias->size[0]; i++)
{
- real *ptr_output = output_data + p*nOutputPlane*outputWidth*outputHeight + i*outputWidth*outputHeight;
+ /*THTensor_(select)(outn,output,0,i);*/
+ /*TH_TENSOR_APPLY(real,outn, *outn_data = bias_data[i];);*/
+ real *ptr_output = output_data + i*outputWidth*outputHeight;
long j;
for(j = 0; j < outputWidth*outputHeight; j++)
ptr_output[j] = bias_data[i];
}
+ /*THTensor_(free)(outn);*/
+
+ /* do convolutions */
+ THTensor_(conv2Dmv)(output, 1.0, 1.0, input, weight, dH, dW, "V","X");
}
+ else
+ {
+ real* bias_data;
+ real* output_data;
+ long p;
- /* do convolutions */
- THTensor_(conv2Dmm)(output, 1.0, 1.0, input, weight, dH, dW, "V","X");
+ THTensor_(resize4d)(output, input->size[0], nOutputPlane, outputHeight, outputWidth);
+
+ bias_data = THTensor_(data)(bias);
+ output_data = THTensor_(data)(output);
+
+#pragma omp parallel for private(p)
+ for (p=0; p<input->size[0]; p++)
+ {
+ /* BIAS */
+ long i;
+ for (i=0; i<bias->size[0]; i++)
+ {
+ real *ptr_output = output_data + p*nOutputPlane*outputWidth*outputHeight + i*outputWidth*outputHeight;
+ long j;
+ for(j = 0; j < outputWidth*outputHeight; j++)
+ ptr_output[j] = bias_data[i];
+ }
+ }
+
+ /* do convolutions */
+ THTensor_(conv2Dmm)(output, 1.0, 1.0, input, weight, dH, dW, "V","X");
+ }
}
return 1;
}
@@ -92,10 +101,12 @@ static int nn_(SpatialConvolution_updateGradInput)(lua_State *L)
THTensor *weight = luaT_getfieldcheckudata(L, 1, "weight", torch_Tensor);
THTensor *gradInput = luaT_getfieldcheckudata(L, 1, "gradInput", torch_Tensor);
+ THTensor *tweight;
+
THArgCheck( nOutputPlane == gradOutput->size[input->nDimension == 4 ? 1 : 0], 1, "Number of output features is not equal to nOutputPlane" );
/* gradient to input */
- THTensor *tweight = THTensor_(newTranspose)(weight,0,1);
+ tweight = THTensor_(newTranspose)(weight,0,1);
if (input->nDimension == 3)
{
@@ -122,11 +133,15 @@ static int nn_(SpatialConvolution_accGradParameters)(lua_State *L)
THTensor *gradWeight = luaT_getfieldcheckudata(L, 1, "gradWeight", torch_Tensor);
THTensor *gradBias = luaT_getfieldcheckudata(L, 1, "gradBias", torch_Tensor);
- THArgCheck( nOutputPlane == gradOutput->size[input->nDimension == 4 ? 1 : 0], 1, "Number of output features is not equal to nOutputPlane" );
-
int dimw = 2;
int dimh = 1;
+ real *gradBias_data;
+ real *gradOutput_data;
+ long noutSlice;
+
+ THArgCheck( nOutputPlane == gradOutput->size[input->nDimension == 4 ? 1 : 0], 1, "Number of output features is not equal to nOutputPlane" );
+
if (input->nDimension == 4)
{
dimw++;
@@ -134,9 +149,9 @@ static int nn_(SpatialConvolution_accGradParameters)(lua_State *L)
}
/* gradient to bias */
- real *gradBias_data = THTensor_(data)(gradBias);
- real *gradOutput_data = THTensor_(data)(gradOutput);
- long noutSlice = gradOutput->size[dimh]*gradOutput->size[dimw];
+ gradBias_data = THTensor_(data)(gradBias);
+ gradOutput_data = THTensor_(data)(gradOutput);
+ noutSlice = gradOutput->size[dimh]*gradOutput->size[dimw];
/*THTensor* gradOutSlice = THTensor_(new)();*/
if (input->nDimension == 3)
diff --git a/generic/SpatialConvolutionMM.c b/generic/SpatialConvolutionMM.c
index a8014f7..d4fd953 100644
--- a/generic/SpatialConvolutionMM.c
+++ b/generic/SpatialConvolutionMM.c
@@ -61,12 +61,13 @@ static void nn_(SpatialConvolutionMM_updateOutput_frame)(THTensor *input, THTens
long nOutputPlane, long outputWidth, long outputHeight)
{
long i;
+ THTensor *output2d;
nn_(unfolded_copy)(finput, input, kW, kH, nInputPlane, inputWidth, inputHeight, outputWidth, outputHeight);
- THTensor *output2d = THTensor_(newWithStorage2d)(output->storage, output->storageOffset,
- nOutputPlane, -1,
- outputHeight*outputWidth, -1);
+ output2d = THTensor_(newWithStorage2d)(output->storage, output->storageOffset,
+ nOutputPlane, -1,
+ outputHeight*outputWidth, -1);
for(i = 0; i < nOutputPlane; i++)
THVector_(fill)(output->storage->data+output->storageOffset+output->stride[0]*i, THTensor_(get1d)(bias, i), outputHeight*outputWidth);
@@ -87,23 +88,31 @@ static int nn_(SpatialConvolutionMM_updateOutput)(lua_State *L)
THTensor *bias = luaT_getfieldcheckudata(L, 1, "bias", torch_Tensor);
THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_Tensor);
- luaL_argcheck(L, input->nDimension == 3 || input->nDimension == 4, 2, "3D or 4D(batch mode) tensor expected");
-
int dimf = 0;
int dimw = 2;
int dimh = 1;
+
+ long nInputPlane;
+ long inputWidth;
+ long inputHeight;
+ long nOutputPlane;
+ long outputWidth;
+ long outputHeight;
+
+ luaL_argcheck(L, input->nDimension == 3 || input->nDimension == 4, 2, "3D or 4D(batch mode) tensor expected");
+
if (input->nDimension == 4) {
dimf++;
dimw++;
dimh++;
}
- long nInputPlane = input->size[dimf];
- long inputWidth = input->size[dimw];
- long inputHeight = input->size[dimh];
- long nOutputPlane = weight->size[0];
- long outputWidth = (inputWidth - kW) + 1;
- long outputHeight = (inputHeight - kH) + 1;
+ nInputPlane = input->size[dimf];
+ inputWidth = input->size[dimw];
+ inputHeight = input->size[dimh];
+ nOutputPlane = weight->size[0];
+ outputWidth = (inputWidth - kW) + 1;
+ outputHeight = (inputHeight - kH) + 1;
if(input->nDimension == 3)
{
@@ -126,7 +135,7 @@ static int nn_(SpatialConvolutionMM_updateOutput)(lua_State *L)
THStorage_(clearFlag)(input->storage, TH_STORAGE_REFCOUNTED);
THStorage_(clearFlag)(output->storage, TH_STORAGE_REFCOUNTED);
THStorage_(clearFlag)(finput->storage, TH_STORAGE_REFCOUNTED);
-// mkl_set_num_threads(1);
+
#pragma omp parallel for private(t)
for(t = 0; t < T; t++)
{
@@ -147,7 +156,6 @@ static int nn_(SpatialConvolutionMM_updateOutput)(lua_State *L)
THStorage_(setFlag)(output->storage, TH_STORAGE_REFCOUNTED);
THStorage_(setFlag)(finput->storage, TH_STORAGE_REFCOUNTED);
}
-// mkl_set_num_threads(4);
return 1;
}
@@ -227,15 +235,15 @@ static void nn_(SpatialConvolutionMM_accGradParameters_frame)(THTensor *gradOutp
real scale)
{
long i;
-
+ THTensor *gradOutputPlane = THTensor_(new)();
THTensor *gradOutput2d = THTensor_(newWithStorage2d)(gradOutput->storage, gradOutput->storageOffset,
gradOutput->size[0], -1,
gradOutput->size[1]*gradOutput->size[2], -1);
+
THTensor_(transpose)(finput, finput, 0, 1);
THTensor_(addmm)(gradWeight, 1, gradWeight, scale, gradOutput2d, finput);
THTensor_(transpose)(finput, finput, 0, 1);
- THTensor *gradOutputPlane = THTensor_(new)();
for(i = 0; i < gradBias->size[0]; i++)
{
long k;
diff --git a/generic/SpatialConvolutionMap.c b/generic/SpatialConvolutionMap.c
index a1d20bc..32c75e5 100644
--- a/generic/SpatialConvolutionMap.c
+++ b/generic/SpatialConvolutionMap.c
@@ -17,6 +17,24 @@ static int nn_(SpatialConvolutionMap_updateOutput)(lua_State *L)
THTensor *bias = luaT_getfieldcheckudata(L, 1, "bias", torch_Tensor);
THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_Tensor);
+ real *input_data;
+ real *output_data;
+ real *weight_data;
+ real *bias_data;
+ real *connTable_data;
+
+ long input_h;
+ long input_w;
+ long output_h;
+ long output_w;
+ long weight_h;
+ long weight_w;
+
+ long p;
+ int nweight;
+
+
+
luaL_argcheck(L, input->nDimension == 3, 2, "3D tensor expected");
luaL_argcheck(L, input->size[0] >= nInputPlane, 2, "invalid number of input planes");
luaL_argcheck(L, input->size[2] >= kW && input->size[1] >= kH, 2, "input image smaller than kernel size");
@@ -25,39 +43,37 @@ static int nn_(SpatialConvolutionMap_updateOutput)(lua_State *L)
(input->size[1] - kH) / dH + 1,
(input->size[2] - kW) / dW + 1);
- // contiguous
+ /* contiguous */
input = THTensor_(newContiguous)(input);
output = THTensor_(newContiguous)(output);
- // get raw pointers
- real *input_data = THTensor_(data)(input);
- real *output_data = THTensor_(data)(output);
- real *weight_data = THTensor_(data)(weight);
- real *bias_data = THTensor_(data)(bias);
- real *connTable_data = THTensor_(data)(connTable);
-
- // and dims
- long input_h = input->size[1];
- long input_w = input->size[2];
- long output_h = output->size[1];
- long output_w = output->size[2];
- long weight_h = weight->size[1];
- long weight_w = weight->size[2];
+ /* get raw pointers */
+ input_data = THTensor_(data)(input);
+ output_data = THTensor_(data)(output);
+ weight_data = THTensor_(data)(weight);
+ bias_data = THTensor_(data)(bias);
+ connTable_data = THTensor_(data)(connTable);
+
+ /* and dims */
+ input_h = input->size[1];
+ input_w = input->size[2];
+ output_h = output->size[1];
+ output_w = output->size[2];
+ weight_h = weight->size[1];
+ weight_w = weight->size[2];
- long p;
#pragma omp parallel for private(p)
for (p = 0; p < nOutputPlane; p++) {
- // add bias
+ /* add bias */
real *ptr_output = output_data + p*output_w*output_h;
- long j;
+ long j,k;
for(j = 0; j < output_h*output_w; j++)
ptr_output[j] = bias_data[p];
- // convolve all maps
- int nweight = connTable->size[0];
- long k;
+ /* convolve all maps */
+ nweight = connTable->size[0];
for (k = 0; k < nweight; k++) {
- // get offsets for input/output
+ /* get offsets for input/output */
int o = (int)connTable_data[k*2+1]-1;
int i = (int)connTable_data[k*2+0]-1;
@@ -72,7 +88,7 @@ static int nn_(SpatialConvolutionMap_updateOutput)(lua_State *L)
}
}
- // clean up
+ /* clean up */
THTensor_(free)(input);
THTensor_(free)(output);
@@ -91,34 +107,47 @@ static int nn_(SpatialConvolutionMap_updateGradInput)(lua_State *L)
THTensor *weight = luaT_getfieldcheckudata(L, 1, "weight", torch_Tensor);
THTensor *gradInput = luaT_getfieldcheckudata(L, 1, "gradInput", torch_Tensor);
- // contiguous
+ real *gradInput_data;
+ real *gradOutput_data;
+ real *weight_data;
+ real *connTable_data;
+
+ long input_h;
+ long input_w;
+ long output_h;
+ long output_w;
+ long weight_h;
+ long weight_w;
+
+ long p;
+
+ /* contiguous */
gradInput = THTensor_(newContiguous)(gradInput);
gradOutput = THTensor_(newContiguous)(gradOutput);
- // Resize/Zero
+ /* Resize/Zero */
THTensor_(resizeAs)(gradInput, input);
THTensor_(zero)(gradInput);
- // get raw pointers
- real *gradInput_data = THTensor_(data)(gradInput);
- real *gradOutput_data = THTensor_(data)(gradOutput);
- real *weight_data = THTensor_(data)(weight);
- real *connTable_data = THTensor_(data)(connTable);
+ /* get raw pointers */
+ gradInput_data = THTensor_(data)(gradInput);
+ gradOutput_data = THTensor_(data)(gradOutput);
+ weight_data = THTensor_(data)(weight);
+ connTable_data = THTensor_(data)(connTable);
- // and dims
- long input_h = input->size[1];
- long input_w = input->size[2];
- long output_h = gradOutput->size[1];
- long output_w = gradOutput->size[2];
- long weight_h = weight->size[1];
- long weight_w = weight->size[2];
+ /* and dims */
+ input_h = input->size[1];
+ input_w = input->size[2];
+ output_h = gradOutput->size[1];
+ output_w = gradOutput->size[2];
+ weight_h = weight->size[1];
+ weight_w = weight->size[2];
- long p;
#pragma omp parallel for private(p)
for(p = 0; p < nInputPlane; p++)
{
long k;
- // backward all
+ /* backward all */
int nkernel = connTable->size[0];
for(k = 0; k < nkernel; k++)
{
@@ -126,7 +155,7 @@ static int nn_(SpatialConvolutionMap_updateGradInput)(lua_State *L)
int i = (int)connTable_data[k*2+0]-1;
if (i == p)
{
- // gradient to input
+ /* gradient to input */
THTensor_(fullConv2Dptr)(gradInput_data + i*input_w*input_h,
1.0,
gradOutput_data + o*output_w*output_h, output_h, output_w,
@@ -136,7 +165,7 @@ static int nn_(SpatialConvolutionMap_updateGradInput)(lua_State *L)
}
}
- // clean up
+ /* clean up */
THTensor_(free)(gradInput);
THTensor_(free)(gradOutput);
@@ -157,26 +186,41 @@ static int nn_(SpatialConvolutionMap_accGradParameters)(lua_State *L)
THTensor *gradWeight = luaT_getfieldcheckudata(L, 1, "gradWeight", torch_Tensor);
THTensor *gradBias = luaT_getfieldcheckudata(L, 1, "gradBias", torch_Tensor);
- // contiguous
+ real *input_data;
+ real *gradOutput_data;
+ real *gradWeight_data;
+ real *gradBias_data;
+
+ /* and dims */
+ long input_h;
+ long input_w;
+ long output_h;
+ long output_w;
+ long weight_h;
+ long weight_w;
+
+ long k;
+ int nkernel;
+
+ /* contiguous */
input = THTensor_(newContiguous)(input);
gradOutput = THTensor_(newContiguous)(gradOutput);
- // get raw pointers
- real *input_data = THTensor_(data)(input);
- real *gradOutput_data = THTensor_(data)(gradOutput);
- real *gradWeight_data = THTensor_(data)(gradWeight);
- real *gradBias_data = THTensor_(data)(gradBias);
-
- // and dims
- long input_h = input->size[1];
- long input_w = input->size[2];
- long output_h = gradOutput->size[1];
- long output_w = gradOutput->size[2];
- long weight_h = weight->size[1];
- long weight_w = weight->size[2];
-
- // gradients wrt bias
- long k;
+ /* get raw pointers */
+ input_data = THTensor_(data)(input);
+ gradOutput_data = THTensor_(data)(gradOutput);
+ gradWeight_data = THTensor_(data)(gradWeight);
+ gradBias_data = THTensor_(data)(gradBias);
+
+ /* and dims */
+ input_h = input->size[1];
+ input_w = input->size[2];
+ output_h = gradOutput->size[1];
+ output_w = gradOutput->size[2];
+ weight_h = weight->size[1];
+ weight_w = weight->size[2];
+
+ /* gradients wrt bias */
#pragma omp parallel for private(k)
for(k = 0; k < nOutputPlane; k++) {
real *ptr_gradOutput = gradOutput_data + k*output_w*output_h;
@@ -185,15 +229,15 @@ static int nn_(SpatialConvolutionMap_accGradParameters)(lua_State *L)
gradBias_data[k] += scale*ptr_gradOutput[l];
}
- // gradients wrt weight
- int nkernel = connTable->size[0];
+ /* gradients wrt weight */
+ nkernel = connTable->size[0];
#pragma omp parallel for private(k)
for(k = 0; k < nkernel; k++)
{
int o = (int)THTensor_(get2d)(connTable,k,1)-1;
int i = (int)THTensor_(get2d)(connTable,k,0)-1;
- // gradient to kernel
+ /* gradient to kernel */
THTensor_(validXCorr2DRevptr)(gradWeight_data + k*weight_w*weight_h,
scale,
input_data + i*input_w*input_h, input_h, input_w,
@@ -201,7 +245,7 @@ static int nn_(SpatialConvolutionMap_accGradParameters)(lua_State *L)
dH, dW);
}
- // clean up
+ /* clean up */
THTensor_(free)(input);
THTensor_(free)(gradOutput);
return 0;
diff --git a/generic/SpatialFullConvolution.c b/generic/SpatialFullConvolution.c
index cb2e340..3b55297 100644
--- a/generic/SpatialFullConvolution.c
+++ b/generic/SpatialFullConvolution.c
@@ -12,69 +12,80 @@ static int nn_(SpatialFullConvolution_updateOutput)(lua_State *L)
THTensor *bias = luaT_getfieldcheckudata(L, 1, "bias", torch_Tensor);
THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_Tensor);
- luaL_argcheck(L, input->nDimension == 3 || input->nDimension == 4, 2, "3D or 4D(batch mode) tensor expected");
int dimw = 2;
int dimh = 1;
+
+ luaL_argcheck(L, input->nDimension == 3 || input->nDimension == 4, 2, "3D or 4D(batch mode) tensor expected");
if (input->nDimension == 4) {
dimw++;
dimh++;
}
- long nOutputPlane = weight->size[1];
- long kW = weight->size[3];
- long kH = weight->size[2];
- long inputWidth = input->size[dimw];
- long inputHeight = input->size[dimh];
- long outputWidth = (inputWidth - 1) * dW + kW;
- long outputHeight = (inputHeight - 1) * dH + kH;
-
- if (input->nDimension == 3)
- {
- THTensor_(resize3d)(output, nOutputPlane, outputHeight, outputWidth);
- /* add bias */
- long i;
- real* bias_data = THTensor_(data)(bias);
- real* output_data = THTensor_(data)(output);
-#pragma omp parallel for private(i)
- for (i=0; i<bias->size[0]; i++)
- {
- /*THTensor_(select)(outn,output,0,i);*/
- /*TH_TENSOR_APPLY(real,outn, *outn_data = bias_data[i];);*/
- real *ptr_output = output_data + i*outputWidth*outputHeight;
- long j;
- for(j = 0; j < outputWidth*outputHeight; j++)
- ptr_output[j] = bias_data[i];
- }
-
- /* do convolutions */
- THTensor *tweight = THTensor_(newTranspose)(weight,0,1);
- THTensor_(conv2Dmv)(output, 1.0, 1.0, input, tweight, dH, dW, "F", "C");
- THTensor_(free)(tweight);
- }
- else
{
- THTensor_(resize4d)(output, input->size[0], nOutputPlane, outputHeight, outputWidth);
- real* bias_data = THTensor_(data)(bias);
- real* output_data = THTensor_(data)(output);
-
- long p;
-#pragma omp parallel for private(p)
- for (p=0; p<input->size[0]; p++)
+ long nOutputPlane = weight->size[1];
+ long kW = weight->size[3];
+ long kH = weight->size[2];
+ long inputWidth = input->size[dimw];
+ long inputHeight = input->size[dimh];
+ long outputWidth = (inputWidth - 1) * dW + kW;
+ long outputHeight = (inputHeight - 1) * dH + kH;
+
+ if (input->nDimension == 3)
{
- /* BIAS */
long i;
+ real* bias_data;
+ real* output_data;
+
+ THTensor_(resize3d)(output, nOutputPlane, outputHeight, outputWidth);
+ /* add bias */
+ bias_data = THTensor_(data)(bias);
+ output_data = THTensor_(data)(output);
+#pragma omp parallel for private(i)
for (i=0; i<bias->size[0]; i++)
{
- real *ptr_output = output_data + p*nOutputPlane*outputWidth*outputHeight + i*outputWidth*outputHeight;
+ real *ptr_output = output_data + i*outputWidth*outputHeight;
long j;
for(j = 0; j < outputWidth*outputHeight; j++)
ptr_output[j] = bias_data[i];
}
+
+ /* do convolutions */
+ {
+ THTensor *tweight = THTensor_(newTranspose)(weight,0,1);
+ THTensor_(conv2Dmv)(output, 1.0, 1.0, input, tweight, dH, dW, "F", "C");
+ THTensor_(free)(tweight);
+ }
+ }
+ else
+ {
+ real* bias_data;
+ real* output_data;
+ long p;
+
+ THTensor_(resize4d)(output, input->size[0], nOutputPlane, outputHeight, outputWidth);
+ bias_data = THTensor_(data)(bias);
+ output_data = THTensor_(data)(output);
+
+#pragma omp parallel for private(p)
+ for (p=0; p<input->size[0]; p++)
+ {
+ /* BIAS */
+ long i;
+ for (i=0; i<bias->size[0]; i++)
+ {
+ real *ptr_output = output_data + p*nOutputPlane*outputWidth*outputHeight + i*outputWidth*outputHeight;
+ long j;
+ for(j = 0; j < outputWidth*outputHeight; j++)
+ ptr_output[j] = bias_data[i];
+ }
+ }
+ /* do convolutions */
+ {
+ THTensor *tweight = THTensor_(newTranspose)(weight,0,1);
+ THTensor_(conv2Dmm)(output, 1.0, 1.0, input, tweight, dH, dW, "F", "C");
+ THTensor_(free)(tweight);
+ }
}
- /* do convolutions */
- THTensor *tweight = THTensor_(newTranspose)(weight,0,1);
- THTensor_(conv2Dmm)(output, 1.0, 1.0, input, tweight, dH, dW, "F", "C");
- THTensor_(free)(tweight);
}
return 1;
}
@@ -120,20 +131,26 @@ static int nn_(SpatialFullConvolution_accGradParameters)(lua_State *L)
THTensor *gradBias = luaT_getfieldcheckudata(L, 1, "gradBias", torch_Tensor);
long nOutputPlane = weight->size[1];
- THArgCheck( nOutputPlane == gradOutput->size[input->nDimension == 4 ? 1 : 0], 1, "Number of output features is not equal to nOutputPlane" );
int dimw = 2;
int dimh = 1;
+ real *gradBias_data;
+ real *gradOutput_data;
+ long noutSlice;
+
+ THArgCheck( nOutputPlane == gradOutput->size[input->nDimension == 4 ? 1 : 0], 1, "Number of output features is not equal to nOutputPlane" );
+
+
if (input->nDimension == 4)
{
dimw++;
dimh++;
}
/* gradient to bias */
- real *gradBias_data = THTensor_(data)(gradBias);
- real *gradOutput_data = THTensor_(data)(gradOutput);
- long noutSlice = gradOutput->size[dimh]*gradOutput->size[dimw];
+ gradBias_data = THTensor_(data)(gradBias);
+ gradOutput_data = THTensor_(data)(gradOutput);
+ noutSlice = gradOutput->size[dimh]*gradOutput->size[dimw];
/*THTensor* gradOutSlice = THTensor_(new)();*/
if (input->nDimension == 3)
diff --git a/generic/SpatialFullConvolutionMap.c b/generic/SpatialFullConvolutionMap.c
index 8a5d9df..9d5cff2 100644
--- a/generic/SpatialFullConvolutionMap.c
+++ b/generic/SpatialFullConvolutionMap.c
@@ -18,6 +18,21 @@ static int nn_(SpatialFullConvolutionMap_updateOutput)(lua_State *L)
THTensor *bias = luaT_getfieldcheckudata(L, 1, "bias", torch_Tensor);
THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_Tensor);
+ real *input_data;
+ real *output_data;
+ real *weight_data;
+ real *bias_data;
+ real *connTable_data;
+
+ long input_h;
+ long input_w;
+ long output_h;
+ long output_w;
+ long weight_h;
+ long weight_w;
+
+ long p;
+
luaL_argcheck(L, input->nDimension == 3, 2, "3D tensor expected");
luaL_argcheck(L, input->size[0] >= nInputPlane, 2, "invalid number of input planes");
@@ -26,39 +41,40 @@ static int nn_(SpatialFullConvolutionMap_updateOutput)(lua_State *L)
(input->size[1] - 1) * dH + kH,
(input->size[2] - 1) * dW + kW);
- // contiguous
+ /* contiguous */
input = THTensor_(newContiguous)(input);
output = THTensor_(newContiguous)(output);
- // get raw pointers
- real *input_data = THTensor_(data)(input);
- real *output_data = THTensor_(data)(output);
- real *weight_data = THTensor_(data)(weight);
- real *bias_data = THTensor_(data)(bias);
- real *connTable_data = THTensor_(data)(connTable);
-
- // and dims
- long input_h = input->size[1];
- long input_w = input->size[2];
- long output_h = output->size[1];
- long output_w = output->size[2];
- long weight_h = weight->size[1];
- long weight_w = weight->size[2];
+ /* get raw pointers */
+ input_data = THTensor_(data)(input);
+ output_data = THTensor_(data)(output);
+ weight_data = THTensor_(data)(weight);
+ bias_data = THTensor_(data)(bias);
+ connTable_data = THTensor_(data)(connTable);
+
+ /* and dims */
+ input_h = input->size[1];
+ input_w = input->size[2];
+ output_h = output->size[1];
+ output_w = output->size[2];
+ weight_h = weight->size[1];
+ weight_w = weight->size[2];
- long p;
#pragma omp parallel for private(p)
for (p = 0; p < nOutputPlane; p++) {
- // add bias
+ /* add bias */
real *ptr_output = output_data + p*output_w*output_h;
long j;
+ int nweight;
+ long k;
+
for(j = 0; j < output_h*output_w; j++)
ptr_output[j] = bias_data[p];
- // convolve all maps
- int nweight = connTable->size[0];
- long k;
+ /* convolve all maps */
+ nweight = connTable->size[0];
for (k = 0; k < nweight; k++) {
- // get offsets for input/output
+ /* get offsets for input/output */
int o = (int)connTable_data[k*2+1]-1;
int i = (int)connTable_data[k*2+0]-1;
@@ -73,7 +89,7 @@ static int nn_(SpatialFullConvolutionMap_updateOutput)(lua_State *L)
}
}
- // clean up
+ /* clean up */
THTensor_(free)(input);
THTensor_(free)(output);
@@ -92,34 +108,47 @@ static int nn_(SpatialFullConvolutionMap_updateGradInput)(lua_State *L)
THTensor *weight = luaT_getfieldcheckudata(L, 1, "weight", torch_Tensor);
THTensor *gradInput = luaT_getfieldcheckudata(L, 1, "gradInput", torch_Tensor);
- // contiguous
+ real *gradInput_data;
+ real *gradOutput_data;
+ real *weight_data;
+ real *connTable_data;
+
+ long input_h;
+ long input_w;
+ long output_h;
+ long output_w;
+ long weight_h;
+ long weight_w;
+
+ long p;
+
+ /* contiguous */
gradInput = THTensor_(newContiguous)(gradInput);
gradOutput = THTensor_(newContiguous)(gradOutput);
- // Resize/Zero
+ /* Resize/Zero */
THTensor_(resizeAs)(gradInput, input);
THTensor_(zero)(gradInput);
- // get raw pointers
- real *gradInput_data = THTensor_(data)(gradInput);
- real *gradOutput_data = THTensor_(data)(gradOutput);
- real *weight_data = THTensor_(data)(weight);
- real *connTable_data = THTensor_(data)(connTable);
+ /* get raw pointers */
+ gradInput_data = THTensor_(data)(gradInput);
+ gradOutput_data = THTensor_(data)(gradOutput);
+ weight_data = THTensor_(data)(weight);
+ connTable_data = THTensor_(data)(connTable);
- // and dims
- long input_h = input->size[1];
- long input_w = input->size[2];
- long output_h = gradOutput->size[1];
- long output_w = gradOutput->size[2];
- long weight_h = weight->size[1];
- long weight_w = weight->size[2];
+ /* and dims */
+ input_h = input->size[1];
+ input_w = input->size[2];
+ output_h = gradOutput->size[1];
+ output_w = gradOutput->size[2];
+ weight_h = weight->size[1];
+ weight_w = weight->size[2];
- long p;
#pragma omp parallel for private(p)
for(p = 0; p < nInputPlane; p++)
{
long k;
- // backward all
+ /* backward all */
int nkernel = connTable->size[0];
for(k = 0; k < nkernel; k++)
{
@@ -127,7 +156,7 @@ static int nn_(SpatialFullConvolutionMap_updateGradInput)(lua_State *L)
int i = (int)connTable_data[k*2+0]-1;
if (i == p)
{
- // gradient to input
+ /* gradient to input */
THTensor_(validXCorr2Dptr)(gradInput_data + i*input_w*input_h,
1.0,
gradOutput_data + o*output_w*output_h, output_h, output_w,
@@ -137,7 +166,7 @@ static int nn_(SpatialFullConvolutionMap_updateGradInput)(lua_State *L)
}
}
- // clean up
+ /* clean up */
THTensor_(free)(gradInput);
THTensor_(free)(gradOutput);
@@ -158,26 +187,40 @@ static int nn_(SpatialFullConvolutionMap_accGradParameters)(lua_State *L)
THTensor *gradWeight = luaT_getfieldcheckudata(L, 1, "gradWeight", torch_Tensor);
THTensor *gradBias = luaT_getfieldcheckudata(L, 1, "gradBias", torch_Tensor);
- // contiguous
+ real *input_data;
+ real *gradOutput_data;
+ real *gradWeight_data;
+ real *gradBias_data;
+
+ long input_h;
+ long input_w;
+ long output_h;
+ long output_w;
+ long weight_h;
+ long weight_w;
+
+ long k;
+ int nkernel;
+
+ /* contiguous */
input = THTensor_(newContiguous)(input);
gradOutput = THTensor_(newContiguous)(gradOutput);
- // get raw pointers
- real *input_data = THTensor_(data)(input);
- real *gradOutput_data = THTensor_(data)(gradOutput);
- real *gradWeight_data = THTensor_(data)(gradWeight);
- real *gradBias_data = THTensor_(data)(gradBias);
-
- // and dims
- long input_h = input->size[1];
- long input_w = input->size[2];
- long output_h = gradOutput->size[1];
- long output_w = gradOutput->size[2];
- long weight_h = weight->size[1];
- long weight_w = weight->size[2];
-
- // gradients wrt bias
- long k;
+ /* get raw pointers */
+ input_data = THTensor_(data)(input);
+ gradOutput_data = THTensor_(data)(gradOutput);
+ gradWeight_data = THTensor_(data)(gradWeight);
+ gradBias_data = THTensor_(data)(gradBias);
+
+ /* and dims */
+ input_h = input->size[1];
+ input_w = input->size[2];
+ output_h = gradOutput->size[1];
+ output_w = gradOutput->size[2];
+ weight_h = weight->size[1];
+ weight_w = weight->size[2];
+
+ /* gradients wrt bias */
#pragma omp parallel for private(k)
for(k = 0; k < nOutputPlane; k++) {
real *ptr_gradOutput = gradOutput_data + k*output_w*output_h;
@@ -186,15 +229,15 @@ static int nn_(SpatialFullConvolutionMap_accGradParameters)(lua_State *L)
gradBias_data[k] += scale*ptr_gradOutput[l];
}
- // gradients wrt weight
- int nkernel = connTable->size[0];
+ /* gradients wrt weight */
+ nkernel = connTable->size[0];
#pragma omp parallel for private(k)
for(k = 0; k < nkernel; k++)
{
int o = (int)THTensor_(get2d)(connTable,k,1)-1;
int i = (int)THTensor_(get2d)(connTable,k,0)-1;
- // gradient to kernel
+ /* gradient to kernel */
THTensor_(validXCorr2DRevptr)(gradWeight_data + k*weight_w*weight_h,
scale,
gradOutput_data + o*output_w*output_h, output_h, output_w,
@@ -202,7 +245,7 @@ static int nn_(SpatialFullConvolutionMap_accGradParameters)(lua_State *L)
dH, dW);
}
- // clean up
+ /* clean up */
THTensor_(free)(input);
THTensor_(free)(gradOutput);
return 0;
diff --git a/generic/SpatialMaxPooling.c b/generic/SpatialMaxPooling.c
index 7faa0ee..8dd04c9 100644
--- a/generic/SpatialMaxPooling.c
+++ b/generic/SpatialMaxPooling.c
@@ -13,19 +13,19 @@ static void nn_(SpatialMaxPooling_updateOutput_frame)(real *input_p, real *outpu
#pragma omp parallel for private(k)
for (k = 0; k < nslices; k++)
{
- // loop over output
+ /* loop over output */
long i, j;
for(i = 0; i < oheight; i++)
{
for(j = 0; j < owidth; j++)
{
- // local pointers
+ /* local pointers */
real *ip = input_p + k*iwidth*iheight + i*iwidth*dH + j*dW;
real *op = output_p + k*owidth*oheight + i*owidth + j;
real *indyp = indy_p + k*owidth*oheight + i*owidth + j;
real *indxp = indx_p + k*owidth*oheight + i*owidth + j;
- // compute local max:
+ /* compute local max: */
long maxindex = -1;
real maxval = -THInf;
long tcntr = 0;
@@ -44,10 +44,10 @@ static void nn_(SpatialMaxPooling_updateOutput_frame)(real *input_p, real *outpu
}
}
- // set output to local max
+ /* set output to local max */
*op = maxval;
- // store location of max (x,y)
+ /* store location of max (x,y) */
*indyp = (int)(maxindex / kW)+1;
*indxp = (maxindex % kW) +1;
}
@@ -64,11 +64,21 @@ static int nn_(SpatialMaxPooling_updateOutput)(lua_State *L)
int dH = luaT_getfieldcheckint(L, 1, "dH");
THTensor *indices = luaT_getfieldcheckudata(L, 1, "indices", torch_Tensor);
THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_Tensor);
-
- luaL_argcheck(L, input->nDimension == 3 || input->nDimension == 4 , 2, "3D or 4D (batch mode) tensor expected");
int dimw = 2;
int dimh = 1;
long nbatch = 1;
+ long nslices;
+ long iheight;
+ long iwidth;
+ long oheight;
+ long owidth;
+ real *input_data;
+ real *output_data;
+ real *indices_data;
+
+
+ luaL_argcheck(L, input->nDimension == 3 || input->nDimension == 4 , 2, "3D or 4D (batch mode) tensor expected");
+
if (input->nDimension == 4)
{
nbatch = input->size[0];
@@ -77,26 +87,26 @@ static int nn_(SpatialMaxPooling_updateOutput)(lua_State *L)
}
luaL_argcheck(L, input->size[dimw] >= kW && input->size[dimh] >= kH, 2, "input image smaller than kernel size");
- // sizes
- long nslices = input->size[dimh-1];
- long iheight = input->size[dimh];
- long iwidth = input->size[dimw];
- long oheight = (iheight - kH) / dH + 1;
- long owidth = (iwidth - kW) / dW + 1;
+ /* sizes */
+ nslices = input->size[dimh-1];
+ iheight = input->size[dimh];
+ iwidth = input->size[dimw];
+ oheight = (iheight - kH) / dH + 1;
+ owidth = (iwidth - kW) / dW + 1;
- // get contiguous input
+ /* get contiguous input */
input = THTensor_(newContiguous)(input);
- // resize output
+ /* resize output */
if (input->nDimension == 3)
{
THTensor_(resize3d)(output, nslices, oheight, owidth);
- // indices will contain i,j locations for each output point
+ /* indices will contain i,j locations for each output point */
THTensor_(resize4d)(indices, 2, nslices, oheight, owidth);
- real *input_data = THTensor_(data)(input);
- real *output_data = THTensor_(data)(output);
- real *indices_data = THTensor_(data)(indices);
+ input_data = THTensor_(data)(input);
+ output_data = THTensor_(data)(output);
+ indices_data = THTensor_(data)(indices);
nn_(SpatialMaxPooling_updateOutput_frame)(input_data, output_data,
indices_data+nslices*owidth*oheight, indices_data,
@@ -107,15 +117,16 @@ static int nn_(SpatialMaxPooling_updateOutput)(lua_State *L)
}
else
{
+ long p;
+
THTensor_(resize4d)(output, nbatch, nslices, oheight, owidth);
- // indices will contain i,j locations for each output point
+ /* indices will contain i,j locations for each output point */
THTensor_(resize5d)(indices, 2, nbatch, nslices, oheight, owidth);
- real *input_data = THTensor_(data)(input);
- real *output_data = THTensor_(data)(output);
- real *indices_data = THTensor_(data)(indices);
+ input_data = THTensor_(data)(input);
+ output_data = THTensor_(data)(output);
+ indices_data = THTensor_(data)(indices);
- long p;
#pragma omp parallel for private(p)
for (p = 0; p < nbatch; p++)
{
@@ -128,7 +139,7 @@ static int nn_(SpatialMaxPooling_updateOutput)(lua_State *L)
}
}
- // cleanup
+ /* cleanup */
THTensor_(free)(input);
return 1;
}
@@ -149,17 +160,17 @@ static void nn_(SpatialMaxPooling_updateGradInput_frame)(real *gradInput_p, real
real *indx_p_k = indx_p + k*owidth*oheight;
real *indy_p_k = indy_p + k*owidth*oheight;
- // calculate max points
+ /* calculate max points */
long i, j;
for(i = 0; i < oheight; i++)
{
for(j = 0; j < owidth; j++)
{
- // retrieve position of max
+ /* retrieve position of max */
long maxi = indy_p_k[i*owidth + j] - 1 + i*dH;
long maxj = indx_p_k[i*owidth + j] - 1 + j*dW;
- // update gradient
+ /* update gradient */
gradInput_p_k[maxi*iwidth + maxj] += gradOutput_p_k[i*owidth + j];
}
}
@@ -174,36 +185,44 @@ static int nn_(SpatialMaxPooling_updateGradInput)(lua_State *L)
int dH = luaT_getfieldcheckint(L, 1, "dH");
THTensor *indices = luaT_getfieldcheckudata(L, 1, "indices", torch_Tensor);
THTensor *gradInput = luaT_getfieldcheckudata(L, 1, "gradInput", torch_Tensor);
+ int dimw = 2;
+ int dimh = 1;
+ long nbatch = 1;
+ int nslices;
+ int iheight;
+ int iwidth;
+ int oheight;
+ int owidth;
+ real *gradInput_data;
+ real *gradOutput_data;
+ real *indices_data;
- // get contiguous gradOutput
+ /* get contiguous gradOutput */
gradOutput = THTensor_(newContiguous)(gradOutput);
- // resize
+ /* resize */
THTensor_(resizeAs)(gradInput, input);
THTensor_(zero)(gradInput);
- int dimw = 2;
- int dimh = 1;
- long nbatch = 1;
if (input->nDimension == 4) {
nbatch = input->size[0];
dimw++;
dimh++;
}
- // sizes
- int nslices = input->size[dimh-1];
- int iheight = input->size[dimh];
- int iwidth = input->size[dimw];
- int oheight = gradOutput->size[dimh];
- int owidth = gradOutput->size[dimw];
+ /* sizes */
+ nslices = input->size[dimh-1];
+ iheight = input->size[dimh];
+ iwidth = input->size[dimw];
+ oheight = gradOutput->size[dimh];
+ owidth = gradOutput->size[dimw];
- // get raw pointers
- real *gradInput_data = THTensor_(data)(gradInput);
- real *gradOutput_data = THTensor_(data)(gradOutput);
- real *indices_data = THTensor_(data)(indices);
+ /* get raw pointers */
+ gradInput_data = THTensor_(data)(gradInput);
+ gradOutput_data = THTensor_(data)(gradOutput);
+ indices_data = THTensor_(data)(indices);
- // backprop
+ /* backprop */
if (input->nDimension == 3)
{
nn_(SpatialMaxPooling_updateGradInput_frame)(gradInput_data, gradOutput_data,
@@ -228,7 +247,7 @@ static int nn_(SpatialMaxPooling_updateGradInput)(lua_State *L)
}
}
- // cleanup
+ /* cleanup */
THTensor_(free)(gradOutput);
return 1;
diff --git a/generic/SpatialSubSampling.c b/generic/SpatialSubSampling.c
index ed9c059..912592c 100644
--- a/generic/SpatialSubSampling.c
+++ b/generic/SpatialSubSampling.c
@@ -20,21 +20,29 @@ static int nn_(SpatialSubSampling_updateOutput)(lua_State *L)
real *output_data;
real *input_data;
- luaL_argcheck(L, input->nDimension == 3 || input->nDimension == 4, 2, "3D or 4D(batch mode) tensor expected");
-
int dimw = 2;
int dimh = 1;
long nbatch = 1;
+
+ long inputWidth;
+ long inputHeight;
+ long outputWidth;
+ long outputHeight;
+
+ long k;
+
+ luaL_argcheck(L, input->nDimension == 3 || input->nDimension == 4, 2, "3D or 4D(batch mode) tensor expected");
+
if (input->nDimension == 4) {
nbatch = input->size[0];
dimw++;
dimh++;
}
- long inputWidth = input->size[dimw];
- long inputHeight = input->size[dimh];
- long outputWidth = (inputWidth - kW) / dW + 1;
- long outputHeight = (inputHeight - kH) / dH + 1;
+ inputWidth = input->size[dimw];
+ inputHeight = input->size[dimh];
+ outputWidth = (inputWidth - kW) / dW + 1;
+ outputHeight = (inputHeight - kH) / dH + 1;
luaL_argcheck(L, input->size[dimh-1] == nInputPlane, 2, "invalid number of input planes");
luaL_argcheck(L, inputWidth >= kW && inputHeight >= kH, 2, "input image smaller than kernel size");
@@ -48,7 +56,6 @@ static int nn_(SpatialSubSampling_updateOutput)(lua_State *L)
input_data = THTensor_(data)(input);
output_data = THTensor_(data)(output);
- long k;
#pragma omp parallel for private(k)
for(k = 0; k < nInputPlane; k++)
{
@@ -70,7 +77,7 @@ static int nn_(SpatialSubSampling_updateOutput)(lua_State *L)
{
for(xx = 0; xx < outputWidth; xx++)
{
- // Compute the mean of the input image...
+ /* Compute the mean of the input image... */
real *ptr_input = input_data + p*nInputPlane*inputWidth*inputHeight + k*inputWidth*inputHeight + yy*dH*inputWidth+xx*dW;
real sum = 0;
long kx, ky;
@@ -79,9 +86,9 @@ static int nn_(SpatialSubSampling_updateOutput)(lua_State *L)
{
for(kx = 0; kx < kW; kx++)
sum += ptr_input[kx];
- ptr_input += inputWidth; // next input line
+ ptr_input += inputWidth; /* next input line */
}
- // Update output
+ /* Update output */
*ptr_output++ += the_weight*sum;
}
}
@@ -108,20 +115,31 @@ static int nn_(SpatialSubSampling_updateGradInput)(lua_State *L)
int dimw = 2;
int dimh = 1;
long nbatch = 1;
+
+ long inputWidth;
+ long inputHeight;
+ long outputWidth;
+ long outputHeight;
+
+ real *weight_data;
+ real *gradOutput_data;
+ real *input_data, *gradInput_data;
+
+ long k;
+
if (input->nDimension == 4) {
nbatch = input->size[0];
dimw++;
dimh++;
}
- long inputWidth = input->size[dimw];
- long inputHeight = input->size[dimh];
- long outputWidth = (inputWidth - kW) / dW + 1;
- long outputHeight = (inputHeight - kH) / dH + 1;
+ inputWidth = input->size[dimw];
+ inputHeight = input->size[dimh];
+ outputWidth = (inputWidth - kW) / dW + 1;
+ outputHeight = (inputHeight - kH) / dH + 1;
- real *weight_data = THTensor_(data)(weight);
- real *gradOutput_data = THTensor_(data)(gradOutput);
- real *input_data, *gradInput_data;
+ weight_data = THTensor_(data)(weight);
+ gradOutput_data = THTensor_(data)(gradOutput);
input_data = THTensor_(data)(input);
@@ -129,7 +147,6 @@ static int nn_(SpatialSubSampling_updateGradInput)(lua_State *L)
gradInput_data = THTensor_(data)(gradInput);
gradOutput_data = THTensor_(data)(gradOutput);
- long k;
#pragma omp parallel for private(k)
for(k = 0; k < nInputPlane; k++)
{
@@ -184,26 +201,37 @@ static int nn_(SpatialSubSampling_accGradParameters)(lua_State *L)
long nbatch = 1;
long dimw = 2;
long dimh = 1;
+
+ long inputWidth;
+ long inputHeight;
+ long outputWidth;
+ long outputHeight;
+
+ real *gradWeight_data;
+ real *gradBias_data;
+ real *gradOutput_data;
+ real *input_data;
+
+ long k;
+
if (input->nDimension == 4) {
dimw++;
dimh++;
nbatch = input->size[0];
}
- long inputWidth = input->size[dimw];
- long inputHeight = input->size[dimh];
- long outputWidth = (inputWidth - kW) / dW + 1;
- long outputHeight = (inputHeight - kH) / dH + 1;
+ inputWidth = input->size[dimw];
+ inputHeight = input->size[dimh];
+ outputWidth = (inputWidth - kW) / dW + 1;
+ outputHeight = (inputHeight - kH) / dH + 1;
- real *gradWeight_data = THTensor_(data)(gradWeight);
- real *gradBias_data = THTensor_(data)(gradBias);
- real *gradOutput_data = THTensor_(data)(gradOutput);
- real *input_data;
+ gradWeight_data = THTensor_(data)(gradWeight);
+ gradBias_data = THTensor_(data)(gradBias);
+ gradOutput_data = THTensor_(data)(gradOutput);
input = THTensor_(newContiguous)(input);
input_data = THTensor_(data)(input);
- long k;
#pragma omp parallel for private(k)
for(k = 0; k < nInputPlane; k++)
{
@@ -213,9 +241,9 @@ static int nn_(SpatialSubSampling_accGradParameters)(lua_State *L)
real *ptr_gradOutput = gradOutput_data + p*nInputPlane*outputHeight*outputWidth + k*outputWidth*outputHeight;
real sum;
long xx, yy;
+ long i;
sum = 0;
- long i;
for(i = 0; i < outputWidth*outputHeight; i++)
sum += ptr_gradOutput[i];
gradBias_data[k] += scale*sum;
diff --git a/generic/TemporalMaxPooling.c b/generic/TemporalMaxPooling.c
index 0111cb5..3c0384d 100644
--- a/generic/TemporalMaxPooling.c
+++ b/generic/TemporalMaxPooling.c
@@ -10,29 +10,38 @@ static int nn_(TemporalMaxPooling_updateOutput)(lua_State *L)
THTensor *indices = luaT_getfieldcheckudata(L, 1, "indices", torch_Tensor);
THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_Tensor);
+ long niframe;
+ long framesize;
+ long noframe;
+
+ real *input_data;
+ real *output_data;
+ real *indices_data;
+
+ long t, y;
+
luaL_argcheck(L, input->nDimension == 2, 2, "2D tensor expected");
luaL_argcheck(L, input->size[0] >= kW, 2, "input sequence smaller than kernel size");
- // sizes
- long niframe = input->size[0];
- long framesize = input->size[1];
- long noframe = (niframe - kW) / dW + 1;
+ /* sizes */
+ niframe = input->size[0];
+ framesize = input->size[1];
+ noframe = (niframe - kW) / dW + 1;
- // get contiguous input
+ /* get contiguous input */
input = THTensor_(newContiguous)(input);
- // resize output
+ /* resize output */
THTensor_(resize2d)(output, noframe, framesize);
- // indices will contain index locations for each output point
+ /* indices will contain index locations for each output point */
THTensor_(resize2d)(indices, noframe, framesize);
- // get raw pointers
- real *input_data = THTensor_(data)(input);
- real *output_data = THTensor_(data)(output);
- real *indices_data = THTensor_(data)(indices);
+ /* get raw pointers */
+ input_data = THTensor_(data)(input);
+ output_data = THTensor_(data)(output);
+ indices_data = THTensor_(data)(indices);
- long t, x, y;
for(t = 0; t < noframe; t++)
{
real *ip = input_data + t*framesize*dW;
@@ -41,9 +50,10 @@ static int nn_(TemporalMaxPooling_updateOutput)(lua_State *L)
#pragma omp parallel for private(y)
for(y = 0; y < framesize; y++)
{
- // compute local max:
+ /* compute local max: */
long maxindex = -1;
real maxval = -THInf;
+ long x;
for(x = 0; x < kW; x++)
{
real val = ip[x*framesize+y];
@@ -54,13 +64,13 @@ static int nn_(TemporalMaxPooling_updateOutput)(lua_State *L)
}
}
- // set output to local max
+ /* set output to local max */
op[y] = maxval;
xp[y] = (real)maxindex;
}
}
- // cleanup
+ /* cleanup */
THTensor_(free)(input);
return 1;
@@ -74,23 +84,31 @@ static int nn_(TemporalMaxPooling_updateGradInput)(lua_State *L)
THTensor *indices = luaT_getfieldcheckudata(L, 1, "indices", torch_Tensor);
THTensor *gradInput = luaT_getfieldcheckudata(L, 1, "gradInput", torch_Tensor);
- // get contiguous gradOutput
+ int noframe;
+ long framesize;
+
+ real *gradInput_data;
+ real *gradOutput_data;
+ real *indices_data;
+
+ long t, y;
+
+ /* get contiguous gradOutput */
gradOutput = THTensor_(newContiguous)(gradOutput);
- // resize and zero
+ /* resize and zero */
THTensor_(resizeAs)(gradInput, input);
THTensor_(zero)(gradInput);
- // sizes
- int noframe = gradOutput->size[0];
- long framesize = gradOutput->size[1];
+ /* sizes */
+ noframe = gradOutput->size[0];
+ framesize = gradOutput->size[1];
- // get raw pointers
- real *gradInput_data = THTensor_(data)(gradInput);
- real *gradOutput_data = THTensor_(data)(gradOutput);
- real *indices_data = THTensor_(data)(indices);
+ /* get raw pointers */
+ gradInput_data = THTensor_(data)(gradInput);
+ gradOutput_data = THTensor_(data)(gradOutput);
+ indices_data = THTensor_(data)(indices);
- long t, y;
for(t = 0; t < noframe; t++)
{
real *gip = gradInput_data + t*framesize*dW;
@@ -99,13 +117,13 @@ static int nn_(TemporalMaxPooling_updateGradInput)(lua_State *L)
#pragma omp parallel for private(y)
for(y = 0; y < framesize; y++)
{
- // compute local max:
+ /* compute local max: */
long maxindex = (long)xp[y];
gip[maxindex*framesize+y] += gop[y];
}
}
- // cleanup
+ /* cleanup */
THTensor_(free)(gradOutput);
return 1;
diff --git a/generic/VolumetricConvolution.c b/generic/VolumetricConvolution.c
index 6e0b6d8..feeaf05 100644
--- a/generic/VolumetricConvolution.c
+++ b/generic/VolumetricConvolution.c
@@ -15,30 +15,33 @@ static int nn_(VolumetricConvolution_updateOutput)(lua_State *L)
luaL_argcheck(L, input->nDimension == 4, 2, "4D tensor expected");
- long nOutputPlane = weight->size[0];
- long kT = weight->size[2];
- long kH = weight->size[3];
- long kW = weight->size[4];
- long inputDepth = input->size[1];
- long inputHeight = input->size[2];
- long inputWidth = input->size[3];
- long outputDepth = (inputDepth - kT) / dT + 1;
- long outputWidth = (inputWidth - kW) / dW + 1;
- long outputHeight = (inputHeight - kH) / dH + 1;
-
- THTensor_(resize4d)(output, nOutputPlane, outputDepth, outputHeight, outputWidth);
-
- /* add bias */
- long i;
- THTensor *outn = THTensor_(new)();
- for (i=0; i<bias->size[0]; i++) {
- THTensor_(select)(outn,output,0,i);
- THTensor_(fill)(outn, THTensor_(get1d)(bias, i));
- }
- THTensor_(free)(outn);
+ {
+ long nOutputPlane = weight->size[0];
+ long kT = weight->size[2];
+ long kH = weight->size[3];
+ long kW = weight->size[4];
+ long inputDepth = input->size[1];
+ long inputHeight = input->size[2];
+ long inputWidth = input->size[3];
+ long outputDepth = (inputDepth - kT) / dT + 1;
+ long outputWidth = (inputWidth - kW) / dW + 1;
+ long outputHeight = (inputHeight - kH) / dH + 1;
+ THTensor *outn = THTensor_(new)();
+ long i;
+
+ THTensor_(resize4d)(output, nOutputPlane, outputDepth, outputHeight, outputWidth);
+
+ /* add bias */
+ for (i=0; i<bias->size[0]; i++) {
+ THTensor_(select)(outn,output,0,i);
+ THTensor_(fill)(outn, THTensor_(get1d)(bias, i));
+ }
- /* do convolutions */
- THTensor_(conv3Dmv)(output, 1.0, 1.0, input, weight, dT, dH, dW, "V", "X");
+ THTensor_(free)(outn);
+
+ /* do convolutions */
+ THTensor_(conv3Dmv)(output, 1.0, 1.0, input, weight, dT, dH, dW, "V", "X");
+ }
return 1;
}
@@ -54,11 +57,12 @@ static int nn_(VolumetricConvolution_updateGradInput)(lua_State *L)
THTensor *weight = luaT_getfieldcheckudata(L, 1, "weight", torch_Tensor);
THTensor *gradInput = luaT_getfieldcheckudata(L, 1, "gradInput", torch_Tensor);
+ THTensor *tweight;
THArgCheck( nOutputPlane == gradOutput->size[0], 1, "Number of output features is not equal to nOutputPlane" );
/* gradient to input */
- THTensor *tweight = THTensor_(newTranspose)(weight,0,1);
+ tweight = THTensor_(newTranspose)(weight,0,1);
THTensor_(conv3Dmv)(gradInput, 0.0, 1.0, gradOutput, tweight, dT, dH, dW, "F", "C");
THTensor_(free)(tweight);
@@ -77,14 +81,16 @@ static int nn_(VolumetricConvolution_accGradParameters)(lua_State *L)
THTensor *gradWeight = luaT_getfieldcheckudata(L, 1, "gradWeight", torch_Tensor);
THTensor *gradBias = luaT_getfieldcheckudata(L, 1, "gradBias", torch_Tensor);
-
- THArgCheck( nOutputPlane == gradOutput->size[0], 1, "Number of output features is not equal to nOutputPlane" );
long k;
+ real *gradBias_data;
+ THTensor* gradOutSlice;
+
+ THArgCheck( nOutputPlane == gradOutput->size[0], 1, "Number of output features is not equal to nOutputPlane" );
/* gradient to bias */
- real *gradBias_data = THTensor_(data)(gradBias);
- THTensor* gradOutSlice = THTensor_(new)();
+ gradBias_data = THTensor_(data)(gradBias);
+ gradOutSlice = THTensor_(new)();
for(k = 0; k < nOutputPlane; k++)
{
THTensor_(select)(gradOutSlice, gradOutput, 0, k);
diff --git a/generic/VolumetricMaxPooling.c b/generic/VolumetricMaxPooling.c
new file mode 100644
index 0000000..20f9701
--- /dev/null
+++ b/generic/VolumetricMaxPooling.c
@@ -0,0 +1,234 @@
+#ifndef TH_GENERIC_FILE
+#define TH_GENERIC_FILE "generic/VolumetricMaxPooling.c"
+#else
+
+static void nn_(VolumetricMaxPooling_updateOutput_frame)(real *input_p, real *output_p,
+ real *indx_p, real *indy_p, real *indz_p,
+ long nslices,
+ long itime, long iwidth, long iheight,
+ long otime, long owidth, long oheight,
+ int kT, int kW, int kH, int dT, int dW, int dH)
+{
+ long k;
+#pragma omp parallel for private(k)
+ for (k = 0; k < nslices; k++)
+ {
+ /* loop over output */
+ long i, j, ti;
+ for(ti = 0; ti < otime; ti++)
+ {
+ for(i = 0; i < oheight; i++)
+ {
+ for(j = 0; j < owidth; j++)
+ {
+ /* local pointers */
+ real *ip = input_p + k*itime*iwidth*iheight + ti*iwidth*iheight*dT + i*iwidth*dH + j*dW;
+ real *op = output_p + k*otime*owidth*oheight + ti*owidth*oheight + i*owidth + j;
+ real *indzp = indz_p + k*otime*owidth*oheight + ti*owidth*oheight + i*owidth + j;
+ real *indyp = indy_p + k*otime*owidth*oheight + ti*owidth*oheight + i*owidth + j;
+ real *indxp = indx_p + k*otime*owidth*oheight + ti*owidth*oheight + i*owidth + j;
+
+ /* compute local max: */
+ real maxval = -THInf;
+ int x,y,z;
+
+ *indzp = -1;
+ *indyp = -1;
+ *indxp = -1;
+ for(z=0; z < kT; z++)
+ {
+ for(y = 0; y < kH; y++)
+ {
+ for(x = 0; x < kW; x++)
+ {
+ real val = *(ip + z*iwidth*iheight + y*iwidth + x);
+ if (val > maxval)
+ {
+ maxval = val;
+ *indzp = z+1;
+ *indyp = y+1;
+ *indxp = x+1;
+ }
+ }
+ }
+ }
+ /* set output to local max */
+ *op = maxval;
+
+ /* store location of max (x,y) */
+ /**indyp = (int)(maxindex / kW)+1;*/
+ /**indxp = (maxindex % kW) +1;*/
+ }
+ }
+ }
+ }
+}
+
+static int nn_(VolumetricMaxPooling_updateOutput)(lua_State *L)
+{
+ THTensor *input = luaT_checkudata(L, 2, torch_Tensor);
+ int kT = luaT_getfieldcheckint(L, 1, "kT");
+ int kW = luaT_getfieldcheckint(L, 1, "kW");
+ int kH = luaT_getfieldcheckint(L, 1, "kH");
+ int dT = luaT_getfieldcheckint(L, 1, "dT");
+ int dW = luaT_getfieldcheckint(L, 1, "dW");
+ int dH = luaT_getfieldcheckint(L, 1, "dH");
+ THTensor *indices = luaT_getfieldcheckudata(L, 1, "indices", torch_Tensor);
+ THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_Tensor);
+ long nslices;
+ long itime;
+ long iheight;
+ long iwidth;
+ long otime;
+ long oheight;
+ long owidth;
+ real *input_data;
+ real *output_data;
+ real *indices_data;
+
+
+ luaL_argcheck(L, input->nDimension == 4 , 2, "4D tensor expected");
+ luaL_argcheck(L, input->size[3] >= kW && input->size[2] >= kH && input->size[1] >= kT, 2, "input image smaller than kernel size");
+
+ /* sizes */
+ nslices = input->size[0];
+ itime = input->size[1];
+ iheight = input->size[2];
+ iwidth = input->size[3];
+ otime = (itime - kT) / dT + 1;
+ oheight = (iheight - kH) / dH + 1;
+ owidth = (iwidth - kW) / dW + 1;
+
+ /* get contiguous input */
+ input = THTensor_(newContiguous)(input);
+
+ /* resize output */
+ THTensor_(resize4d)(output, nslices, otime, oheight, owidth);
+ /* indices will contain ti,i,j locations for each output point */
+ THTensor_(resize5d)(indices, 3, nslices, otime, oheight, owidth);
+
+ input_data = THTensor_(data)(input);
+ output_data = THTensor_(data)(output);
+ indices_data = THTensor_(data)(indices);
+
+ nn_(VolumetricMaxPooling_updateOutput_frame)(input_data, output_data,
+ indices_data+nslices*otime*owidth*oheight*2,
+ indices_data+nslices*otime*owidth*oheight,
+ indices_data,
+ nslices,
+ itime, iwidth, iheight,
+ otime, owidth, oheight,
+ kT, kW, kH, dT, dW, dH);
+ /* cleanup */
+ THTensor_(free)(input);
+ return 1;
+}
+
+static void nn_(VolumetricMaxPooling_updateGradInput_frame)(real *gradInput_p, real *gradOutput_p,
+ real *indx_p, real *indy_p, real *indz_p,
+ long nslices,
+ long itime, long iwidth, long iheight,
+ long otime, long owidth, long oheight,
+ int dT, int dW, int dH)
+{
+ long k;
+#pragma omp parallel for private(k)
+ for (k = 0; k < nslices; k++)
+ {
+ real *gradInput_p_k = gradInput_p + k*itime*iwidth*iheight;
+ real *gradOutput_p_k = gradOutput_p + k*otime*owidth*oheight;
+ real *indx_p_k = indx_p + k*otime*owidth*oheight;
+ real *indy_p_k = indy_p + k*otime*owidth*oheight;
+ real *indz_p_k = indz_p + k*otime*owidth*oheight;
+
+ /* calculate max points */
+ long ti, i, j;
+ for(ti = 0; ti < otime; ti++)
+ {
+ for(i = 0; i < oheight; i++)
+ {
+ for(j = 0; j < owidth; j++)
+ {
+ /* retrieve position of max */
+ long maxti = indz_p_k[ti*oheight*owidth + i*owidth + j] - 1 + ti*dT;
+ long maxi = indy_p_k[ti*oheight*owidth + i*owidth + j] - 1 + i*dH;
+ long maxj = indx_p_k[ti*oheight*owidth + i*owidth + j] - 1 + j*dW;
+
+ /* update gradient */
+ gradInput_p_k[maxti*iheight*iwidth + maxi*iwidth + maxj] += gradOutput_p_k[ti*oheight*owidth + i*owidth + j];
+ }
+ }
+ }
+ }
+}
+
+static int nn_(VolumetricMaxPooling_updateGradInput)(lua_State *L)
+{
+ THTensor *input = luaT_checkudata(L, 2, torch_Tensor);
+ THTensor *gradOutput = luaT_checkudata(L, 3, torch_Tensor);
+ int dT = luaT_getfieldcheckint(L, 1, "dT");
+ int dW = luaT_getfieldcheckint(L, 1, "dW");
+ int dH = luaT_getfieldcheckint(L, 1, "dH");
+ THTensor *indices = luaT_getfieldcheckudata(L, 1, "indices", torch_Tensor);
+ THTensor *gradInput = luaT_getfieldcheckudata(L, 1, "gradInput", torch_Tensor);
+ int nslices;
+ int itime;
+ int iheight;
+ int iwidth;
+ int otime;
+ int oheight;
+ int owidth;
+ real *gradInput_data;
+ real *gradOutput_data;
+ real *indices_data;
+
+ /* get contiguous gradOutput */
+ gradOutput = THTensor_(newContiguous)(gradOutput);
+
+ /* resize */
+ THTensor_(resizeAs)(gradInput, input);
+ THTensor_(zero)(gradInput);
+
+ /* sizes */
+ nslices = input->size[0];
+ itime = input->size[1];
+ iheight = input->size[2];
+ iwidth = input->size[3];
+ otime = gradOutput->size[1];
+ oheight = gradOutput->size[2];
+ owidth = gradOutput->size[3];
+
+ /* get raw pointers */
+ gradInput_data = THTensor_(data)(gradInput);
+ gradOutput_data = THTensor_(data)(gradOutput);
+ indices_data = THTensor_(data)(indices);
+
+ /* backprop */
+ nn_(VolumetricMaxPooling_updateGradInput_frame)(gradInput_data, gradOutput_data,
+ indices_data+nslices*otime*owidth*oheight*2,
+ indices_data+nslices*otime*owidth*oheight,
+ indices_data,
+ nslices,
+ itime, iwidth, iheight,
+ otime, owidth, oheight,
+ dT, dW, dH);
+
+ /* cleanup */
+ THTensor_(free)(gradOutput);
+ return 1;
+}
+
+static const struct luaL_Reg nn_(VolumetricMaxPooling__) [] = {
+ {"VolumetricMaxPooling_updateOutput", nn_(VolumetricMaxPooling_updateOutput)},
+ {"VolumetricMaxPooling_updateGradInput", nn_(VolumetricMaxPooling_updateGradInput)},
+ {NULL, NULL}
+};
+
+static void nn_(VolumetricMaxPooling_init)(lua_State *L)
+{
+ luaT_pushmetatable(L, torch_Tensor);
+ luaT_registeratname(L, nn_(VolumetricMaxPooling__), "nn");
+ lua_pop(L,1);
+}
+
+#endif
diff --git a/init.c b/init.c
index 538c572..877faa9 100644
--- a/init.c
+++ b/init.c
@@ -95,6 +95,9 @@
#include "generic/VolumetricConvolution.c"
#include "THGenerateFloatTypes.h"
+#include "generic/VolumetricMaxPooling.c"
+#include "THGenerateFloatTypes.h"
+
#include "generic/MultiMarginCriterion.c"
#include "THGenerateFloatTypes.h"
@@ -142,6 +145,7 @@ int luaopen_libnn(lua_State *L)
nn_FloatSpatialSubSampling_init(L);
nn_FloatSpatialMaxPooling_init(L);
nn_FloatVolumetricConvolution_init(L);
+ nn_FloatVolumetricMaxPooling_init(L);
nn_FloatMultiMarginCriterion_init(L);
nn_FloatMultiLabelMarginCriterion_init(L);
nn_FloatL1Cost_init(L);
@@ -176,6 +180,7 @@ int luaopen_libnn(lua_State *L)
nn_DoubleSpatialSubSampling_init(L);
nn_DoubleSpatialMaxPooling_init(L);
nn_DoubleVolumetricConvolution_init(L);
+ nn_DoubleVolumetricMaxPooling_init(L);
nn_DoubleMultiMarginCriterion_init(L);
nn_DoubleMultiLabelMarginCriterion_init(L);
nn_DoubleL1Cost_init(L);
diff --git a/init.lua b/init.lua
index dfdfc8e..7071298 100644
--- a/init.lua
+++ b/init.lua
@@ -75,6 +75,7 @@ include('SpatialContrastiveNormalization.lua')
include('SpatialZeroPadding.lua')
include('VolumetricConvolution.lua')
+include('VolumetricMaxPooling.lua')
include('ParallelTable.lua')
include('ConcatTable.lua')
diff --git a/test/test.lua b/test/test.lua
index 0147ea3..89db059 100644
--- a/test/test.lua
+++ b/test/test.lua
@@ -387,21 +387,20 @@ function nntest.WeightedEuclidean()
mytester:asserteq(berr, 0, torch.typename(module) .. ' - i/o backward err ')
end
--- function nntest.WeightedMSECriterion()
+--function nntest.WeightedMSECriterion()
-- local from = math.random(100,200)
-- local input = torch.Tensor(from):zero()
-- local target = torch.randn(from)
-- local weight = torch.randn(from)
-- local cri = nn.WeightedMSECriterion(weight)
-- local module = nn.CriterionModule(cri,target)
-
--- local err = jac.testJacobian(module, input)
+-- local err = jac.testJacobian(module, input)
-- mytester:assertlt(err, precision, 'error on state ')
--- local ferr, berr = jac.testIO(module, input)
+-- local ferr, berr = jac.testIO(module, input)
-- mytester:asserteq(0, ferr, torch.typename(module) .. ' - i/o forward err ')
-- mytester:asserteq(0, berr, torch.typename(module) .. ' - i/o backward err ')
--- end
+--end
function nntest.LogSigmoid()
local ini = math.random(10,20)
@@ -1347,10 +1346,10 @@ function nntest.TemporalSubSampling()
end
function nntest.TemporalMaxPooling()
- local from = math.random(1,10)
- local ki = math.random(1,10)
- local si = math.random(1,4)
- local outi = math.random(10,20)
+ local from = math.random(10,10)
+ local ki = math.random(5,10)
+ local si = math.random(1,2)
+ local outi = math.random(50,90)
local ini = (outi-1)*si+ki
local module = nn.TemporalMaxPooling(ki, si)
local input = torch.Tensor(ini, from):zero()
@@ -1411,6 +1410,32 @@ function nntest.VolumetricConvolution()
mytester:asserteq(0, berr, torch.typename(module) .. ' - i/o backward err ')
end
+function nntest.VolumetricMaxPooling()
+ local from = math.random(2,5)
+ local to = from
+ local kt = math.random(3,7)
+ local ki = math.random(3,7)
+ local kj = math.random(3,7)
+ local st = math.random(2,4)
+ local si = math.random(2,4)
+ local sj = math.random(2,4)
+ local outt = math.random(3,7)
+ local outi = math.random(3,7)
+ local outj = math.random(3,7)
+ local int = (outt-1)*st+kt
+ local ini = (outi-1)*si+ki
+ local inj = (outj-1)*sj+kj
+ local module = nn.VolumetricMaxPooling(kt, ki, kj, st, si, sj)
+ local input = torch.Tensor(from, int, inj, ini):zero()
+
+ local err = jac.testJacobian(module, input)
+ mytester:assertlt(err, precision, 'error on state ')
+
+ local ferr, berr = jac.testIO(module, input)
+ mytester:asserteq(0, ferr, torch.typename(module) .. ' - i/o forward err ')
+ mytester:asserteq(0, berr, torch.typename(module) .. ' - i/o backward err ')
+end
+
function nntest.Module_getParameters_1()
local n = nn.Sequential()
n:add( nn.Linear(10,10) )