github.com/torch/nn.git
author    soumith <soumith@fb.com>  2015-05-28 07:42:59 +0300
committer soumith <soumith@fb.com>  2015-05-28 07:42:59 +0300
commit    ec6953806d02ea189cc9fb5ba83ada05eca7d28c (patch)
tree      8a51527d9e30802b97aeed5999b9939f3f62b0ee
parent    e35f09a1f7ff6a123e841dfae24485999f7ce31d (diff)
Volumetric Average Pooling + doc + unit test, better performance for Volumetric Max Pooling
-rw-r--r--  VolumetricAveragePooling.lua         34
-rwxr-xr-x  doc/convolution.md                   12
-rw-r--r--  generic/VolumetricAveragePooling.c  263
-rw-r--r--  generic/VolumetricMaxPooling.c      259
-rw-r--r--  init.c                                5
-rw-r--r--  init.lua                              1
-rw-r--r--  test.lua                            143
7 files changed, 519 insertions, 198 deletions
diff --git a/VolumetricAveragePooling.lua b/VolumetricAveragePooling.lua
new file mode 100644
index 0000000..1fc0ec9
--- /dev/null
+++ b/VolumetricAveragePooling.lua
@@ -0,0 +1,34 @@
+local VolumetricAveragePooling, parent = torch.class(
+ 'nn.VolumetricAveragePooling', 'nn.Module')
+
+function VolumetricAveragePooling:__init(kT, kW, kH, dT, dW, dH)
+ parent.__init(self)
+
+ dT = dT or kT
+ dW = dW or kW
+ dH = dH or kH
+
+ self.kT = kT
+ self.kH = kH
+ self.kW = kW
+ self.dT = dT
+ self.dW = dW
+ self.dH = dH
+end
+
+function VolumetricAveragePooling:updateOutput(input)
+ input.nn.VolumetricAveragePooling_updateOutput(self, input)
+ return self.output
+end
+
+function VolumetricAveragePooling:updateGradInput(input, gradOutput)
+ input.nn.VolumetricAveragePooling_updateGradInput(self, input, gradOutput)
+ return self.gradInput
+end
+
+function VolumetricAveragePooling:empty()
+ self.gradInput:resize()
+ self.gradInput:storage():resize(0)
+ self.output:resize()
+ self.output:storage():resize(0)
+end
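For reference, a minimal usage sketch of the new module (not part of the patch; shapes assume the non-batch `nInputPlane x time x height x width` layout the C kernel expects):

```lua
require 'nn'

-- 3x3x3 pooling window, stride 2 in each dimension
local pool = nn.VolumetricAveragePooling(3, 3, 3, 2, 2, 2)

-- 2 planes, 9 frames of 9x9 images
local input = torch.rand(2, 9, 9, 9)
local output = pool:forward(input)  -- 2x4x4x4, since (9 - 3) / 2 + 1 = 4

-- the backward pass spreads each gradient uniformly over its 3x3x3 footprint
local gradInput = pool:backward(input, output:clone():fill(1))
```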
diff --git a/doc/convolution.md b/doc/convolution.md
index f2380f2..d0a02fa 100755
--- a/doc/convolution.md
+++ b/doc/convolution.md
@@ -23,6 +23,7 @@ a kernel for computing the weighted average in a neighborhood ;
* [Volumetric Modules](#nn.VolumetricModules) apply to inputs with three-dimensional relationships (e.g. videos) :
* [VolumetricConvolution](#nn.VolumetricConvolution) : a 3D convolution over an input video (a sequence of images) ;
* [VolumetricMaxPooling](#nn.VolumetricMaxPooling) : a 3D max-pooling operation over an input video.
+ * [VolumetricAveragePooling](#nn.VolumetricAveragePooling) : a 3D average-pooling operation over an input video.
<a name="nn.TemporalModules"/>
## Temporal Modules ##
@@ -605,3 +606,14 @@ module = nn.VolumetricMaxPooling(kT, kW, kH [, dT, dW, dH])
Applies 3D max-pooling operation in `kTxkWxkH` regions with step size
`dTxdWxdH`. The number of output planes equals the number of input planes;
each output extent is `(input extent - kernel extent) / step + 1`.
+
+<a name="nn.VolumetricAveragePooling"/>
+### VolumetricAveragePooling ###
+
+```lua
+module = nn.VolumetricAveragePooling(kT, kW, kH [, dT, dW, dH])
+```
+
+Applies 3D average-pooling operation in `kTxkWxkH` regions with step size
+`dTxdWxdH`. The number of output planes equals the number of input planes;
+each output extent is `(input extent - kernel extent) / step + 1`.
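Both volumetric pooling modules share the same valid (no padding) sliding-window size arithmetic; a worked example (the `poolOut` helper is illustrative, not part of the library):

```lua
-- output extent of one dimension for a valid pooling window
local function poolOut(inSize, k, d)
   return math.floor((inSize - k) / d) + 1
end

print(poolOut(16, 2, 2))  -- 8 output frames from a 16-frame clip with kT=2, dT=2
print(poolOut(9, 3, 2))   -- 4, matching otime = (itime - kT) / dT + 1 in the C code
```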
diff --git a/generic/VolumetricAveragePooling.c b/generic/VolumetricAveragePooling.c
new file mode 100644
index 0000000..28bd0b0
--- /dev/null
+++ b/generic/VolumetricAveragePooling.c
@@ -0,0 +1,263 @@
+#ifndef TH_GENERIC_FILE
+#define TH_GENERIC_FILE "generic/VolumetricAveragePooling.c"
+#else
+
+static void nn_(VolumetricAveragePooling_updateOutput_frame)(
+ real *input_p, real *output_p, long nslices,
+ long itime, long iwidth, long iheight,
+ long otime, long owidth, long oheight,
+ int kT, int kW, int kH, int dT, int dW, int dH) {
+ long k;
+#pragma omp parallel for private(k)
+ for (k = 0; k < nslices; k++) {
+ /* loop over output */
+ long i, j, ti;
+ for(ti = 0; ti < otime; ti++) {
+ for(i = 0; i < oheight; i++) {
+ for(j = 0; j < owidth; j++) {
+ /* local pointers */
+ real *ip = input_p + k * itime * iwidth * iheight
+ + ti * iwidth * iheight * dT + i * iwidth * dH + j * dW;
+ real *op = output_p + k * otime * owidth * oheight
+ + ti * owidth * oheight + i * owidth + j;
+
+ /* compute local sum: */
+ real sum = 0.0;
+ int x,y,z;
+
+ for(z=0; z < kT; z++) {
+ for(y = 0; y < kH; y++) {
+ for(x = 0; x < kW; x++) {
+ sum += *(ip + z * iwidth * iheight + y * iwidth + x);
+ }
+ }
+ }
+
+ /* set output to local average */
+ *op = sum / (kT * kW * kH);
+ }
+ }
+ }
+ }
+}
+
+static int nn_(VolumetricAveragePooling_updateOutput)(lua_State *L) {
+ THTensor *input = luaT_checkudata(L, 2, torch_Tensor);
+ int kT = luaT_getfieldcheckint(L, 1, "kT");
+ int kW = luaT_getfieldcheckint(L, 1, "kW");
+ int kH = luaT_getfieldcheckint(L, 1, "kH");
+ int dT = luaT_getfieldcheckint(L, 1, "dT");
+ int dW = luaT_getfieldcheckint(L, 1, "dW");
+ int dH = luaT_getfieldcheckint(L, 1, "dH");
+ THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_Tensor);
+ long nslices;
+ long itime;
+ long iheight;
+ long iwidth;
+ long otime;
+ long oheight;
+ long owidth;
+ real *input_data;
+ real *output_data;
+
+ luaL_argcheck(L, input->nDimension == 4 || input->nDimension == 5, 2,
+ "4D or 5D (batch-mode) tensor expected");
+
+ int dimN = 0;
+ int dimt = 1;
+ int dimh = 2;
+ int dimw = 3;
+
+ if (input->nDimension == 5) {
+ dimN++;
+ dimt++;
+ dimh++;
+ dimw++;
+ }
+
+ luaL_argcheck(L, input->size[dimw] >= kW && input->size[dimh] >= kH &&
+ input->size[dimt] >= kT, 2,
+ "input image smaller than kernel size");
+
+ /* sizes */
+ nslices = input->size[dimN];
+ itime = input->size[dimt];
+ iheight = input->size[dimh];
+ iwidth = input->size[dimw];
+ otime = (itime - kT) / dT + 1;
+ oheight = (iheight - kH) / dH + 1;
+ owidth = (iwidth - kW) / dW + 1;
+
+ /* get contiguous input */
+ input = THTensor_(newContiguous)(input);
+
+ if (input->nDimension == 4) { /* non-batch mode */
+ /* resize output */
+ THTensor_(resize4d)(output, nslices, otime, oheight, owidth);
+
+ input_data = THTensor_(data)(input);
+ output_data = THTensor_(data)(output);
+
+ nn_(VolumetricAveragePooling_updateOutput_frame)(input_data, output_data,
+ nslices,
+ itime, iwidth, iheight,
+ otime, owidth, oheight,
+ kT, kW, kH, dT, dW, dH);
+ } else { /* batch mode */
+ long p;
+ long nBatch = input->size[0];
+
+ long istride = nslices * itime * iwidth * iheight;
+ long ostride = nslices * otime * owidth * oheight;
+
+ /* resize output */
+ THTensor_(resize5d)(output, nBatch, nslices, otime, oheight, owidth);
+
+ input_data = THTensor_(data)(input);
+ output_data = THTensor_(data)(output);
+
+#pragma omp parallel for private(p)
+ for (p=0; p < nBatch; p++) {
+ nn_(VolumetricAveragePooling_updateOutput_frame)(
+ input_data + p * istride, output_data + p * ostride,
+ nslices, itime, iwidth, iheight, otime, owidth, oheight,
+ kT, kW, kH, dT, dW, dH);
+ }
+ }
+
+ /* cleanup */
+ THTensor_(free)(input);
+ return 1;
+}
+
+static void nn_(VolumetricAveragePooling_updateGradInput_frame)(
+ real *gradInput_p, real *gradOutput_p, long nslices,
+ long itime, long iwidth, long iheight,
+ long otime, long owidth, long oheight,
+ int kT, int kW, int kH, int dT, int dW, int dH) {
+ long k;
+#pragma omp parallel for private(k)
+ for (k = 0; k < nslices; k++) {
+ /* loop over output */
+ long i, j, ti;
+ for(ti = 0; ti < otime; ti++) {
+ for(i = 0; i < oheight; i++) {
+ for(j = 0; j < owidth; j++) {
+ /* local pointers */
+ real *ip = gradInput_p + k * itime * iwidth * iheight
+ + ti * iwidth * iheight * dT + i * iwidth * dH + j * dW;
+ real *op = gradOutput_p + k * otime * owidth * oheight
+ + ti * owidth * oheight + i * owidth + j;
+
+ /* scatter gradients out to footprint: */
+ real val = *op / (kT * kW * kH);
+ int x,y,z;
+ for(z=0; z < kT; z++) {
+ for(y = 0; y < kH; y++) {
+ for(x = 0; x < kW; x++) {
+ *(ip + z * iwidth * iheight + y * iwidth + x) += val;
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+}
+
+static int nn_(VolumetricAveragePooling_updateGradInput)(lua_State *L) {
+ THTensor *input = luaT_checkudata(L, 2, torch_Tensor);
+ THTensor *gradOutput = luaT_checkudata(L, 3, torch_Tensor);
+ int dT = luaT_getfieldcheckint(L, 1, "dT");
+ int dW = luaT_getfieldcheckint(L, 1, "dW");
+ int dH = luaT_getfieldcheckint(L, 1, "dH");
+ int kT = luaT_getfieldcheckint(L, 1, "kT");
+ int kW = luaT_getfieldcheckint(L, 1, "kW");
+ int kH = luaT_getfieldcheckint(L, 1, "kH");
+ THTensor *gradInput = luaT_getfieldcheckudata(L, 1, "gradInput",
+ torch_Tensor);
+ int nslices;
+ int itime;
+ int iheight;
+ int iwidth;
+ int otime;
+ int oheight;
+ int owidth;
+ real *gradInput_data;
+ real *gradOutput_data;
+ real *indices_data;
+
+ int dimN = 0;
+ int dimt = 1;
+ int dimh = 2;
+ int dimw = 3;
+
+ /* get contiguous gradOutput */
+ gradOutput = THTensor_(newContiguous)(gradOutput);
+
+ /* resize */
+ THTensor_(resizeAs)(gradInput, input);
+ THTensor_(zero)(gradInput);
+
+ if (input->nDimension == 5) {
+ dimN++;
+ dimt++;
+ dimh++;
+ dimw++;
+ }
+
+ /* sizes */
+ nslices = input->size[dimN];
+ itime = input->size[dimt];
+ iheight = input->size[dimh];
+ iwidth = input->size[dimw];
+ otime = gradOutput->size[dimt];
+ oheight = gradOutput->size[dimh];
+ owidth = gradOutput->size[dimw];
+
+ /* get raw pointers */
+ gradInput_data = THTensor_(data)(gradInput);
+ gradOutput_data = THTensor_(data)(gradOutput);
+
+ /* backprop */
+ if (input->nDimension == 4) { /* non-batch mode*/
+ nn_(VolumetricAveragePooling_updateGradInput_frame)(
+ gradInput_data, gradOutput_data, nslices,
+ itime, iwidth, iheight, otime, owidth, oheight,
+ kT, kW, kH, dT, dW, dH);
+ } else { /* batch mode */
+ long p;
+ long nBatch = input->size[0];
+
+ long istride = nslices * itime * iwidth * iheight;
+ long ostride = nslices * otime * owidth * oheight;
+
+#pragma omp parallel for private(p)
+ for (p = 0; p < nBatch; p++) {
+ nn_(VolumetricAveragePooling_updateGradInput_frame)(
+ gradInput_data + p * istride, gradOutput_data + p * ostride, nslices,
+ itime, iwidth, iheight, otime, owidth, oheight,
+ kT, kW, kH, dT, dW, dH);
+ }
+ }
+
+ /* cleanup */
+ THTensor_(free)(gradOutput);
+ return 1;
+}
+
+static const struct luaL_Reg nn_(VolumetricAveragePooling__) [] = {
+ {"VolumetricAveragePooling_updateOutput",
+ nn_(VolumetricAveragePooling_updateOutput)},
+ {"VolumetricAveragePooling_updateGradInput",
+ nn_(VolumetricAveragePooling_updateGradInput)},
+ {NULL, NULL}
+};
+
+static void nn_(VolumetricAveragePooling_init)(lua_State *L) {
+ luaT_pushmetatable(L, torch_Tensor);
+ luaT_registeratname(L, nn_(VolumetricAveragePooling__), "nn");
+ lua_pop(L,1);
+}
+
+#endif
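A quick way to check the contract implemented above: averaging a constant input reproduces the constant, and updateGradInput spreads each output gradient as `g / (kT*kW*kH)` over its footprint. A minimal sanity check, assuming non-overlapping windows (stride defaults to the kernel size):

```lua
require 'nn'

local pool = nn.VolumetricAveragePooling(2, 2, 2)  -- dT/dW/dH default to kT/kW/kH
local input = torch.ones(1, 4, 4, 4)
local output = pool:forward(input)                 -- 1x2x2x2, all ones
assert(output:clone():add(-1):abs():max() == 0)

local gradInput = pool:backward(input, torch.ones(1, 2, 2, 2))
-- each input cell lies in exactly one window, so every gradient is 1 / (2*2*2)
assert(gradInput:clone():add(-1/8):abs():max() < 1e-12)
```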
diff --git a/generic/VolumetricMaxPooling.c b/generic/VolumetricMaxPooling.c
index 28fd5fe..4114889 100644
--- a/generic/VolumetricMaxPooling.c
+++ b/generic/VolumetricMaxPooling.c
@@ -2,64 +2,55 @@
#define TH_GENERIC_FILE "generic/VolumetricMaxPooling.c"
#else
-static void nn_(VolumetricMaxPooling_updateOutput_frame)(real *input_p, real *output_p,
- real *indx_p, real *indy_p, real *indz_p,
- long nslices,
- long itime, long iwidth, long iheight,
- long otime, long owidth, long oheight,
- int kT, int kW, int kH, int dT, int dW, int dH)
-{
+static void nn_(VolumetricMaxPooling_updateOutput_frame)(
+ real *input_p, real *output_p, real *indz_p,
+ long nslices, long itime, long iwidth, long iheight,
+ long otime, long owidth, long oheight,
+ int kT, int kW, int kH, int dT, int dW, int dH) {
long k;
#pragma omp parallel for private(k)
for (k = 0; k < nslices; k++)
{
/* loop over output */
long i, j, ti;
- for(ti = 0; ti < otime; ti++)
- {
- for(i = 0; i < oheight; i++)
- {
- for(j = 0; j < owidth; j++)
- {
- /* local pointers */
- real *ip = input_p + k*itime*iwidth*iheight + ti*iwidth*iheight*dT + i*iwidth*dH + j*dW;
- real *op = output_p + k*otime*owidth*oheight + ti*owidth*oheight + i*owidth + j;
- real *indzp = indz_p + k*otime*owidth*oheight + ti*owidth*oheight + i*owidth + j;
- real *indyp = indy_p + k*otime*owidth*oheight + ti*owidth*oheight + i*owidth + j;
- real *indxp = indx_p + k*otime*owidth*oheight + ti*owidth*oheight + i*owidth + j;
-
- /* compute local max: */
- real maxval = -THInf;
- int x,y,z;
-
- *indzp = -1;
- *indyp = -1;
- *indxp = -1;
- for(z=0; z < kT; z++)
- {
- for(y = 0; y < kH; y++)
- {
- for(x = 0; x < kW; x++)
- {
- real val = *(ip + z*iwidth*iheight + y*iwidth + x);
- if (val > maxval)
- {
- maxval = val;
- *indzp = z+1;
- *indyp = y+1;
- *indxp = x+1;
- }
- }
- }
- }
-
- /* set output to local max */
- *op = maxval;
-
- /* store location of max (x,y) */
- /**indyp = (int)(maxindex / kW)+1;*/
- /**indxp = (maxindex % kW) +1;*/
- }
+ for(ti = 0; ti < otime; ti++) {
+ for(i = 0; i < oheight; i++) {
+ for(j = 0; j < owidth; j++) {
+ /* local pointers */
+ real *ip = input_p + k * itime * iwidth * iheight
+ + ti * iwidth * iheight * dT + i * iwidth * dH + j * dW;
+ real *op = output_p + k * otime * owidth * oheight
+ + ti * owidth * oheight + i * owidth + j;
+ real *indzp = indz_p + k * otime * owidth * oheight
+ + ti * owidth * oheight + i * owidth + j;
+
+ /* compute local max: */
+ real maxval = -THInf;
+ int x,y,z;
+ int mx = 0, my = 0, mz = 0;
+
+ for(z = 0; z < kT; z++) {
+ for(y = 0; y < kH; y++) {
+ for(x = 0; x < kW; x++) {
+ real val = *(ip + z * iwidth * iheight + y * iwidth + x);
+ if (val > maxval) {
+ maxval = val;
+ mz = z;
+ my = y;
+ mx = x;
+ }
+ }
+ }
+ }
+
+ /* set max values */
+ ((unsigned char*)(indzp))[0] = mz;
+ ((unsigned char*)(indzp))[1] = my;
+ ((unsigned char*)(indzp))[2] = mx;
+ ((unsigned char*)(indzp))[3] = 0;
+ /* set output to local max */
+ *op = maxval;
+ }
}
}
}
@@ -87,7 +78,8 @@ static int nn_(VolumetricMaxPooling_updateOutput)(lua_State *L)
real *output_data;
real *indices_data;
- luaL_argcheck(L, input->nDimension == 4 || input->nDimension == 5, 2, "4D or 5D (batch-mode) tensor expected");
+ luaL_argcheck(L, input->nDimension == 4 || input->nDimension == 5, 2,
+ "4D or 5D (batch-mode) tensor expected");
int dimN = 0;
int dimt = 1;
@@ -101,16 +93,18 @@ static int nn_(VolumetricMaxPooling_updateOutput)(lua_State *L)
dimw++;
}
- luaL_argcheck(L, input->size[dimw] >= kW && input->size[dimh] >= kH && input->size[dimt] >= kT, 2, "input image smaller than kernel size");
+ luaL_argcheck(L, input->size[dimw] >= kW &&
+ input->size[dimh] >= kH && input->size[dimt] >= kT, 2,
+ "input image smaller than kernel size");
/* sizes */
nslices = input->size[dimN];
- itime = input->size[dimt];
+ itime = input->size[dimt];
iheight = input->size[dimh];
- iwidth = input->size[dimw];
- otime = (itime - kT) / dT + 1;
+ iwidth = input->size[dimw];
+ otime = (itime - kT) / dT + 1;
oheight = (iheight - kH) / dH + 1;
- owidth = (iwidth - kW) / dW + 1;
+ owidth = (iwidth - kW) / dW + 1;
/* get contiguous input */
input = THTensor_(newContiguous)(input);
@@ -118,61 +112,46 @@ static int nn_(VolumetricMaxPooling_updateOutput)(lua_State *L)
if (input->nDimension == 4) { /* non-batch mode */
/* resize output */
THTensor_(resize4d)(output, nslices, otime, oheight, owidth);
- /* indices will contain ti,i,j locations for each output point */
- THTensor_(resize5d)(indices, 3, nslices, otime, oheight, owidth);
-
+ /* indices will contain ti,i,j uchar locations packed into float/double */
+ THTensor_(resize4d)(indices, nslices, otime, oheight, owidth);
+
input_data = THTensor_(data)(input);
output_data = THTensor_(data)(output);
indices_data = THTensor_(data)(indices);
-
+
nn_(VolumetricMaxPooling_updateOutput_frame)(input_data, output_data,
- indices_data+nslices*otime*owidth*oheight*2,
- indices_data+nslices*otime*owidth*oheight,
- indices_data,
- nslices,
- itime, iwidth, iheight,
- otime, owidth, oheight,
- kT, kW, kH, dT, dW, dH);
- }
- else { /* batch mode */
+ indices_data,
+ nslices,
+ itime, iwidth, iheight,
+ otime, owidth, oheight,
+ kT, kW, kH, dT, dW, dH);
+ } else { /* batch mode */
long p;
long nBatch = input->size[0];
- long istride = nslices*itime*iwidth*iheight;
- long ostride = nslices*otime*owidth*oheight;
+ long istride = nslices * itime * iwidth * iheight;
+ long ostride = nslices * otime * owidth * oheight;
/* resize output */
THTensor_(resize5d)(output, nBatch, nslices, otime, oheight, owidth);
/* indices will contain ti,i,j locations for each output point */
-
- THLongStorage* size = THLongStorage_newWithSize(6);
- size->data[0] = 3; size->data[1] = nBatch;
- size->data[2] = nslices; size->data[3] = otime;
- size->data[4] = oheight; size->data[5] = owidth;
- THTensor_(resize)(indices, size, NULL); /* resize6d not available */
- //TODO: Replace with resize6d when available
- //THTensor_(resize6d)(indices, 3, nBatch, nslices, otime, oheight, owidth);
+ THTensor_(resize5d)(indices, nBatch, nslices, otime, oheight, owidth);
input_data = THTensor_(data)(input);
output_data = THTensor_(data)(output);
indices_data = THTensor_(data)(indices);
#pragma omp parallel for private(p)
- for (p=0; p < nBatch; p++)
- {
+ for (p=0; p < nBatch; p++) {
nn_(VolumetricMaxPooling_updateOutput_frame)(
- input_data+p*istride,
- output_data+p*ostride,
- indices_data+(p+nBatch+nBatch)*ostride,
- indices_data+(p+nBatch)*ostride,
- indices_data+p*ostride,
- nslices,
- itime, iwidth, iheight,
- otime, owidth, oheight,
- kT, kW, kH, dT, dW, dH);
+ input_data + p * istride,
+ output_data + p * ostride,
+ indices_data + p * ostride,
+ nslices,
+ itime, iwidth, iheight,
+ otime, owidth, oheight,
+ kT, kW, kH, dT, dW, dH);
}
-
- THLongStorage_free(size);
}
/* cleanup */
@@ -180,39 +159,34 @@ static int nn_(VolumetricMaxPooling_updateOutput)(lua_State *L)
return 1;
}
-static void nn_(VolumetricMaxPooling_updateGradInput_frame)(real *gradInput_p, real *gradOutput_p,
- real *indx_p, real *indy_p, real *indz_p,
- long nslices,
- long itime, long iwidth, long iheight,
- long otime, long owidth, long oheight,
- int dT, int dW, int dH)
-{
+static void nn_(VolumetricMaxPooling_updateGradInput_frame)(
+ real *gradInput_p, real *gradOutput_p, real *indz_p,
+ long nslices,
+ long itime, long iwidth, long iheight,
+ long otime, long owidth, long oheight,
+ int dT, int dW, int dH) {
long k;
#pragma omp parallel for private(k)
- for (k = 0; k < nslices; k++)
- {
- real *gradInput_p_k = gradInput_p + k*itime*iwidth*iheight;
- real *gradOutput_p_k = gradOutput_p + k*otime*owidth*oheight;
- real *indx_p_k = indx_p + k*otime*owidth*oheight;
- real *indy_p_k = indy_p + k*otime*owidth*oheight;
- real *indz_p_k = indz_p + k*otime*owidth*oheight;
+ for (k = 0; k < nslices; k++) {
+ real *gradInput_p_k = gradInput_p + k * itime * iwidth * iheight;
+ real *gradOutput_p_k = gradOutput_p + k * otime * owidth * oheight;
+ real *indz_p_k = indz_p + k * otime * owidth * oheight;
/* calculate max points */
long ti, i, j;
- for(ti = 0; ti < otime; ti++)
- {
- for(i = 0; i < oheight; i++)
- {
- for(j = 0; j < owidth; j++)
- {
- /* retrieve position of max */
- long maxti = indz_p_k[ti*oheight*owidth + i*owidth + j] - 1 + ti*dT;
- long maxi = indy_p_k[ti*oheight*owidth + i*owidth + j] - 1 + i*dH;
- long maxj = indx_p_k[ti*oheight*owidth + i*owidth + j] - 1 + j*dW;
-
- /* update gradient */
- gradInput_p_k[maxti*iheight*iwidth + maxi*iwidth + maxj] += gradOutput_p_k[ti*oheight*owidth + i*owidth + j];
- }
+ for(ti = 0; ti < otime; ti++) {
+ for(i = 0; i < oheight; i++) {
+ for(j = 0; j < owidth; j++) {
+ /* retrieve position of max */
+ real * indzp = &indz_p_k[ti * oheight * owidth + i * owidth + j];
+ long maxti = ((unsigned char*)(indzp))[0] + ti * dT;
+ long maxi = ((unsigned char*)(indzp))[1] + i * dH;
+ long maxj = ((unsigned char*)(indzp))[2] + j * dW;
+
+ /* update gradient */
+ gradInput_p_k[maxti * iheight * iwidth + maxi * iwidth + maxj] +=
+ gradOutput_p_k[ti * oheight * owidth + i * owidth + j];
+ }
}
}
}
@@ -274,36 +248,31 @@ static int nn_(VolumetricMaxPooling_updateGradInput)(lua_State *L)
/* backprop */
if (input->nDimension == 4) { /* non-batch mode*/
-
- nn_(VolumetricMaxPooling_updateGradInput_frame)(gradInput_data, gradOutput_data,
- indices_data+nslices*otime*owidth*oheight*2,
- indices_data+nslices*otime*owidth*oheight,
- indices_data,
- nslices,
- itime, iwidth, iheight,
- otime, owidth, oheight,
- dT, dW, dH);
+ nn_(VolumetricMaxPooling_updateGradInput_frame)(
+ gradInput_data, gradOutput_data,
+ indices_data,
+ nslices,
+ itime, iwidth, iheight,
+ otime, owidth, oheight,
+ dT, dW, dH);
}
else { /* batch mode */
long p;
long nBatch = input->size[0];
- long istride = nslices*itime*iwidth*iheight;
- long ostride = nslices*otime*owidth*oheight;
-
+ long istride = nslices * itime * iwidth * iheight;
+ long ostride = nslices * otime * owidth * oheight;
+
#pragma omp parallel for private(p)
- for (p = 0; p < nBatch; p++)
- {
+ for (p = 0; p < nBatch; p++) {
nn_(VolumetricMaxPooling_updateGradInput_frame)(
- gradInput_data+p*istride,
- gradOutput_data+p*ostride,
- indices_data+(p+nBatch+nBatch)*ostride,
- indices_data+(p+nBatch)*ostride,
- indices_data+p*ostride,
- nslices,
- itime, iwidth, iheight,
- otime, owidth, oheight,
- dT, dW, dH);
+ gradInput_data + p * istride,
+ gradOutput_data + p * ostride,
+ indices_data + p * ostride,
+ nslices,
+ itime, iwidth, iheight,
+ otime, owidth, oheight,
+ dT, dW, dH);
}
}
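The reworked max pooling replaces the three separate index planes with a single indices tensor whose elements pack the (t, y, x) argmax offsets into the first three bytes of a `real`. A standalone sketch of that round trip (illustrative only, assuming `float` for `real`; the offsets fit in a byte since `kT`, `kW`, `kH` are small ints):

```c
#include <assert.h>
#include <stdio.h>

/* pack three small offsets into the bytes of a float,
 * as updateOutput_frame does through indzp */
static float pack_idx(unsigned char z, unsigned char y, unsigned char x) {
  float slot = 0.0f;
  unsigned char *b = (unsigned char *)&slot;
  b[0] = z; b[1] = y; b[2] = x; b[3] = 0;
  return slot;
}

/* recover the offsets, as updateGradInput_frame does */
static void unpack_idx(float slot,
                       unsigned char *z, unsigned char *y, unsigned char *x) {
  const unsigned char *b = (const unsigned char *)&slot;
  *z = b[0]; *y = b[1]; *x = b[2];
}

int main(void) {
  unsigned char z, y, x;
  unpack_idx(pack_idx(3, 1, 2), &z, &y, &x);
  assert(z == 3 && y == 1 && x == 2);
  printf("ok\n");
  return 0;
}
```

The indices tensor thus shrinks from `3 x N` to `N` elements, which is where the commit's max-pooling performance gain comes from; the packed values stay exact because they are only ever read back through the same byte view, never used arithmetically.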
diff --git a/init.c b/init.c
index 3f040f2..ef834c8 100644
--- a/init.c
+++ b/init.c
@@ -110,6 +110,9 @@
#include "generic/VolumetricMaxPooling.c"
#include "THGenerateFloatTypes.h"
+#include "generic/VolumetricAveragePooling.c"
+#include "THGenerateFloatTypes.h"
+
#include "generic/MultiMarginCriterion.c"
#include "THGenerateFloatTypes.h"
@@ -165,6 +168,7 @@ int luaopen_libnn(lua_State *L)
nn_FloatSpatialAdaptiveMaxPooling_init(L);
nn_FloatVolumetricConvolution_init(L);
nn_FloatVolumetricMaxPooling_init(L);
+ nn_FloatVolumetricAveragePooling_init(L);
nn_FloatMultiMarginCriterion_init(L);
nn_FloatMultiLabelMarginCriterion_init(L);
nn_FloatL1Cost_init(L);
@@ -205,6 +209,7 @@ int luaopen_libnn(lua_State *L)
nn_DoubleSpatialAdaptiveMaxPooling_init(L);
nn_DoubleVolumetricConvolution_init(L);
nn_DoubleVolumetricMaxPooling_init(L);
+ nn_DoubleVolumetricAveragePooling_init(L);
nn_DoubleMultiMarginCriterion_init(L);
nn_DoubleMultiLabelMarginCriterion_init(L);
nn_DoubleL1Cost_init(L);
diff --git a/init.lua b/init.lua
index b1d36db..e6c5827 100644
--- a/init.lua
+++ b/init.lua
@@ -92,6 +92,7 @@ include('SpatialBatchNormalization.lua')
include('VolumetricConvolution.lua')
include('VolumetricMaxPooling.lua')
+include('VolumetricAveragePooling.lua')
include('ParallelTable.lua')
include('ConcatTable.lua')
diff --git a/test.lua b/test.lua
index 94033dd..e60f425 100644
--- a/test.lua
+++ b/test.lua
@@ -1161,7 +1161,7 @@ function nntest.SpatialSubtractiveNormalization_2dkernel()
local ferr,berr = jac.testIO(module,input)
mytester:asserteq(ferr, 0, torch.typename(module) .. ' - i/o forward err ')
mytester:asserteq(berr, 0, torch.typename(module) .. ' - i/o backward err ')
-
+
-- test batch mode
local output = module:forward(input):clone()
local gradOutput = output:clone():uniform(0,1)
@@ -1169,22 +1169,22 @@ function nntest.SpatialSubtractiveNormalization_2dkernel()
local batchSize = 4
local input2 = torch.rand(batchSize,nbfeatures,inputSize,inputSize/2)
input2[2]:copy(input)
-
+
local output2 = module:forward(input2)
local gradOutput2 = output2:clone():uniform(0,1)
gradOutput2[2]:copy(gradOutput)
local gradInput2 = module:backward(input2, gradOutput2)
-
+
mytester:assertTensorEq(output2[2], output, 0.000001, "SpatialSubstractiveNormalization 2d forward batch err")
mytester:assertTensorEq(gradOutput2[2], gradOutput, 0.000001, "SpatialSubstractiveNormalization 2d backward batch err")
-
+
local err = jac.testJacobian(module,input2)
mytester:assertlt(err,precision, 'error on state ')
local ferr,berr = jac.testIO(module,input2)
mytester:asserteq(ferr, 0, torch.typename(module) .. ' - i/o forward err ')
mytester:asserteq(berr, 0, torch.typename(module) .. ' - i/o backward err ')
-
+
end
function nntest.SpatialSubtractiveNormalization_1dkernel()
@@ -1201,7 +1201,7 @@ function nntest.SpatialSubtractiveNormalization_1dkernel()
local ferr,berr = jac.testIO(module,input)
mytester:asserteq(ferr, 0, torch.typename(module) .. ' - i/o forward err ')
mytester:asserteq(berr, 0, torch.typename(module) .. ' - i/o backward err ')
-
+
-- test batch mode
local output = module:forward(input):clone()
local gradOutput = output:clone():uniform(0,1)
@@ -1209,15 +1209,15 @@ function nntest.SpatialSubtractiveNormalization_1dkernel()
local batchSize = 4
local input2 = torch.rand(batchSize,nbfeatures,inputSize,inputSize/2)
input2[2]:copy(input)
-
+
local output2 = module:forward(input2)
local gradOutput2 = output2:clone():uniform(0,1)
gradOutput2[2]:copy(gradOutput)
local gradInput2 = module:backward(input2, gradOutput2)
-
+
mytester:assertTensorEq(output2[2], output, 0.000001, "SpatialSubstractiveNormalization 1d forward batch err")
mytester:assertTensorEq(gradOutput2[2], gradOutput, 0.000001, "SpatialSubstractiveNormalization 1d backward batch err")
-
+
local err = jac.testJacobian(module,input2)
mytester:assertlt(err,precision, 'error on state ')
@@ -1240,7 +1240,7 @@ function nntest.SpatialDivisiveNormalization_2dkernel()
local ferr,berr = jac.testIO(module,input)
mytester:asserteq(ferr, 0, torch.typename(module) .. ' - i/o forward err ')
mytester:asserteq(berr, 0, torch.typename(module) .. ' - i/o backward err ')
-
+
-- test batch mode
local output = module:forward(input):clone()
local gradOutput = output:clone():uniform(0,1)
@@ -1248,15 +1248,15 @@ function nntest.SpatialDivisiveNormalization_2dkernel()
local batchSize = 4
local input2 = torch.rand(batchSize,nbfeatures,inputSize,inputSize/2)
input2[2]:copy(input)
-
+
local output2 = module:forward(input2)
local gradOutput2 = output2:clone():uniform(0,1)
gradOutput2[2]:copy(gradOutput)
local gradInput2 = module:backward(input2, gradOutput2)
-
+
mytester:assertTensorEq(output2[2], output, 0.000001, "SpatialDivisiveNormalization 2d forward batch err")
mytester:assertTensorEq(gradOutput2[2], gradOutput, 0.000001, "SpatialDivisiveNormalization 2d backward batch err")
-
+
local err = jac.testJacobian(module,input2)
mytester:assertlt(err,precision, 'error on state ')
@@ -1279,7 +1279,7 @@ function nntest.SpatialDivisiveNormalization_1dkernel()
local ferr,berr = jac.testIO(module,input)
mytester:asserteq(ferr, 0, torch.typename(module) .. ' - i/o forward err ')
mytester:asserteq(berr, 0, torch.typename(module) .. ' - i/o backward err ')
-
+
-- test batch mode
local output = module:forward(input):clone()
local gradOutput = output:clone():uniform(0,1)
@@ -1287,15 +1287,15 @@ function nntest.SpatialDivisiveNormalization_1dkernel()
local batchSize = 4
local input2 = torch.rand(batchSize,nbfeatures,inputSize,inputSize/2)
input2[2]:copy(input)
-
+
local output2 = module:forward(input2)
local gradOutput2 = output2:clone():uniform(0,1)
gradOutput2[2]:copy(gradOutput)
local gradInput2 = module:backward(input2, gradOutput2)
-
+
mytester:assertTensorEq(output2[2], output, 0.000001, "SpatialDivisiveNormalization 1d forward batch err")
mytester:assertTensorEq(gradOutput2[2], gradOutput, 0.000001, "SpatialDivisiveNormalization 1d backward batch err")
-
+
local err = jac.testJacobian(module,input2)
mytester:assertlt(err,precision, 'error on state ')
@@ -1318,7 +1318,7 @@ function nntest.SpatialContrastiveNormalization()
local ferr,berr = jac.testIO(module,input)
mytester:asserteq(ferr, 0, torch.typename(module) .. ' - i/o forward err ')
mytester:asserteq(berr, 0, torch.typename(module) .. ' - i/o backward err ')
-
+
-- test batch mode and type
local output = module:forward(input):clone()
local gradOutput = output:clone():uniform(0,1)
@@ -1326,16 +1326,16 @@ function nntest.SpatialContrastiveNormalization()
local batchSize = 4
local input2 = torch.rand(batchSize,nbfeatures,inputSize,inputSize/2):float()
input2[2]:copy(input)
-
+
module:float() -- type-cast
local output2 = module:forward(input2)
local gradOutput2 = output2:clone():uniform(0,1)
gradOutput2[2]:copy(gradOutput)
local gradInput2 = module:backward(input2, gradOutput2)
-
+
mytester:assertTensorEq(output2[2], output:float(), 0.000001, "SpatialContrastiveNormalization 2d forward batch err")
mytester:assertTensorEq(gradOutput2[2], gradOutput:float(), 0.000001, "SpatialContrastiveNormalization 2d backward batch err")
-
+
module:double()
input2 = input2:double()
local err = jac.testJacobian(module,input2)
@@ -2044,9 +2044,9 @@ function nntest.SpatialAdaptiveMaxPooling()
local ferr, berr = jac.testIO(module, input)
mytester:asserteq(ferr, 0, torch.typename(module) .. ' - i/o forward err (Batch) ')
mytester:asserteq(berr, 0, torch.typename(module) .. ' - i/o backward err (Batch) ')
-
+
-- non-contiguous
-
+
input = torch.rand(from,ini,inj):transpose(2,3)
module = nn.SpatialAdaptiveMaxPooling(ki,kj)
local inputc = input:contiguous() -- contiguous
@@ -2056,13 +2056,13 @@ function nntest.SpatialAdaptiveMaxPooling()
local gradInput = module:backward(input, output):clone()
local gradInputc = module:backward(inputc, outputc):clone()
mytester:asserteq(0, (gradInput-gradInputc):abs():max(), torch.typename(module) .. ' - non-contiguous err ')
-
+
-- non-contiguous batch
local nbatch = math.random(1,3)
input = torch.rand(nbatch,from,ini,inj):transpose(1,3):transpose(2,4)
local inputc = input:contiguous() -- contiguous
module = nn.SpatialAdaptiveMaxPooling(ki,kj)
-
+
local output = module:forward(input):clone()
local outputc = module:forward(inputc):clone()
mytester:asserteq(0, (output-outputc):abs():max(), torch.typename(module) .. ' - batch non-contiguous err ')
@@ -2365,6 +2365,43 @@ function nntest.VolumetricConvolutionBatchCompare()
batchcompare(module,input, {'weight','bias','gradWeight','gradBias'})
end
+function nntest.VolumetricAveragePooling()
+ local from = math.random(2,3)
+ local kt = math.random(3,4)
+ local ki = math.random(3,4)
+ local kj = math.random(3,4)
+ local st = math.random(2,3)
+ local si = math.random(2,3)
+ local sj = math.random(2,3)
+ local outt = math.random(3,4)
+ local outi = math.random(3,4)
+ local outj = math.random(3,4)
+ local int = (outt-1)*st+kt
+ local ini = (outi-1)*si+ki
+ local inj = (outj-1)*sj+kj
+ local module = nn.VolumetricAveragePooling(kt, ki, kj, st, si, sj)
+ local input = torch.Tensor(from, int, inj, ini):zero()
+
+ local err = jac.testJacobian(module, input)
+ mytester:assertlt(err, precision, 'error on state ')
+
+ local ferr, berr = jac.testIO(module, input)
+ mytester:asserteq(0, ferr, torch.typename(module) .. ' - i/o forward err ')
+ mytester:asserteq(0, berr, torch.typename(module) .. ' - i/o backward err ')
+
+ -- batch
+ local nbatch = math.random(2,3)
+ module = nn.VolumetricAveragePooling(kt, ki, kj, st, si, sj)
+ input = torch.Tensor(nbatch, from, int, inj, ini):zero()
+
+ local err = jac.testJacobian(module, input)
+ mytester:assertlt(err, precision, 'error on state (Batch) ')
+
+ local ferr, berr = jac.testIO(module, input)
+ mytester:asserteq(ferr, 0, torch.typename(module) .. ' - i/o forward err (Batch) ')
+ mytester:asserteq(berr, 0, torch.typename(module) .. ' - i/o backward err (Batch) ')
+end
+
function nntest.VolumetricMaxPooling()
local from = math.random(2,3)
local kt = math.random(3,4)
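To exercise the new test in isolation, the harness at the bottom of test.lua can be given a name filter; a sketch, assuming the stock `nn.test` entry point:

```lua
require 'nn'
-- run only the new unit test; calling nn.test() with no argument runs the suite
nn.test{'VolumetricAveragePooling'}
```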
@@ -2740,42 +2777,42 @@ function nntest.AddConstant()
-- Test BPROP
local err = jac.testJacobian(mod, input)
mytester:assertlt(err, precision, 'bprop error ')
-
+
-- inplace comparisons
local ini = math.random(3,5)
local inj = math.random(3,5)
local ink = math.random(3,5)
local constant = torch.uniform()*math.random(1,10)
-
+
local input1 = torch.rand(ink, inj, ini)
local input2 = input1:clone()
-
+
local module1 = nn.AddConstant(constant,true)
local module2 = nn.AddConstant(constant)
-
+
local gradOutput1 = torch.rand(ink, inj, ini)
local gradOutput2 = gradOutput1:clone()
-
+
local out1 = module1:forward(input1)
local out2 = module2:forward(input2)
-
- mytester:asserteq(0, (out1-out2):abs():max(), torch.typename(module1) ..
+
+ mytester:asserteq(0, (out1-out2):abs():max(), torch.typename(module1) ..
' - in-place forward err ')
local gradInput1 = module1:backward(input1, gradOutput1)
local gradInput2 = module2:backward(input2, gradOutput2)
-
- mytester:asserteq(0, (gradInput1-gradInput2):abs():max(),
+
+ mytester:asserteq(0, (gradInput1-gradInput2):abs():max(),
torch.typename(module1) .. ' - in-place backward err ')
-
+
local input1 = torch.rand(ink, inj, ini)
local input2 = input1:clone()
-
+
module1:forward(input1)
module1:backward(module1.output,torch.rand(input1:size()))
-
+
local err = (input1-input2):abs():max()
- mytester:asserteq(err, 0, torch.typename(module1) ..
+ mytester:asserteq(err, 0, torch.typename(module1) ..
' - inplace input change err ')
end
@@ -2797,42 +2834,42 @@ function nntest.MulConstant()
-- Test BPROP
local err = jac.testJacobian(mod, input)
mytester:assertlt(err, precision, 'bprop error ')
-
+
-- inplace comparisons
local ini = math.random(3,5)
local inj = math.random(3,5)
local ink = math.random(3,5)
local constant = torch.uniform()*math.random(1,10)
-
+
local input1 = torch.rand(ink, inj, ini)
local input2 = input1:clone()
-
+
local module1 = nn.MulConstant(constant,true)
local module2 = nn.MulConstant(constant)
-
+
local gradOutput1 = torch.rand(ink, inj, ini)
local gradOutput2 = gradOutput1:clone()
-
+
local out1 = module1:forward(input1)
local out2 = module2:forward(input2)
-
- mytester:asserteq(0, (out1-out2):abs():max(), torch.typename(module1) ..
+
+ mytester:asserteq(0, (out1-out2):abs():max(), torch.typename(module1) ..
' - in-place forward err ')
local gradInput1 = module1:backward(input1, gradOutput1)
local gradInput2 = module2:backward(input2, gradOutput2)
-
- mytester:asserteq(0, (gradInput1-gradInput2):abs():max(),
+
+ mytester:asserteq(0, (gradInput1-gradInput2):abs():max(),
torch.typename(module1) .. ' - in-place backward err ')
-
+
local input1 = torch.rand(ink, inj, ini)
local input2 = input1:clone()
-
+
module1:forward(input1)
module1:backward(module1.output,torch.rand(input1:size()))
-
+
local err = (input1-input2):abs():max()
- mytester:assertalmosteq(err, 0, 1e-15, torch.typename(module1) ..
+ mytester:assertalmosteq(err, 0, 1e-15, torch.typename(module1) ..
' - inplace input change err ')
end
@@ -3563,10 +3600,10 @@ function nntest.Replicate()
mytester:assertTensorEq(vOutput1, expected1, precision, 'Wrong tiling of data when replicating vector.')
mytester:assertTensorEq(vOutput2, expected2, precision, 'Wrong tiling of data when replicating vector.')
-
+
-- batch mode
local vector = torch.rand(4,3)
-
+
local r1 = nn.Replicate(2, 1, 1)
local r2 = nn.Replicate(2, 2, 1)