Volumetric Average Pooling + doc + unit test, better performance for Volumetric Max Pooling

author: soumith <soumith@fb.com> 2015-05-28 07:42:59 +0300
committer: soumith <soumith@fb.com> 2015-05-28 07:42:59 +0300
commit: ec6953806d02ea189cc9fb5ba83ada05eca7d28c (patch)
tree: 8a51527d9e30802b97aeed5999b9939f3f62b0ee
parent: e35f09a1f7ff6a123e841dfae24485999f7ce31d (diff)
7 files changed, 519 insertions, 198 deletions
diff --git a/VolumetricAveragePooling.lua b/VolumetricAveragePooling.lua
new file mode 100644
index 0000000..1fc0ec9
--- /dev/null
+++ b/VolumetricAveragePooling.lua
@@ -0,0 +1,34 @@
+local VolumetricAveragePooling, parent = torch.class( -- class table plus the parent used by __init
+   'nn.VolumetricAveragePooling', 'nn.Module') -- declared under this name with 'nn.Module' as parent
+
+function VolumetricAveragePooling:__init(kT, kW, kH, dT, dW, dH)
+   -- Constructor. kT, kW, kH give the pooling window size; dT, dW, dH
+   -- give the step between windows. Each step is optional and falls
+   -- back to the matching window size when it is not supplied.
+   parent.__init(self)
+
+   -- window extent
+   self.kT, self.kW, self.kH = kT, kW, kH
+
+   -- step between consecutive windows
+   self.dT = dT or kT
+   self.dW = dW or kW
+   self.dH = dH or kH
+end
+
+function VolumetricAveragePooling:updateOutput(input) -- forward pass; returns self.output
+   input.nn.VolumetricAveragePooling_updateOutput(self, input) -- backend routine found on the input tensor's `nn` table; it resizes and fills self.output
+   return self.output
+end
+
+function VolumetricAveragePooling:updateGradInput(input, gradOutput) -- backward pass; returns self.gradInput
+   input.nn.VolumetricAveragePooling_updateGradInput(self, input, gradOutput) -- backend sizes self.gradInput like input, zeroes it and accumulates the gradients
+   return self.gradInput
+end
+
+function VolumetricAveragePooling:empty() -- shrinks the cached gradInput and output buffers
+   self.gradInput:resize() -- NOTE(review): resize() without sizes presumably leaves a dimensionless tensor - confirm
+   self.gradInput:storage():resize(0) -- backing storage reduced to zero elements
+   self.output:resize() -- same two steps for the forward buffer
+   self.output:storage():resize(0)
+end
diff --git a/doc/convolution.md b/doc/convolution.md
index f2380f2..d0a02fa 100755
--- a/doc/convolution.md
+++ b/doc/convolution.md
@@ -23,6 +23,7 @@ a kernel for computing the weighted average in a neighborhood ;
  * [Volumetric Modules](#nn.VolumetricModules) apply to inputs with three-dimensional relationships (e.g. videos) :
    * [VolumetricConvolution](#nn.VolumetricConvolution) : a 3D convolution over an input video (a sequence of images) ;
    * [VolumetricMaxPooling](#nn.VolumetricMaxPooling) : a 3D max-pooling operation over an input video.
+   * [VolumetricAveragePooling](#nn.VolumetricAveragePooling) : a 3D average-pooling operation over an input video.
 
 <a name="nn.TemporalModules"/>
 ## Temporal Modules ##
@@ -605,3 +606,14 @@ module = nn.VolumetricMaxPooling(kT, kW, kH [, dT, dW, dH])
 Applies 3D max-pooling operation in `kTxkWxkH` regions by step size
 `dTxdWxdH` steps. The number of output features is equal to the number of
 input planes / dT.
+
+<a name="nn.VolumetricAveragePooling"/>
+### VolumetricAveragePooling ###
+
+```lua
+module = nn.VolumetricAveragePooling(kT, kW, kH [, dT, dW, dH])
+```
+
+Applies a 3D average-pooling operation over `kTxkWxkH` regions, moving the
+window with a step size of `dTxdWxdH`. The number of output features is
+equal to the number of input planes / dT.
diff --git a/generic/VolumetricAveragePooling.c b/generic/VolumetricAveragePooling.c
new file mode 100644
index 0000000..28bd0b0
--- /dev/null
+++ b/generic/VolumetricAveragePooling.c
@@ -0,0 +1,263 @@
+#ifndef TH_GENERIC_FILE
+#define TH_GENERIC_FILE "generic/VolumetricAveragePooling.c"
+#else
+
+static void nn_(VolumetricAveragePooling_updateOutput_frame)(
+  real *input_p, real *output_p, long nslices,
+  long itime, long iwidth, long iheight,
+  long otime, long owidth, long oheight,
+  int kT, int kW, int kH, int dT, int dW, int dH) { /* mean-pools nslices slices read from input_p into output_p */
+  long k; /* slice index; slices are processed independently by the OpenMP loop */
+#pragma omp parallel for private(k)
+  for (k = 0; k < nslices; k++)  {
+    /* visit every output cell (ti, i, j) of slice k */
+    long i, j, ti;
+    for(ti = 0; ti < otime; ti++) {
+      for(i = 0; i < oheight; i++) {
+        for(j = 0; j < owidth; j++) {
+          /* ip: first input element of the window at (ti*dT, i*dH, j*dW); op: output cell to write */
+          real *ip = input_p + k * itime * iwidth * iheight
+            + ti * iwidth * iheight * dT +  i * iwidth * dH + j * dW;
+          real *op = output_p + k * otime * owidth * oheight
+            + ti * owidth * oheight + i * owidth + j;
+
+          /* accumulate the kT x kH x kW window: */
+          real sum = 0.0;
+          int x,y,z;
+
+          for(z=0; z < kT; z++) {
+            for(y = 0; y < kH; y++) {
+              for(x = 0; x < kW; x++) {
+                sum +=  *(ip + z * iwidth * iheight + y * iwidth + x);
+              }
+            }
+          }
+
+          /* set output to local average */
+          *op = sum / (kT * kW * kH);
+        }
+      }
+    }
+  }
+}
+
+static int nn_(VolumetricAveragePooling_updateOutput)(lua_State *L) { /* Lua entry: stack 1 = module (self), stack 2 = input tensor */
+  THTensor *input = luaT_checkudata(L, 2, torch_Tensor);
+  int kT = luaT_getfieldcheckint(L, 1, "kT");
+  int kW = luaT_getfieldcheckint(L, 1, "kW");
+  int kH = luaT_getfieldcheckint(L, 1, "kH");
+  int dT = luaT_getfieldcheckint(L, 1, "dT");
+  int dW = luaT_getfieldcheckint(L, 1, "dW");
+  int dH = luaT_getfieldcheckint(L, 1, "dH");
+  THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_Tensor);
+  long nslices;
+  long itime;
+  long iheight;
+  long iwidth;
+  long otime;
+  long oheight;
+  long owidth;
+  real *input_data;
+  real *output_data;
+
+  luaL_argcheck(L, input->nDimension == 4 || input->nDimension == 5, 2,
+                "4D or 5D (batch-mode) tensor expected");
+
+  int dimN = 0; /* dimension indices for the 4D layout (slices, time, height, width) */
+  int dimt = 1;
+  int dimh = 2;
+  int dimw = 3;
+
+  if (input->nDimension == 5) { /* a leading batch dimension shifts every index by one */
+    dimN++;
+    dimt++;
+    dimh++;
+    dimw++;
+  }
+
+  luaL_argcheck(L, input->size[dimw] >= kW && input->size[dimh] >= kH &&
+                input->size[dimt] >= kT, 2,
+                "input image smaller than kernel size");
+
+  /* sizes; NOTE(review): dT/dW/dH are used as divisors without being validated, a zero stride would divide by zero */
+  nslices = input->size[dimN];
+  itime   = input->size[dimt];
+  iheight = input->size[dimh];
+  iwidth  = input->size[dimw];
+  otime   = (itime   - kT) / dT + 1; /* integer division: only windows lying fully inside the input are counted */
+  oheight = (iheight - kH) / dH + 1;
+  owidth  = (iwidth  - kW) / dW + 1;
+
+  /* get contiguous input; the tensor obtained here is released in the cleanup step below */
+  input = THTensor_(newContiguous)(input);
+
+  if (input->nDimension == 4) { /* non-batch mode */
+    /* resize output */
+    THTensor_(resize4d)(output, nslices, otime, oheight, owidth);
+
+    input_data = THTensor_(data)(input);
+    output_data = THTensor_(data)(output);
+
+    nn_(VolumetricAveragePooling_updateOutput_frame)(input_data, output_data,
+                                                     nslices,
+                                                     itime, iwidth, iheight,
+                                                     otime, owidth, oheight,
+                                                     kT, kW, kH, dT, dW, dH);
+  } else { /* batch mode */
+    long p;
+    long nBatch = input->size[0];
+
+    long istride = nslices * itime * iwidth * iheight; /* input elements per batch sample */
+    long ostride = nslices * otime * owidth * oheight; /* output elements per batch sample */
+
+    /* resize output */
+    THTensor_(resize5d)(output, nBatch, nslices, otime, oheight, owidth);
+
+    input_data = THTensor_(data)(input);
+    output_data = THTensor_(data)(output);
+
+#pragma omp parallel for private(p)
+    for (p=0; p < nBatch; p++) { /* one frame call per batch sample */
+      nn_(VolumetricAveragePooling_updateOutput_frame)(
+        input_data + p * istride, output_data + p * ostride,
+        nslices, itime, iwidth, iheight, otime, owidth, oheight,
+        kT, kW, kH, dT, dW, dH);
+    }
+  }
+
+  /* cleanup */
+  THTensor_(free)(input);
+  return 1;
+}
+
+static void nn_(VolumetricAveragePooling_updateGradInput_frame)(
+  real *gradInput_p, real *gradOutput_p, long nslices,
+  long itime, long iwidth, long iheight,
+  long otime, long owidth, long oheight,
+  int kT, int kW, int kH, int dT, int dW, int dH) { /* backward of the mean: adds each output gradient, divided evenly, to its window in gradInput_p */
+  long k; /* slice index; slices are processed independently by the OpenMP loop */
+#pragma omp parallel for private(k)
+  for (k = 0; k < nslices; k++)  {
+    /* visit every output cell (ti, i, j) of slice k */
+    long i, j, ti;
+    for(ti = 0; ti < otime; ti++) {
+      for(i = 0; i < oheight; i++) {
+        for(j = 0; j < owidth; j++) {
+          /* ip: first gradInput element of the window at (ti*dT, i*dH, j*dW); op: matching gradOutput cell */
+          real *ip = gradInput_p + k * itime * iwidth * iheight
+            + ti * iwidth * iheight * dT +  i * iwidth * dH + j * dW;
+          real *op = gradOutput_p + k * otime * owidth * oheight
+            + ti * owidth * oheight + i * owidth + j;
+
+          /* scatter gradients out to footprint: every window element gets an equal share */
+          real val  = *op / (kT * kW * kH);
+          int x,y,z;
+          for(z=0; z < kT; z++) {
+            for(y = 0; y < kH; y++) {
+              for(x = 0; x < kW; x++) {
+                *(ip + z * iwidth * iheight + y * iwidth + x) += val; /* += : windows overlap when a stride is smaller than the kernel */
+              }
+            }
+          }
+        }
+      }
+    }
+  }
+}
+
+static int nn_(VolumetricAveragePooling_updateGradInput)(lua_State *L) {
+  /* Lua entry point: stack 1 = module (self), 2 = input, 3 = gradOutput */
+  THTensor *input = luaT_checkudata(L, 2, torch_Tensor);
+  THTensor *gradOutput = luaT_checkudata(L, 3, torch_Tensor);
+  int dT = luaT_getfieldcheckint(L, 1, "dT");
+  int dW = luaT_getfieldcheckint(L, 1, "dW");
+  int dH = luaT_getfieldcheckint(L, 1, "dH");
+  int kT = luaT_getfieldcheckint(L, 1, "kT");
+  int kW = luaT_getfieldcheckint(L, 1, "kW");
+  int kH = luaT_getfieldcheckint(L, 1, "kH");
+  THTensor *gradInput = luaT_getfieldcheckudata(L, 1, "gradInput",
+                                                torch_Tensor);
+  long nslices; /* long, as in updateOutput: the stride products below must not be computed in int */
+  long itime;
+  long iheight;
+  long iwidth;
+  long otime;
+  long oheight;
+  long owidth;
+  real *gradInput_data;
+  real *gradOutput_data;
+
+  int dimN = 0; /* dimension indices for the 4D layout (slices, time, height, width) */
+  int dimt = 1;
+  int dimh = 2;
+  int dimw = 3;
+
+  /* get contiguous gradOutput; the tensor obtained here is released in the cleanup step below */
+  gradOutput = THTensor_(newContiguous)(gradOutput);
+
+  /* size gradInput like input and clear it: the frame routine accumulates with += */
+  THTensor_(resizeAs)(gradInput, input);
+  THTensor_(zero)(gradInput);
+
+  if (input->nDimension == 5) { /* a leading batch dimension shifts every index by one */
+    dimN++;
+    dimt++;
+    dimh++;
+    dimw++;
+  }
+
+  /* sizes: input extents from input, output extents from gradOutput */
+  nslices = input->size[dimN];
+  itime = input->size[dimt];
+  iheight = input->size[dimh];
+  iwidth = input->size[dimw];
+  otime = gradOutput->size[dimt];
+  oheight = gradOutput->size[dimh];
+  owidth = gradOutput->size[dimw];
+
+  /* get raw pointers */
+  gradInput_data = THTensor_(data)(gradInput);
+  gradOutput_data = THTensor_(data)(gradOutput);
+
+  /* backprop */
+  if (input->nDimension == 4) { /* non-batch mode*/
+    nn_(VolumetricAveragePooling_updateGradInput_frame)(
+      gradInput_data, gradOutput_data, nslices,
+      itime, iwidth, iheight, otime, owidth, oheight,
+      kT, kW, kH, dT, dW, dH);
+  } else { /* batch mode */
+    long p;
+    long nBatch = input->size[0];
+
+    long istride = nslices * itime * iwidth * iheight; /* gradInput elements per batch sample */
+    long ostride = nslices * otime * owidth * oheight; /* gradOutput elements per batch sample */
+
+#pragma omp parallel for private(p)
+    for (p = 0; p < nBatch; p++) { /* one frame call per batch sample */
+      nn_(VolumetricAveragePooling_updateGradInput_frame)(
+        gradInput_data  + p * istride, gradOutput_data + p * ostride, nslices,
+        itime, iwidth, iheight, otime, owidth, oheight,
+        kT, kW, kH, dT, dW, dH);
+    }
+  }
+
+  /* cleanup */
+  THTensor_(free)(gradOutput);
+  return 1;
+}
+
+static const struct luaL_Reg nn_(VolumetricAveragePooling__) [] = { /* Lua-visible name -> C function, passed to luaT_registeratname by the init function */
+  {"VolumetricAveragePooling_updateOutput",
+   nn_(VolumetricAveragePooling_updateOutput)},
+  {"VolumetricAveragePooling_updateGradInput",
+   nn_(VolumetricAveragePooling_updateGradInput)},
+  {NULL, NULL} /* sentinel terminating the list */
+};
+
+static void nn_(VolumetricAveragePooling_init)(lua_State *L) { /* called once per tensor type from luaopen_libnn */
+  luaT_pushmetatable(L, torch_Tensor); /* push the metatable of this tensor type */
+  luaT_registeratname(L, nn_(VolumetricAveragePooling__), "nn"); /* presumably adds the functions to its "nn" field, matching the input.nn.* calls in the Lua module */
+  lua_pop(L,1); /* pop one stack slot (the metatable pushed above) */
+}
+
+#endif
diff --git a/generic/VolumetricMaxPooling.c b/generic/VolumetricMaxPooling.c
index 28fd5fe..4114889 100644
--- a/generic/VolumetricMaxPooling.c
+++ b/generic/VolumetricMaxPooling.c
@@ -2,64 +2,55 @@
 #define TH_GENERIC_FILE "generic/VolumetricMaxPooling.c"
 #else
 
-static void nn_(VolumetricMaxPooling_updateOutput_frame)(real *input_p, real *output_p,
-							 real *indx_p, real *indy_p, real *indz_p,
-							 long nslices,
-							 long itime, long iwidth, long iheight,
-							 long otime, long owidth, long oheight,
-							 int kT, int kW, int kH, int dT, int dW, int dH)
-{
+static void nn_(VolumetricMaxPooling_updateOutput_frame)(
+  real *input_p, real *output_p, real *indz_p,
+  long nslices, long itime, long iwidth, long iheight,
+  long otime, long owidth, long oheight,
+  int kT, int kW, int kH, int dT, int dW, int dH) {
   long k;
 #pragma omp parallel for private(k)
   for (k = 0; k < nslices; k++)
   {
     /* loop over output */
     long i, j, ti;
-    for(ti = 0; ti < otime; ti++)
-    {
-      for(i = 0; i < oheight; i++)
-      {
-	for(j = 0; j < owidth; j++)
-	{
-	  /* local pointers */
-	  real *ip = input_p   + k*itime*iwidth*iheight + ti*iwidth*iheight*dT +  i*iwidth*dH + j*dW;
-	  real *op = output_p  + k*otime*owidth*oheight + ti*owidth*oheight + i*owidth + j;
-	  real *indzp = indz_p + k*otime*owidth*oheight + ti*owidth*oheight + i*owidth + j;
-	  real *indyp = indy_p + k*otime*owidth*oheight + ti*owidth*oheight + i*owidth + j;
-	  real *indxp = indx_p + k*otime*owidth*oheight + ti*owidth*oheight + i*owidth + j;
-	  
-	  /* compute local max: */
-	  real maxval = -THInf;
-	  int x,y,z;
-
-	  *indzp = -1;
-	  *indyp = -1;
-	  *indxp = -1;
-	  for(z=0; z < kT; z++)
-	  {
-	    for(y = 0; y < kH; y++)
-	    {
-	      for(x = 0; x < kW; x++)
-	      {
-		real val = *(ip + z*iwidth*iheight + y*iwidth + x);
-		if (val > maxval)
-		{
-		  maxval = val;
-		  *indzp = z+1;
-		  *indyp = y+1;
-		  *indxp = x+1;
-		}
-	      }
-	    }
-	  }
-
-	  /* set output to local max */
-	  *op = maxval;
-	  
-	  /* store location of max (x,y) */
-	  /**indyp = (int)(maxindex / kW)+1;*/
-	  /**indxp = (maxindex % kW) +1;*/
-	}
+    for(ti = 0; ti < otime; ti++) {
+      for(i = 0; i < oheight; i++) {
+        for(j = 0; j < owidth; j++) {
+          /* ip: first input element of the window; op / indzp: output and index cells to write */
+          real *ip = input_p + k * itime * iwidth * iheight
+            + ti * iwidth * iheight * dT + i * iwidth * dH + j * dW;
+          real *op = output_p + k * otime * owidth * oheight
+            + ti * owidth * oheight + i * owidth + j;
+          real *indzp = indz_p + k * otime * owidth * oheight
+            + ti * owidth * oheight + i * owidth + j;
+
+          /* compute local max: */
+          real maxval = -THInf;
+          int x,y,z;
+          int mx = 0, my = 0, mz = 0; /* default to the window origin: nothing is assigned below when no element is > -THInf (all NaN / -inf) */
+
+          for(z = 0; z < kT; z++) {
+            for(y = 0; y < kH; y++) {
+              for(x = 0; x < kW; x++) {
+                real val = *(ip + z * iwidth * iheight + y * iwidth + x);
+                if (val > maxval) {
+                  maxval = val;
+                  mz = z;
+                  my = y;
+                  mx = x;
+                }
+              }
+            }
+          }
+
+          /* pack the window-relative argmax into the first bytes of the index cell; one unsigned char per offset, so offsets above 255 do not fit */
+          ((unsigned char*)(indzp))[0] = mz;
+          ((unsigned char*)(indzp))[1] = my;
+          ((unsigned char*)(indzp))[2] = mx;
+          ((unsigned char*)(indzp))[3] = 0;
+          /* set output to local max */
+          *op = maxval;
+        }
       }
     }
   }
@@ -87,7 +78,8 @@ static int nn_(VolumetricMaxPooling_updateOutput)(lua_State *L)
   real *output_data;
   real *indices_data;
 
-  luaL_argcheck(L, input->nDimension == 4 || input->nDimension == 5, 2, "4D or 5D (batch-mode) tensor expected");
+  luaL_argcheck(L, input->nDimension == 4 || input->nDimension == 5, 2,
+                "4D or 5D (batch-mode) tensor expected");
 
   int dimN = 0;
   int dimt = 1;
@@ -101,16 +93,18 @@ static int nn_(VolumetricMaxPooling_updateOutput)(lua_State *L)
     dimw++;
   }
 
-  luaL_argcheck(L, input->size[dimw] >= kW && input->size[dimh] >= kH && input->size[dimt] >= kT, 2, "input image smaller than kernel size");
+  luaL_argcheck(L, input->size[dimw] >= kW &&
+                input->size[dimh] >= kH && input->size[dimt] >= kT, 2,
+                "input image smaller than kernel size");
 
   /* sizes */
   nslices = input->size[dimN];
-  itime = input->size[dimt];
+  itime   = input->size[dimt];
   iheight = input->size[dimh];
-  iwidth = input->size[dimw];
-  otime = (itime - kT) / dT + 1;
+  iwidth  = input->size[dimw];
+  otime   = (itime   - kT) / dT + 1;
   oheight = (iheight - kH) / dH + 1;
-  owidth = (iwidth - kW) / dW + 1;
+  owidth  = (iwidth  - kW) / dW + 1;
 
   /* get contiguous input */
   input = THTensor_(newContiguous)(input);
@@ -118,61 +112,46 @@ static int nn_(VolumetricMaxPooling_updateOutput)(lua_State *L)
   if (input->nDimension == 4) { /* non-batch mode */
     /* resize output */
     THTensor_(resize4d)(output, nslices, otime, oheight, owidth);
-    /* indices will contain ti,i,j locations for each output point */
-    THTensor_(resize5d)(indices, 3, nslices, otime, oheight, owidth);
-    
+    /* indices will contain ti,i,j uchar locations packed into float/double */
+    THTensor_(resize4d)(indices, nslices, otime, oheight, owidth);
+
     input_data = THTensor_(data)(input);
     output_data = THTensor_(data)(output);
     indices_data = THTensor_(data)(indices);
-    
+
     nn_(VolumetricMaxPooling_updateOutput_frame)(input_data, output_data,
-  					       indices_data+nslices*otime*owidth*oheight*2, 
-  					       indices_data+nslices*otime*owidth*oheight, 
-  					       indices_data,
-  					       nslices,
-  					       itime, iwidth, iheight,
-  					       otime, owidth, oheight,
-  					       kT, kW, kH, dT, dW, dH);
-  }
-  else { /* batch mode */
+                                                 indices_data,
+                                                 nslices,
+                                                 itime, iwidth, iheight,
+                                                 otime, owidth, oheight,
+                                                 kT, kW, kH, dT, dW, dH);
+  } else { /* batch mode */
     long p;
     long nBatch = input->size[0];
 
-    long istride = nslices*itime*iwidth*iheight;
-    long ostride = nslices*otime*owidth*oheight;
+    long istride = nslices * itime * iwidth * iheight;
+    long ostride = nslices * otime * owidth * oheight;
 
     /* resize output */
     THTensor_(resize5d)(output, nBatch, nslices, otime, oheight, owidth);
     /* indices will contain ti,i,j locations for each output point */
-
-    THLongStorage* size = THLongStorage_newWithSize(6);
-    size->data[0] = 3; size->data[1] = nBatch;
-    size->data[2] = nslices; size->data[3] = otime; 
-    size->data[4] = oheight; size->data[5] = owidth;
-    THTensor_(resize)(indices, size, NULL); /* resize6d not available */
-    //TODO: Replace with resize6d when available
-    //THTensor_(resize6d)(indices, 3, nBatch, nslices, otime, oheight, owidth);
+    THTensor_(resize5d)(indices, nBatch, nslices, otime, oheight, owidth);
 
     input_data = THTensor_(data)(input);
     output_data = THTensor_(data)(output);
     indices_data = THTensor_(data)(indices);
 
 #pragma omp parallel for private(p)
-    for (p=0; p < nBatch; p++) 
-    {
+    for (p=0; p < nBatch; p++) {
       nn_(VolumetricMaxPooling_updateOutput_frame)(
-                    input_data+p*istride, 
-                    output_data+p*ostride,
-                    indices_data+(p+nBatch+nBatch)*ostride, 
-                    indices_data+(p+nBatch)*ostride, 
-                    indices_data+p*ostride,
-                    nslices,
-                    itime, iwidth, iheight,
-                    otime, owidth, oheight,
-                    kT, kW, kH, dT, dW, dH);
+        input_data   + p * istride,
+        output_data  + p * ostride,
+        indices_data + p * ostride,
+        nslices,
+        itime, iwidth, iheight,
+        otime, owidth, oheight,
+        kT, kW, kH, dT, dW, dH);
     }
-
-    THLongStorage_free(size);
   }
 
   /* cleanup */
@@ -180,39 +159,34 @@ static int nn_(VolumetricMaxPooling_updateOutput)(lua_State *L)
   return 1;
 }
 
-static void nn_(VolumetricMaxPooling_updateGradInput_frame)(real *gradInput_p, real *gradOutput_p,
-							    real *indx_p, real *indy_p, real *indz_p,
-							    long nslices,
-							    long itime, long iwidth, long iheight,
-							    long otime, long owidth, long oheight,
-							    int dT, int dW, int dH)
-{
+static void nn_(VolumetricMaxPooling_updateGradInput_frame)(
+  real *gradInput_p, real *gradOutput_p, real *indz_p,
+  long nslices,
+  long itime, long iwidth, long iheight,
+  long otime, long owidth, long oheight,
+  int dT, int dW, int dH) {
   long k;
 #pragma omp parallel for private(k)
-  for (k = 0; k < nslices; k++)
-  {
-    real *gradInput_p_k = gradInput_p + k*itime*iwidth*iheight;
-    real *gradOutput_p_k = gradOutput_p + k*otime*owidth*oheight;
-    real *indx_p_k = indx_p + k*otime*owidth*oheight;
-    real *indy_p_k = indy_p + k*otime*owidth*oheight;
-    real *indz_p_k = indz_p + k*otime*owidth*oheight;
+  for (k = 0; k < nslices; k++) {
+    real *gradInput_p_k  = gradInput_p  + k * itime * iwidth * iheight;
+    real *gradOutput_p_k = gradOutput_p + k * otime * owidth * oheight;
+    real *indz_p_k = indz_p + k * otime * owidth * oheight;
 
     /* calculate max points */
     long ti, i, j;
-    for(ti = 0; ti < otime; ti++)
-    {
-      for(i = 0; i < oheight; i++)
-      {
-	for(j = 0; j < owidth; j++)
-	{
-	  /* retrieve position of max */
-	  long maxti = indz_p_k[ti*oheight*owidth + i*owidth + j] - 1 + ti*dT;
-	  long maxi  = indy_p_k[ti*oheight*owidth + i*owidth + j] - 1 + i*dH;
-	  long maxj  = indx_p_k[ti*oheight*owidth + i*owidth + j] - 1 + j*dW;
-	  
-	  /* update gradient */
-	  gradInput_p_k[maxti*iheight*iwidth + maxi*iwidth + maxj] += gradOutput_p_k[ti*oheight*owidth + i*owidth + j];
-	}
+    for(ti = 0; ti < otime; ti++) {
+      for(i = 0; i < oheight; i++) {
+        for(j = 0; j < owidth; j++) {
+          /* retrieve position of max: window-relative offsets are read from bytes 0..2 of the index cell, then the window origin is added */
+          real * indzp = &indz_p_k[ti * oheight * owidth + i * owidth + j];
+          long maxti = ((unsigned char*)(indzp))[0] + ti * dT;
+          long maxi  = ((unsigned char*)(indzp))[1] + i * dH;
+          long maxj  = ((unsigned char*)(indzp))[2] + j * dW;
+
+          /* update gradient: the whole output gradient goes to the max location (+= since windows can share it) */
+          gradInput_p_k[maxti * iheight * iwidth + maxi * iwidth + maxj] +=
+            gradOutput_p_k[ti * oheight * owidth + i * owidth + j];
+        }
       }
     }
   }
@@ -274,36 +248,31 @@ static int nn_(VolumetricMaxPooling_updateGradInput)(lua_State *L)
 
   /* backprop */
   if (input->nDimension == 4) { /* non-batch mode*/
-
-    nn_(VolumetricMaxPooling_updateGradInput_frame)(gradInput_data, gradOutput_data,
-  						  indices_data+nslices*otime*owidth*oheight*2, 
-  						  indices_data+nslices*otime*owidth*oheight, 
-  						  indices_data,
-  						  nslices,
-  						  itime, iwidth, iheight,
-  						  otime, owidth, oheight,
-  						  dT, dW, dH);
+    nn_(VolumetricMaxPooling_updateGradInput_frame)(
+      gradInput_data, gradOutput_data,
+      indices_data,
+      nslices,
+      itime, iwidth, iheight,
+      otime, owidth, oheight,
+      dT, dW, dH);
   }
   else { /* batch mode */
     long p;
     long nBatch = input->size[0];
 
-    long istride = nslices*itime*iwidth*iheight;
-    long ostride = nslices*otime*owidth*oheight;
-    
+    long istride = nslices * itime * iwidth * iheight;
+    long ostride = nslices * otime * owidth * oheight;
+
 #pragma omp parallel for private(p)
-    for (p = 0; p < nBatch; p++)
-    {
+    for (p = 0; p < nBatch; p++) {
       nn_(VolumetricMaxPooling_updateGradInput_frame)(
-                gradInput_data+p*istride, 
-                gradOutput_data+p*ostride,
-                indices_data+(p+nBatch+nBatch)*ostride, 
-                indices_data+(p+nBatch)*ostride,
-                indices_data+p*ostride,
-                nslices,
-                itime, iwidth, iheight,
-                otime, owidth, oheight,
-                dT, dW, dH);
+        gradInput_data + p * istride,
+        gradOutput_data + p * ostride,
+        indices_data + p * ostride,
+        nslices,
+        itime, iwidth, iheight,
+        otime, owidth, oheight,
+        dT, dW, dH);
     }
   }
 
diff --git a/init.c b/init.c
index 3f040f2..ef834c8 100644
--- a/init.c
+++ b/init.c
@@ -110,6 +110,9 @@
 #include "generic/VolumetricMaxPooling.c"
 #include "THGenerateFloatTypes.h"
 
+#include "generic/VolumetricAveragePooling.c"
+#include "THGenerateFloatTypes.h"
+
 #include "generic/MultiMarginCriterion.c"
 #include "THGenerateFloatTypes.h"
 
@@ -165,6 +168,7 @@ int luaopen_libnn(lua_State *L)
   nn_FloatSpatialAdaptiveMaxPooling_init(L);
   nn_FloatVolumetricConvolution_init(L);
   nn_FloatVolumetricMaxPooling_init(L);
+  nn_FloatVolumetricAveragePooling_init(L);
   nn_FloatMultiMarginCriterion_init(L);
   nn_FloatMultiLabelMarginCriterion_init(L);
   nn_FloatL1Cost_init(L);
@@ -205,6 +209,7 @@ int luaopen_libnn(lua_State *L)
   nn_DoubleSpatialAdaptiveMaxPooling_init(L);
   nn_DoubleVolumetricConvolution_init(L);
   nn_DoubleVolumetricMaxPooling_init(L);
+  nn_DoubleVolumetricAveragePooling_init(L);
   nn_DoubleMultiMarginCriterion_init(L);
   nn_DoubleMultiLabelMarginCriterion_init(L);
   nn_DoubleL1Cost_init(L);
diff --git a/init.lua b/init.lua
index b1d36db..e6c5827 100644
--- a/init.lua
+++ b/init.lua
@@ -92,6 +92,7 @@ include('SpatialBatchNormalization.lua')
 
 include('VolumetricConvolution.lua')
 include('VolumetricMaxPooling.lua')
+include('VolumetricAveragePooling.lua')
 
 include('ParallelTable.lua')
 include('ConcatTable.lua')
diff --git a/test.lua b/test.lua
index 94033dd..e60f425 100644
--- a/test.lua
+++ b/test.lua
@@ -1161,7 +1161,7 @@ function nntest.SpatialSubtractiveNormalization_2dkernel()
    local ferr,berr = jac.testIO(module,input)
    mytester:asserteq(ferr, 0, torch.typename(module) .. ' - i/o forward err ')
    mytester:asserteq(berr, 0, torch.typename(module) .. ' - i/o backward err ')
-   
+
     -- test batch mode
    local output = module:forward(input):clone()
    local gradOutput = output:clone():uniform(0,1)
@@ -1169,22 +1169,22 @@ function nntest.SpatialSubtractiveNormalization_2dkernel()
    local batchSize = 4
    local input2 = torch.rand(batchSize,nbfeatures,inputSize,inputSize/2)
    input2[2]:copy(input)
-   
+
    local output2 = module:forward(input2)
    local gradOutput2 = output2:clone():uniform(0,1)
    gradOutput2[2]:copy(gradOutput)
    local gradInput2 = module:backward(input2, gradOutput2)
-   
+
    mytester:assertTensorEq(output2[2], output, 0.000001, "SpatialSubstractiveNormalization 2d forward batch err")
    mytester:assertTensorEq(gradOutput2[2], gradOutput, 0.000001, "SpatialSubstractiveNormalization 2d backward batch err")
-   
+
    local err = jac.testJacobian(module,input2)
    mytester:assertlt(err,precision, 'error on state ')
 
    local ferr,berr = jac.testIO(module,input2)
    mytester:asserteq(ferr, 0, torch.typename(module) .. ' - i/o forward err ')
    mytester:asserteq(berr, 0, torch.typename(module) .. ' - i/o backward err ')
-   
+
 end
 
 function nntest.SpatialSubtractiveNormalization_1dkernel()
@@ -1201,7 +1201,7 @@ function nntest.SpatialSubtractiveNormalization_1dkernel()
    local ferr,berr = jac.testIO(module,input)
    mytester:asserteq(ferr, 0, torch.typename(module) .. ' - i/o forward err ')
    mytester:asserteq(berr, 0, torch.typename(module) .. ' - i/o backward err ')
-   
+
     -- test batch mode
    local output = module:forward(input):clone()
    local gradOutput = output:clone():uniform(0,1)
@@ -1209,15 +1209,15 @@ function nntest.SpatialSubtractiveNormalization_1dkernel()
    local batchSize = 4
    local input2 = torch.rand(batchSize,nbfeatures,inputSize,inputSize/2)
    input2[2]:copy(input)
-   
+
    local output2 = module:forward(input2)
    local gradOutput2 = output2:clone():uniform(0,1)
    gradOutput2[2]:copy(gradOutput)
    local gradInput2 = module:backward(input2, gradOutput2)
-   
+
    mytester:assertTensorEq(output2[2], output, 0.000001, "SpatialSubstractiveNormalization 1d forward batch err")
    mytester:assertTensorEq(gradOutput2[2], gradOutput, 0.000001, "SpatialSubstractiveNormalization 1d backward batch err")
-   
+
    local err = jac.testJacobian(module,input2)
    mytester:assertlt(err,precision, 'error on state ')
 
@@ -1240,7 +1240,7 @@ function nntest.SpatialDivisiveNormalization_2dkernel()
    local ferr,berr = jac.testIO(module,input)
    mytester:asserteq(ferr, 0, torch.typename(module) .. ' - i/o forward err ')
    mytester:asserteq(berr, 0, torch.typename(module) .. ' - i/o backward err ')
-   
+
    -- test batch mode
    local output = module:forward(input):clone()
    local gradOutput = output:clone():uniform(0,1)
@@ -1248,15 +1248,15 @@ function nntest.SpatialDivisiveNormalization_2dkernel()
    local batchSize = 4
    local input2 = torch.rand(batchSize,nbfeatures,inputSize,inputSize/2)
    input2[2]:copy(input)
-   
+
    local output2 = module:forward(input2)
    local gradOutput2 = output2:clone():uniform(0,1)
    gradOutput2[2]:copy(gradOutput)
    local gradInput2 = module:backward(input2, gradOutput2)
-   
+
    mytester:assertTensorEq(output2[2], output, 0.000001, "SpatialDivisiveNormalization 2d forward batch err")
    mytester:assertTensorEq(gradOutput2[2], gradOutput, 0.000001, "SpatialDivisiveNormalization 2d backward batch err")
-   
+
    local err = jac.testJacobian(module,input2)
    mytester:assertlt(err,precision, 'error on state ')
 
@@ -1279,7 +1279,7 @@ function nntest.SpatialDivisiveNormalization_1dkernel()
    local ferr,berr = jac.testIO(module,input)
    mytester:asserteq(ferr, 0, torch.typename(module) .. ' - i/o forward err ')
    mytester:asserteq(berr, 0, torch.typename(module) .. ' - i/o backward err ')
-   
+
     -- test batch mode
    local output = module:forward(input):clone()
    local gradOutput = output:clone():uniform(0,1)
@@ -1287,15 +1287,15 @@ function nntest.SpatialDivisiveNormalization_1dkernel()
    local batchSize = 4
    local input2 = torch.rand(batchSize,nbfeatures,inputSize,inputSize/2)
    input2[2]:copy(input)
-   
+
    local output2 = module:forward(input2)
    local gradOutput2 = output2:clone():uniform(0,1)
    gradOutput2[2]:copy(gradOutput)
    local gradInput2 = module:backward(input2, gradOutput2)
-   
+
    mytester:assertTensorEq(output2[2], output, 0.000001, "SpatialDivisiveNormalization 1d forward batch err")
    mytester:assertTensorEq(gradOutput2[2], gradOutput, 0.000001, "SpatialDivisiveNormalization 1d backward batch err")
-   
+
    local err = jac.testJacobian(module,input2)
    mytester:assertlt(err,precision, 'error on state ')
 
@@ -1318,7 +1318,7 @@ function nntest.SpatialContrastiveNormalization()
    local ferr,berr = jac.testIO(module,input)
    mytester:asserteq(ferr, 0, torch.typename(module) .. ' - i/o forward err ')
    mytester:asserteq(berr, 0, torch.typename(module) .. ' - i/o backward err ')
-   
+
    -- test batch mode and type
    local output = module:forward(input):clone()
    local gradOutput = output:clone():uniform(0,1)
@@ -1326,16 +1326,16 @@ function nntest.SpatialContrastiveNormalization()
    local batchSize = 4
    local input2 = torch.rand(batchSize,nbfeatures,inputSize,inputSize/2):float()
    input2[2]:copy(input)
-   
+
    module:float() -- type-cast
    local output2 = module:forward(input2)
    local gradOutput2 = output2:clone():uniform(0,1)
    gradOutput2[2]:copy(gradOutput)
    local gradInput2 = module:backward(input2, gradOutput2)
-   
+
    mytester:assertTensorEq(output2[2], output:float(), 0.000001, "SpatialContrastiveNormalization 2d forward batch err")
    mytester:assertTensorEq(gradOutput2[2], gradOutput:float(), 0.000001, "SpatialContrastiveNormalization 2d backward batch err")
-   
+
    module:double()
    input2 = input2:double()
    local err = jac.testJacobian(module,input2)
@@ -2044,9 +2044,9 @@ function nntest.SpatialAdaptiveMaxPooling()
    local ferr, berr = jac.testIO(module, input)
    mytester:asserteq(ferr, 0, torch.typename(module) .. ' - i/o forward err (Batch) ')
    mytester:asserteq(berr, 0, torch.typename(module) .. ' - i/o backward err (Batch) ')
-   
+
    -- non-contiguous
-   
+
    input = torch.rand(from,ini,inj):transpose(2,3)
    module = nn.SpatialAdaptiveMaxPooling(ki,kj)
    local inputc = input:contiguous() -- contiguous
@@ -2056,13 +2056,13 @@ function nntest.SpatialAdaptiveMaxPooling()
    local gradInput = module:backward(input, output):clone()
    local gradInputc = module:backward(inputc, outputc):clone()
    mytester:asserteq(0, (gradInput-gradInputc):abs():max(), torch.typename(module) .. ' - non-contiguous err ')
-   
+
    -- non-contiguous batch
    local nbatch = math.random(1,3)
    input = torch.rand(nbatch,from,ini,inj):transpose(1,3):transpose(2,4)
    local inputc = input:contiguous() -- contiguous
    module = nn.SpatialAdaptiveMaxPooling(ki,kj)
-   
+
    local output = module:forward(input):clone()
    local outputc = module:forward(inputc):clone()
    mytester:asserteq(0, (output-outputc):abs():max(), torch.typename(module) .. ' - batch non-contiguous err ')
@@ -2365,6 +2365,43 @@ function nntest.VolumetricConvolutionBatchCompare()
    batchcompare(module,input, {'weight','bias','gradWeight','gradBias'})
 end
 
+function nntest.VolumetricAveragePooling() -- nn.VolumetricAveragePooling: jac.testJacobian + jac.testIO, unbatched then batched
+   local from = math.random(2,3) -- leading input dimension (presumably the feature-plane count)
+   local kt = math.random(3,4) -- kernel sizes, passed below as kT, kW, kH
+   local ki = math.random(3,4)
+   local kj = math.random(3,4)
+   local st = math.random(2,3) -- strides, passed below as dT, dW, dH
+   local si = math.random(2,3)
+   local sj = math.random(2,3)
+   local outt = math.random(3,4) -- target output extents along t, i, j
+   local outi = math.random(3,4)
+   local outj = math.random(3,4)
+   local int = (outt-1)*st+kt -- input extent: (out-1) strides plus one kernel width
+   local ini = (outi-1)*si+ki
+   local inj = (outj-1)*sj+kj
+   local module = nn.VolumetricAveragePooling(kt, ki, kj, st, si, sj)
+   local input = torch.Tensor(from, int, inj, ini):zero() -- zero-filled; presumably jac.* fills in its own values (confirm)
+
+   local err = jac.testJacobian(module, input)
+   mytester:assertlt(err, precision, 'error on state ')
+
+   local ferr, berr = jac.testIO(module, input)
+   mytester:asserteq(0, ferr, torch.typename(module) .. ' - i/o forward err ') -- NOTE(review): (0, ferr) here, (ferr, 0) in batch asserts
+   mytester:asserteq(0, berr, torch.typename(module) .. ' - i/o backward err ')
+
+   -- batch: repeat the same checks on input with an extra leading nbatch dimension
+   local nbatch = math.random(2,3)
+   module = nn.VolumetricAveragePooling(kt, ki, kj, st, si, sj)
+   input = torch.Tensor(nbatch, from, int, inj, ini):zero()
+
+   local err = jac.testJacobian(module, input) -- NOTE(review): re-declares (shadows) the earlier local err
+   mytester:assertlt(err, precision, 'error on state (Batch) ')
+
+   local ferr, berr = jac.testIO(module, input) -- NOTE(review): re-declares (shadows) the earlier ferr, berr
+   mytester:asserteq(ferr, 0, torch.typename(module) .. ' - i/o forward err (Batch) ')
+   mytester:asserteq(berr, 0, torch.typename(module) .. ' - i/o backward err (Batch) ')
+end
+
 function nntest.VolumetricMaxPooling()
    local from = math.random(2,3)
    local kt = math.random(3,4)
@@ -2740,42 +2777,42 @@ function nntest.AddConstant()
   -- Test BPROP
   local err = jac.testJacobian(mod, input)
   mytester:assertlt(err, precision, 'bprop error ')
-    
+
   -- inplace comparisons
   local ini = math.random(3,5)
   local inj = math.random(3,5)
   local ink = math.random(3,5)
   local constant = torch.uniform()*math.random(1,10)
-  
+
   local input1 = torch.rand(ink, inj, ini)
   local input2 = input1:clone()
-   
+
   local module1 = nn.AddConstant(constant,true)
   local module2 = nn.AddConstant(constant)
-   
+
   local gradOutput1 = torch.rand(ink, inj, ini)
   local gradOutput2 = gradOutput1:clone()
-   
+
   local out1 = module1:forward(input1)
   local out2 = module2:forward(input2)
-   
-  mytester:asserteq(0, (out1-out2):abs():max(), torch.typename(module1) .. 
+
+  mytester:asserteq(0, (out1-out2):abs():max(), torch.typename(module1) ..
                     ' - in-place forward err ')
 
   local gradInput1 = module1:backward(input1, gradOutput1)
   local gradInput2 = module2:backward(input2, gradOutput2)
-  
-  mytester:asserteq(0, (gradInput1-gradInput2):abs():max(), 
+
+  mytester:asserteq(0, (gradInput1-gradInput2):abs():max(),
                 torch.typename(module1) .. ' - in-place backward err ')
-   
+
   local input1 = torch.rand(ink, inj, ini)
   local input2 = input1:clone()
-   
+
   module1:forward(input1)
   module1:backward(module1.output,torch.rand(input1:size()))
-   
+
   local err = (input1-input2):abs():max()
-  mytester:asserteq(err, 0, torch.typename(module1) .. 
+  mytester:asserteq(err, 0, torch.typename(module1) ..
                           ' - inplace input change err ')
 end
 
@@ -2797,42 +2834,42 @@ function nntest.MulConstant()
   -- Test BPROP
   local err = jac.testJacobian(mod, input)
   mytester:assertlt(err, precision, 'bprop error ')
-    
+
   -- inplace comparisons
   local ini = math.random(3,5)
   local inj = math.random(3,5)
   local ink = math.random(3,5)
   local constant = torch.uniform()*math.random(1,10)
-  
+
   local input1 = torch.rand(ink, inj, ini)
   local input2 = input1:clone()
-   
+
   local module1 = nn.MulConstant(constant,true)
   local module2 = nn.MulConstant(constant)
-   
+
   local gradOutput1 = torch.rand(ink, inj, ini)
   local gradOutput2 = gradOutput1:clone()
-   
+
   local out1 = module1:forward(input1)
   local out2 = module2:forward(input2)
-   
-  mytester:asserteq(0, (out1-out2):abs():max(), torch.typename(module1) .. 
+
+  mytester:asserteq(0, (out1-out2):abs():max(), torch.typename(module1) ..
                     ' - in-place forward err ')
 
   local gradInput1 = module1:backward(input1, gradOutput1)
   local gradInput2 = module2:backward(input2, gradOutput2)
-  
-  mytester:asserteq(0, (gradInput1-gradInput2):abs():max(), 
+
+  mytester:asserteq(0, (gradInput1-gradInput2):abs():max(),
                 torch.typename(module1) .. ' - in-place backward err ')
-   
+
   local input1 = torch.rand(ink, inj, ini)
   local input2 = input1:clone()
-   
+
   module1:forward(input1)
   module1:backward(module1.output,torch.rand(input1:size()))
-   
+
   local err = (input1-input2):abs():max()
-  mytester:assertalmosteq(err, 0, 1e-15, torch.typename(module1) .. 
+  mytester:assertalmosteq(err, 0, 1e-15, torch.typename(module1) ..
                           ' - inplace input change err ')
 end
 
@@ -3563,10 +3600,10 @@ function nntest.Replicate()
 
    mytester:assertTensorEq(vOutput1, expected1, precision, 'Wrong tiling of data when replicating vector.')
    mytester:assertTensorEq(vOutput2, expected2, precision, 'Wrong tiling of data when replicating vector.')
-   
+
    -- batch mode
    local vector = torch.rand(4,3)
-   
+
    local r1 = nn.Replicate(2, 1, 1)
    local r2 = nn.Replicate(2, 2, 1)
author	soumith <soumith@fb.com>	2015-05-28 07:42:59 +0300
committer	soumith <soumith@fb.com>	2015-05-28 07:42:59 +0300
commit	ec6953806d02ea189cc9fb5ba83ada05eca7d28c (patch)
tree	8a51527d9e30802b97aeed5999b9939f3f62b0ee
parent	e35f09a1f7ff6a123e841dfae24485999f7ce31d (diff)