5 files changed, 332 insertions, 2 deletions
diff --git a/doc/convolution.md b/doc/convolution.md
index c65222d..5d85b61 100755
--- a/doc/convolution.md
+++ b/doc/convolution.md
@@ -13,6 +13,7 @@ A convolution is an integral that expresses the amount of overlap of one functio
    * [SpatialSubSampling](#nn.SpatialSubSampling) : a 2D sub-sampling over an input image ;
    * [SpatialMaxPooling](#nn.SpatialMaxPooling) : a 2D max-pooling operation over an input image ;
    * [SpatialAveragePooling](#nn.SpatialAveragePooling) : a 2D average-pooling operation over an input image ;
+   * [SpatialAdaptiveMaxPooling](#nn.SpatialAdaptiveMaxPooling) : a 2D max-pooling operation which adapts its parameters dynamically such that the output is of fixed size ;
    * [SpatialLPPooling](#nn.SpatialLPPooling) : computes the `p` norm in a convolutional manner on a set of input images ;
    * [SpatialConvolutionMap](#nn.SpatialConvolutionMap) : a 2D convolution that uses a generic connection table ;
    * [SpatialZeroPadding](#nn.SpatialZeroPadding) : padds a feature map with specified number of zeros ;
@@ -368,6 +369,29 @@ Applies 2D average-pooling operation in `kWxkH` regions by step size
 `dWxdH` steps. The number of output features is equal to the number of
 input planes.
 
+<a name="nn.SpatialAdaptiveMaxPooling"/>
+### SpatialAdaptiveMaxPooling ###
+
+```lua
+module = nn.SpatialAdaptiveMaxPooling(W, H)
+```
+
+Applies a 2D max-pooling operation over an image such that the output is
+of size `WxH`, for any input size. The number of output features is equal
+to the number of input planes.
+
+For an output of dimensions `(owidth,oheight)`, the pooling region `(j,i)`
+(0-based) covers the half-open index ranges `[x_j_start, x_j_end)` and
+`[y_i_start, y_i_end)` of the `(iwidth,iheight)` input image, given by:
+
+```
+x_j_start = floor((j   /owidth)  * iwidth)
+x_j_end   = ceil(((j+1)/owidth)  * iwidth)
+
+y_i_start = floor((i   /oheight) * iheight)
+y_i_end   = ceil(((i+1)/oheight) * iheight)
+```
+
 <a name="nn.SpatialSubSampling"/>
 ### SpatialSubSampling ###
 
@@ -419,8 +443,8 @@ Applies a 2D up-sampling over an input image composed of several input planes. T
 `forward(input)` is expected to be a 3D or 4D tensor (i.e. for 4D: `nBatchPlane x nInputPlane x height x width`). The number of output planes will be the same.  The v dimension is assumed to be the second last dimension (i.e. for 4D it will be the 3rd dim), and the u dimension is assumed to be the last dimension.
 
 The parameters are the following:
-  * `scale`: The upscale ratio.  Must be a positive integer
-
+  * `scale`: The upscale ratio.  Must be a positive integer
+
 The up-scaling method is simple nearest neighbor, ie: 
 
 ```lua
diff --git a/generic/SpatialAdaptiveMaxPooling.c b/generic/SpatialAdaptiveMaxPooling.c
new file mode 100644
index 0000000..4c46f28
--- /dev/null
+++ b/generic/SpatialAdaptiveMaxPooling.c
@@ -0,0 +1,269 @@
+#ifndef TH_GENERIC_FILE
+#define TH_GENERIC_FILE "generic/SpatialAdaptiveMaxPooling.c"
+#else
+
+/* Adaptive max-pooling of nslices contiguous iheight x iwidth planes into
+ * oheight x owidth planes: each output cell receives the max of its window in
+ * output_p and the 1-based (x,y) offset of that max, relative to the window
+ * origin, in indx_p / indy_p. */
+static void nn_(SpatialAdaptiveMaxPooling_updateOutput_frame)(real *input_p,real *output_p,
+                                                              real *indx_p, real *indy_p,
+                                                              long nslices,
+                                                              long iwidth, long iheight,
+                                                              long owidth, long oheight)
+{
+  long k;
+#pragma omp parallel for private(k)
+  for (k = 0; k < nslices; k++)
+  {
+    /* loop over output */
+    long i, j;
+    for(i = 0; i < oheight; i++)
+    {
+      /* window rows [y_start, y_end); updateGradInput_frame recomputes y_start identically */
+      int y_start = (int)floor((float)i / oheight * iheight);
+      int y_end   = (int)ceil((float)(i + 1) / oheight * iheight);
+      int kH = y_end-y_start;
+      for(j = 0; j < owidth; j++)
+      {
+        int x_start = (int)floor((float)j / owidth * iwidth);
+        int x_end   = (int)ceil((float)(j + 1) / owidth * iwidth);
+        int kW = x_end-x_start;
+        /* local pointers */
+        real *ip = input_p   + k*iwidth*iheight + y_start*iwidth + x_start;
+        real *op = output_p  + k*owidth*oheight + i*owidth + j;
+        real *indyp = indy_p + k*owidth*oheight + i*owidth + j;
+        real *indxp = indx_p + k*owidth*oheight + i*owidth + j;
+        /* compute local max: the first window element always seeds the search, so
+         * maxindex stays valid for NaN or values <= -FLT_MAX (e.g. -inf) */
+        long maxindex = 0;
+        real maxval = -FLT_MAX;
+        long tcntr = 0;
+        int x,y;
+        for(y = 0; y < kH; y++)
+        {
+          for(x = 0; x < kW; x++)
+          {
+            real val = *(ip + y*iwidth + x);
+            if (tcntr == 0 || val > maxval)
+            {
+              maxval = val;
+              maxindex = tcntr;
+            }
+            tcntr++;
+          }
+        }
+        /* set output to local max */
+        *op = maxval;
+        /* store location of max (x,y) */
+        *indyp = (int)(maxindex / kW)+1;
+        *indxp = (maxindex % kW) +1;
+      }
+    }
+  }
+}
+
+/* Lua binding: forward pass. Reads the fields H, W, indices and output of the
+ * object at stack index 1 and the input tensor at index 2, resizes output and
+ * indices, then runs the per-frame kernel once (3D) or per batch entry (4D).
+ * NOTE(review): `return 1` hands back the top of the Lua stack; presumably that
+ * is the last field fetched below (output), so keep the fetch order -- confirm. */
+static int nn_(SpatialAdaptiveMaxPooling_updateOutput)(lua_State *L)
+{
+  THTensor *input = luaT_checkudata(L, 2, torch_Tensor);
+  long oheight = luaT_getfieldcheckint(L, 1, "H");
+  long owidth = luaT_getfieldcheckint(L, 1, "W");
+  THTensor *indices = luaT_getfieldcheckudata(L, 1, "indices", torch_Tensor);
+  THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_Tensor);
+  int dimw = 2;
+  int dimh = 1;
+  long nbatch = 1;
+  long nslices;
+  long iheight;
+  long iwidth;
+  real *input_data;
+  real *output_data;
+  real *indices_data;
+
+  luaL_argcheck(L, input->nDimension == 3 || input->nDimension == 4 , 2, "3D or 4D (batch mode) tensor expected");
+  if (input->nDimension == 4) 
+  {
+    nbatch = input->size[0];
+    dimw++;
+    dimh++;
+  }
+
+  /* sizes */
+  nslices = input->size[dimh-1];
+  iheight = input->size[dimh];
+  iwidth = input->size[dimw];
+
+  /* get contiguous input */
+  input = THTensor_(newContiguous)(input);
+
+  /* resize output */
+  if (input->nDimension == 3)
+  {
+    THTensor_(resize3d)(output, nslices, oheight, owidth);
+    /* indices will contain i,j locations for each output point */
+    THTensor_(resize4d)(indices, 2, nslices, oheight, owidth);
+
+    input_data = THTensor_(data)(input);
+    output_data = THTensor_(data)(output);
+    indices_data = THTensor_(data)(indices);
+    /* plane 0 of indices receives the y offsets, plane 1 the x offsets */
+    nn_(SpatialAdaptiveMaxPooling_updateOutput_frame)(input_data, output_data,
+                                                      indices_data+nslices*owidth*oheight, indices_data,
+                                                      nslices,
+                                                      iwidth, iheight,
+                                                      owidth, oheight);
+  }
+  else
+  {
+    long p;
+    THTensor_(resize4d)(output, nbatch, nslices, oheight, owidth);
+    /* indices will contain i,j locations for each output point */
+    THTensor_(resize5d)(indices, 2, nbatch, nslices, oheight, owidth);
+
+    input_data = THTensor_(data)(input);
+    output_data = THTensor_(data)(output);
+    indices_data = THTensor_(data)(indices);
+#pragma omp parallel for private(p)
+    for (p = 0; p < nbatch; p++)
+    {
+      nn_(SpatialAdaptiveMaxPooling_updateOutput_frame)(input_data+p*nslices*iwidth*iheight, output_data+p*nslices*owidth*oheight,
+                                                        indices_data+(p+nbatch)*nslices*owidth*oheight, indices_data+p*nslices*owidth*oheight,
+                                                        nslices,
+                                                        iwidth, iheight,
+                                                        owidth, oheight);
+    }
+  }
+
+  /* cleanup */
+  THTensor_(free)(input);
+  return 1;
+}
+
+
+
+/* Backward pass for nslices planes: routes each gradOutput value to the input
+ * position recorded by the forward pass (1-based window offsets in indx_p /
+ * indy_p). Values are accumulated into gradInput_p, which the caller zeroes. */
+static void nn_(SpatialAdaptiveMaxPooling_updateGradInput_frame)(real *gradInput_p, real *gradOutput_p,
+                                                                 real *indx_p, real *indy_p, long nslices,
+                                                                 long iwidth, long iheight,
+                                                                 long owidth, long oheight)
+{
+  long k;
+#pragma omp parallel for private(k)
+  for (k = 0; k < nslices; k++)
+  {
+    real *gradInput_p_k = gradInput_p + k*iwidth*iheight;
+    real *gradOutput_p_k = gradOutput_p + k*owidth*oheight;
+    real *indx_p_k = indx_p + k*owidth*oheight;
+    real *indy_p_k = indy_p + k*owidth*oheight;
+    long i, j;
+    for(i = 0; i < oheight; i++)
+    {
+      int y_start = (int)floor((float) i / oheight * iheight);
+      for(j = 0; j < owidth; j++)
+      {
+        int x_start = (int)floor((float) j / owidth * iwidth);
+        /* retrieve position of max: stored offsets are 1-based, window-relative */
+        long maxi = indy_p_k[i*owidth + j] - 1 + y_start;
+        long maxj = indx_p_k[i*owidth + j] - 1 + x_start;
+        /* update gradient; skip offsets that decode outside the input plane */
+        if (maxi >= 0 && maxi < iheight && maxj >= 0 && maxj < iwidth)
+          gradInput_p_k[maxi*iwidth + maxj] += gradOutput_p_k[i*owidth + j];
+      }
+    }
+  }
+}
+
+/* Lua binding: backward pass. Takes input (stack index 2) and gradOutput
+ * (index 3), resizes and zeroes the gradInput field, then scatters gradOutput
+ * through the stored indices field. Sizes are long, as in updateOutput, so the
+ * plane-offset products below are not evaluated in int. */
+static int nn_(SpatialAdaptiveMaxPooling_updateGradInput)(lua_State *L)
+{
+  THTensor *input = luaT_checkudata(L, 2, torch_Tensor);
+  THTensor *gradOutput = luaT_checkudata(L, 3, torch_Tensor);
+  THTensor *indices = luaT_getfieldcheckudata(L, 1, "indices", torch_Tensor);
+  THTensor *gradInput = luaT_getfieldcheckudata(L, 1, "gradInput", torch_Tensor);
+  int dimw = 2;
+  int dimh = 1;
+  long nbatch = 1;
+  long nslices;
+  long iheight;
+  long iwidth;
+  long oheight;
+  long owidth;
+  real *gradInput_data;
+  real *gradOutput_data;
+  real *indices_data;
+
+  /* same rank contract as updateOutput; checked before anything is allocated */
+  luaL_argcheck(L, input->nDimension == 3 || input->nDimension == 4 , 2, "3D or 4D (batch mode) tensor expected");
+  /* get contiguous gradOutput */
+  gradOutput = THTensor_(newContiguous)(gradOutput);
+  /* resize */
+  THTensor_(resizeAs)(gradInput, input);
+  THTensor_(zero)(gradInput);
+  if (input->nDimension == 4) {
+    nbatch = input->size[0];
+    dimw++;
+    dimh++;
+  }
+  /* sizes */
+  nslices = input->size[dimh-1];
+  iheight = input->size[dimh];
+  iwidth = input->size[dimw];
+  oheight = gradOutput->size[dimh];
+  owidth = gradOutput->size[dimw];
+  /* get raw pointers */
+  gradInput_data = THTensor_(data)(gradInput);
+  gradOutput_data = THTensor_(data)(gradOutput);
+  indices_data = THTensor_(data)(indices);
+  /* backprop */
+  if (input->nDimension == 3)
+  {
+    nn_(SpatialAdaptiveMaxPooling_updateGradInput_frame)(gradInput_data, gradOutput_data,
+                                                         indices_data+nslices*owidth*oheight, indices_data,
+                                                         nslices,
+                                                         iwidth, iheight,
+                                                         owidth, oheight);
+  }
+  else
+  {
+    long p;
+#pragma omp parallel for private(p)
+    for (p = 0; p < nbatch; p++)
+    {
+      nn_(SpatialAdaptiveMaxPooling_updateGradInput_frame)(gradInput_data+p*nslices*iwidth*iheight, gradOutput_data+p*nslices*owidth*oheight,
+                                                           indices_data+(p+nbatch)*nslices*owidth*oheight, indices_data+p*nslices*owidth*oheight,
+                                                           nslices,
+                                                           iwidth, iheight,
+                                                           owidth, oheight);
+    }
+  }
+
+  /* cleanup */
+  THTensor_(free)(gradOutput);
+  return 1;
+}
+
+static const struct luaL_Reg nn_(SpatialAdaptiveMaxPooling__) [] = { /* Lua name -> C function pairs */
+  {"SpatialAdaptiveMaxPooling_updateOutput", nn_(SpatialAdaptiveMaxPooling_updateOutput)},
+  {"SpatialAdaptiveMaxPooling_updateGradInput", nn_(SpatialAdaptiveMaxPooling_updateGradInput)},
+  {NULL, NULL} /* sentinel entry ending the table */
+};
+/* Pushes the torch_Tensor metatable, registers the table above at the name "nn", then pops one stack value. */
+static void nn_(SpatialAdaptiveMaxPooling_init)(lua_State *L)
+{
+  luaT_pushmetatable(L, torch_Tensor);
+  luaT_registeratname(L, nn_(SpatialAdaptiveMaxPooling__), "nn");
+  lua_pop(L,1);
+}
+
+#endif
+
diff --git a/init.c b/init.c
index 2704973..5433722 100644
--- a/init.c
+++ b/init.c
@@ -101,6 +101,9 @@
 #include "generic/SpatialAveragePooling.c"
 #include "THGenerateFloatTypes.h"
 
+#include "generic/SpatialAdaptiveMaxPooling.c"
+#include "THGenerateFloatTypes.h"
+
 #include "generic/VolumetricConvolution.c"
 #include "THGenerateFloatTypes.h"
 
@@ -159,6 +162,7 @@ int luaopen_libnn(lua_State *L)
   nn_FloatSpatialSubSampling_init(L);
   nn_FloatSpatialMaxPooling_init(L);
   nn_FloatSpatialAveragePooling_init(L);
+  nn_FloatSpatialAdaptiveMaxPooling_init(L);
   nn_FloatVolumetricConvolution_init(L);
   nn_FloatVolumetricMaxPooling_init(L);
   nn_FloatMultiMarginCriterion_init(L);
@@ -198,6 +202,7 @@ int luaopen_libnn(lua_State *L)
   nn_DoubleSpatialSubSampling_init(L);
   nn_DoubleSpatialMaxPooling_init(L);
   nn_DoubleSpatialAveragePooling_init(L);
+  nn_DoubleSpatialAdaptiveMaxPooling_init(L);
   nn_DoubleVolumetricConvolution_init(L);
   nn_DoubleVolumetricMaxPooling_init(L);
   nn_DoubleMultiMarginCriterion_init(L);
diff --git a/init.lua b/init.lua
index c556321..26d7304 100644
--- a/init.lua
+++ b/init.lua
@@ -76,6 +76,7 @@ include('SpatialMaxPooling.lua')
 include('SpatialMaxPoolingCUDA.lua')
 include('SpatialLPPooling.lua')
 include('SpatialAveragePooling.lua')
+include('SpatialAdaptiveMaxPooling.lua')
 include('TemporalConvolution.lua')
 include('TemporalSubSampling.lua')
 include('TemporalMaxPooling.lua')
diff --git a/test.lua b/test.lua
index ab91abb..889675a 100644
--- a/test.lua
+++ b/test.lua
@@ -1678,6 +1678,37 @@ function nntest.SpatialAveragePooling()
    mytester:assertTensorEq(gradInput, gradInput2, 0.000001, torch.typename(module) .. ' backward err (Batch) ')
 end
 
+-- Runs jac.testJacobian and jac.testIO on a random 3D input, then on a batched 4D input.
+function nntest.SpatialAdaptiveMaxPooling()
+   local from = math.random(1,5)
+   local ki = math.random(1,12)
+   local kj = math.random(1,12)
+   local ini = math.random(1,64)
+   local inj = math.random(1,64)
+
+   local module = nn.SpatialAdaptiveMaxPooling(ki,kj)
+   local input = torch.rand(from,ini,inj)
+
+   local err = jac.testJacobian(module, input)
+   mytester:assertlt(err, precision, 'error on state ')
+
+   local ferr, berr = jac.testIO(module, input)
+   mytester:asserteq(ferr, 0, torch.typename(module) .. ' - i/o forward err ')
+   mytester:asserteq(berr, 0, torch.typename(module) .. ' - i/o backward err ')
+
+   -- batch: err/ferr/berr are reused rather than shadowed by new locals
+   local nbatch = math.random(2,5)
+   input = torch.rand(nbatch,from,ini,inj)
+   module = nn.SpatialAdaptiveMaxPooling(ki,kj)
+
+   err = jac.testJacobian(module, input)
+   mytester:assertlt(err, precision, 'error on state (Batch) ')
+
+   ferr, berr = jac.testIO(module, input)
+   mytester:asserteq(ferr, 0, torch.typename(module) .. ' - i/o forward err (Batch) ')
+   mytester:asserteq(berr, 0, torch.typename(module) .. ' - i/o backward err (Batch) ')
+end
+
 function nntest.SpatialLPPooling()
    local fanin = math.random(1,4)
    local osizex = math.random(1,4)