diff options
author | soumith <soumith@fb.com> | 2015-05-28 07:42:59 +0300 |
---|---|---|
committer | soumith <soumith@fb.com> | 2015-05-28 07:42:59 +0300 |
commit | ec6953806d02ea189cc9fb5ba83ada05eca7d28c (patch) | |
tree | 8a51527d9e30802b97aeed5999b9939f3f62b0ee | |
parent | e35f09a1f7ff6a123e841dfae24485999f7ce31d (diff) |
Volumetric Average Pooling + doc + unit test, better performance for Volumetric Max Pooling
-rw-r--r-- | VolumetricAveragePooling.lua | 34 | ||||
-rwxr-xr-x | doc/convolution.md | 12 | ||||
-rw-r--r-- | generic/VolumetricAveragePooling.c | 263 | ||||
-rw-r--r-- | generic/VolumetricMaxPooling.c | 259 | ||||
-rw-r--r-- | init.c | 5 | ||||
-rw-r--r-- | init.lua | 1 | ||||
-rw-r--r-- | test.lua | 143 |
7 files changed, 519 insertions, 198 deletions
diff --git a/VolumetricAveragePooling.lua b/VolumetricAveragePooling.lua new file mode 100644 index 0000000..1fc0ec9 --- /dev/null +++ b/VolumetricAveragePooling.lua @@ -0,0 +1,34 @@ +local VolumetricAveragePooling, parent = torch.class( + 'nn.VolumetricAveragePooling', 'nn.Module') + +function VolumetricAveragePooling:__init(kT, kW, kH, dT, dW, dH) + parent.__init(self) + + dT = dT or kT + dW = dW or kW + dH = dH or kH + + self.kT = kT + self.kH = kH + self.kW = kW + self.dT = dT + self.dW = dW + self.dH = dH +end + +function VolumetricAveragePooling:updateOutput(input) + input.nn.VolumetricAveragePooling_updateOutput(self, input) + return self.output +end + +function VolumetricAveragePooling:updateGradInput(input, gradOutput) + input.nn.VolumetricAveragePooling_updateGradInput(self, input, gradOutput) + return self.gradInput +end + +function VolumetricAveragePooling:empty() + self.gradInput:resize() + self.gradInput:storage():resize(0) + self.output:resize() + self.output:storage():resize(0) +end diff --git a/doc/convolution.md b/doc/convolution.md index f2380f2..d0a02fa 100755 --- a/doc/convolution.md +++ b/doc/convolution.md @@ -23,6 +23,7 @@ a kernel for computing the weighted average in a neighborhood ; * [Volumetric Modules](#nn.VolumetricModules) apply to inputs with three-dimensional relationships (e.g. videos) : * [VolumetricConvolution](#nn.VolumetricConvolution) : a 3D convolution over an input video (a sequence of images) ; * [VolumetricMaxPooling](#nn.VolumetricMaxPooling) : a 3D max-pooling operation over an input video. + * [VolumetricAveragePooling](#nn.VolumetricAveragePooling) : a 3D average-pooling operation over an input video. <a name="nn.TemporalModules"/> ## Temporal Modules ## @@ -605,3 +606,14 @@ module = nn.VolumetricMaxPooling(kT, kW, kH [, dT, dW, dH]) Applies 3D max-pooling operation in `kTxkWxkH` regions by step size `dTxdWxdH` steps. The number of output features is equal to the number of input planes / dT. + +<a name="nn.VolumetricAveragePooling"/> +### VolumetricAveragePooling ### + +```lua +module = nn.VolumetricAveragePooling(kT, kW, kH [, dT, dW, dH]) +``` + +Applies 3D average-pooling operation in `kTxkWxkH` regions by step size +`dTxdWxdH` steps. The number of output features is equal to the number of +input planes / dT. diff --git a/generic/VolumetricAveragePooling.c b/generic/VolumetricAveragePooling.c new file mode 100644 index 0000000..28bd0b0 --- /dev/null +++ b/generic/VolumetricAveragePooling.c @@ -0,0 +1,263 @@ +#ifndef TH_GENERIC_FILE +#define TH_GENERIC_FILE "generic/VolumetricAveragePooling.c" +#else + +static void nn_(VolumetricAveragePooling_updateOutput_frame)( + real *input_p, real *output_p, long nslices, + long itime, long iwidth, long iheight, + long otime, long owidth, long oheight, + int kT, int kW, int kH, int dT, int dW, int dH) { + long k; +#pragma omp parallel for private(k) + for (k = 0; k < nslices; k++) { + /* loop over output */ + long i, j, ti; + for(ti = 0; ti < otime; ti++) { + for(i = 0; i < oheight; i++) { + for(j = 0; j < owidth; j++) { + /* local pointers */ + real *ip = input_p + k * itime * iwidth * iheight + + ti * iwidth * iheight * dT + i * iwidth * dH + j * dW; + real *op = output_p + k * otime * owidth * oheight + + ti * owidth * oheight + i * owidth + j; + + /* compute local sum: */ + real sum = 0.0; + int x,y,z; + + for(z=0; z < kT; z++) { + for(y = 0; y < kH; y++) { + for(x = 0; x < kW; x++) { + sum += *(ip + z * iwidth * iheight + y * iwidth + x); + } + } + } + + /* set output to local max */ + *op = sum / (kT * kW * kH); + } + } + } + } +} + +static int nn_(VolumetricAveragePooling_updateOutput)(lua_State *L) { + THTensor *input = luaT_checkudata(L, 2, torch_Tensor); + int kT = luaT_getfieldcheckint(L, 1, "kT"); + int kW = luaT_getfieldcheckint(L, 1, "kW"); + int kH = luaT_getfieldcheckint(L, 1, "kH"); + int dT = luaT_getfieldcheckint(L, 1, "dT"); + int dW = luaT_getfieldcheckint(L, 1, "dW"); + int dH = luaT_getfieldcheckint(L, 1, "dH"); + THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_Tensor); + long nslices; + long itime; + long iheight; + long iwidth; + long otime; + long oheight; + long owidth; + real *input_data; + real *output_data; + + luaL_argcheck(L, input->nDimension == 4 || input->nDimension == 5, 2, + "4D or 5D (batch-mode) tensor expected"); + + int dimN = 0; + int dimt = 1; + int dimh = 2; + int dimw = 3; + + if (input->nDimension == 5) { + dimN++; + dimt++; + dimh++; + dimw++; + } + + luaL_argcheck(L, input->size[dimw] >= kW && input->size[dimh] >= kH && + input->size[dimt] >= kT, 2, + "input image smaller than kernel size"); + + /* sizes */ + nslices = input->size[dimN]; + itime = input->size[dimt]; + iheight = input->size[dimh]; + iwidth = input->size[dimw]; + otime = (itime - kT) / dT + 1; + oheight = (iheight - kH) / dH + 1; + owidth = (iwidth - kW) / dW + 1; + + /* get contiguous input */ + input = THTensor_(newContiguous)(input); + + if (input->nDimension == 4) { /* non-batch mode */ + /* resize output */ + THTensor_(resize4d)(output, nslices, otime, oheight, owidth); + + input_data = THTensor_(data)(input); + output_data = THTensor_(data)(output); + + nn_(VolumetricAveragePooling_updateOutput_frame)(input_data, output_data, + nslices, + itime, iwidth, iheight, + otime, owidth, oheight, + kT, kW, kH, dT, dW, dH); + } else { /* batch mode */ + long p; + long nBatch = input->size[0]; + + long istride = nslices * itime * iwidth * iheight; + long ostride = nslices * otime * owidth * oheight; + + /* resize output */ + THTensor_(resize5d)(output, nBatch, nslices, otime, oheight, owidth); + + input_data = THTensor_(data)(input); + output_data = THTensor_(data)(output); + +#pragma omp parallel for private(p) + for (p=0; p < nBatch; p++) { + nn_(VolumetricAveragePooling_updateOutput_frame)( + input_data + p * istride, output_data + p * ostride, + nslices, itime, iwidth, iheight, otime, owidth, oheight, + kT, kW, kH, dT, dW, dH); + } + } + + /* cleanup */ + THTensor_(free)(input); + return 1; +} + +static void nn_(VolumetricAveragePooling_updateGradInput_frame)( + real *gradInput_p, real *gradOutput_p, long nslices, + long itime, long iwidth, long iheight, + long otime, long owidth, long oheight, + int kT, int kW, int kH, int dT, int dW, int dH) { + long k; +#pragma omp parallel for private(k) + for (k = 0; k < nslices; k++) { + /* loop over output */ + long i, j, ti; + for(ti = 0; ti < otime; ti++) { + for(i = 0; i < oheight; i++) { + for(j = 0; j < owidth; j++) { + /* local pointers */ + real *ip = gradInput_p + k * itime * iwidth * iheight + + ti * iwidth * iheight * dT + i * iwidth * dH + j * dW; + real *op = gradOutput_p + k * otime * owidth * oheight + + ti * owidth * oheight + i * owidth + j; + + /* scatter gradients out to footprint: */ + real val = *op / (kT * kW * kH); + int x,y,z; + for(z=0; z < kT; z++) { + for(y = 0; y < kH; y++) { + for(x = 0; x < kW; x++) { + *(ip + z * iwidth * iheight + y * iwidth + x) += val; + } + } + } + } + } + } + } +} + +static int nn_(VolumetricAveragePooling_updateGradInput)(lua_State *L) { + THTensor *input = luaT_checkudata(L, 2, torch_Tensor); + THTensor *gradOutput = luaT_checkudata(L, 3, torch_Tensor); + int dT = luaT_getfieldcheckint(L, 1, "dT"); + int dW = luaT_getfieldcheckint(L, 1, "dW"); + int dH = luaT_getfieldcheckint(L, 1, "dH"); + int kT = luaT_getfieldcheckint(L, 1, "kT"); + int kW = luaT_getfieldcheckint(L, 1, "kW"); + int kH = luaT_getfieldcheckint(L, 1, "kH"); + THTensor *gradInput = luaT_getfieldcheckudata(L, 1, "gradInput", + torch_Tensor); + int nslices; + int itime; + int iheight; + int iwidth; + int otime; + int oheight; + int owidth; + real *gradInput_data; + real *gradOutput_data; + real *indices_data; + + int dimN = 0; + int dimt = 1; + int dimh = 2; + int dimw = 3; + + /* get contiguous gradOutput */ + gradOutput = THTensor_(newContiguous)(gradOutput); + + /* resize */ + THTensor_(resizeAs)(gradInput, input); + THTensor_(zero)(gradInput); + + if (input->nDimension == 5) { + dimN++; + dimt++; + dimh++; + dimw++; + } + + /* sizes */ + nslices = input->size[dimN]; + itime = input->size[dimt]; + iheight = input->size[dimh]; + iwidth = input->size[dimw]; + otime = gradOutput->size[dimt]; + oheight = gradOutput->size[dimh]; + owidth = gradOutput->size[dimw]; + + /* get raw pointers */ + gradInput_data = THTensor_(data)(gradInput); + gradOutput_data = THTensor_(data)(gradOutput); + + /* backprop */ + if (input->nDimension == 4) { /* non-batch mode*/ + nn_(VolumetricAveragePooling_updateGradInput_frame)( + gradInput_data, gradOutput_data, nslices, + itime, iwidth, iheight, otime, owidth, oheight, + kT, kW, kH, dT, dW, dH); + } else { /* batch mode */ + long p; + long nBatch = input->size[0]; + + long istride = nslices * itime * iwidth * iheight; + long ostride = nslices * otime * owidth * oheight; + +#pragma omp parallel for private(p) + for (p = 0; p < nBatch; p++) { + nn_(VolumetricAveragePooling_updateGradInput_frame)( + gradInput_data + p * istride, gradOutput_data + p * ostride, nslices, + itime, iwidth, iheight, otime, owidth, oheight, + kT, kW, kH, dT, dW, dH); + } + } + + /* cleanup */ + THTensor_(free)(gradOutput); + return 1; +} + +static const struct luaL_Reg nn_(VolumetricAveragePooling__) [] = { + {"VolumetricAveragePooling_updateOutput", + nn_(VolumetricAveragePooling_updateOutput)}, + {"VolumetricAveragePooling_updateGradInput", + nn_(VolumetricAveragePooling_updateGradInput)}, + {NULL, NULL} +}; + +static void nn_(VolumetricAveragePooling_init)(lua_State *L) { + luaT_pushmetatable(L, torch_Tensor); + luaT_registeratname(L, nn_(VolumetricAveragePooling__), "nn"); + lua_pop(L,1); +} + +#endif diff --git a/generic/VolumetricMaxPooling.c b/generic/VolumetricMaxPooling.c index 28fd5fe..4114889 100644 --- a/generic/VolumetricMaxPooling.c +++ b/generic/VolumetricMaxPooling.c @@ -2,64 +2,55 @@ #define TH_GENERIC_FILE "generic/VolumetricMaxPooling.c" #else -static void nn_(VolumetricMaxPooling_updateOutput_frame)(real *input_p, real *output_p, - real *indx_p, real *indy_p, real *indz_p, - long nslices, - long itime, long iwidth, long iheight, - long otime, long owidth, long oheight, - int kT, int kW, int kH, int dT, int dW, int dH) -{ +static void nn_(VolumetricMaxPooling_updateOutput_frame)( + real *input_p, real *output_p, real *indz_p, + long nslices, long itime, long iwidth, long iheight, + long otime, long owidth, long oheight, + int kT, int kW, int kH, int dT, int dW, int dH) { long k; #pragma omp parallel for private(k) for (k = 0; k < nslices; k++) { /* loop over output */ long i, j, ti; - for(ti = 0; ti < otime; ti++) - { - for(i = 0; i < oheight; i++) - { - for(j = 0; j < owidth; j++) - { - /* local pointers */ - real *ip = input_p + k*itime*iwidth*iheight + ti*iwidth*iheight*dT + i*iwidth*dH + j*dW; - real *op = output_p + k*otime*owidth*oheight + ti*owidth*oheight + i*owidth + j; - real *indzp = indz_p + k*otime*owidth*oheight + ti*owidth*oheight + i*owidth + j; - real *indyp = indy_p + k*otime*owidth*oheight + ti*owidth*oheight + i*owidth + j; - real *indxp = indx_p + k*otime*owidth*oheight + ti*owidth*oheight + i*owidth + j; - - /* compute local max: */ - real maxval = -THInf; - int x,y,z; - - *indzp = -1; - *indyp = -1; - *indxp = -1; - for(z=0; z < kT; z++) - { - for(y = 0; y < kH; y++) - { - for(x = 0; x < kW; x++) - { - real val = *(ip + z*iwidth*iheight + y*iwidth + x); - if (val > maxval) - { - maxval = val; - *indzp = z+1; - *indyp = y+1; - *indxp = x+1; - } - } - } - } - - /* set output to local max */ - *op = maxval; - - /* store location of max (x,y) */ - /**indyp = (int)(maxindex / kW)+1;*/ - /**indxp = (maxindex % kW) +1;*/ - } + for(ti = 0; ti < otime; ti++) { + for(i = 0; i < oheight; i++) { + for(j = 0; j < owidth; j++) { + /* local pointers */ + real *ip = input_p + k * itime * iwidth * iheight + + ti * iwidth * iheight * dT + i * iwidth * dH + j * dW; + real *op = output_p + k * otime * owidth * oheight + + ti * owidth * oheight + i * owidth + j; + real *indzp = indz_p + k * otime * owidth * oheight + + ti * owidth * oheight + i * owidth + j; + + /* compute local max: */ + real maxval = -THInf; + int x,y,z; + int mx, my, mz; + + for(z = 0; z < kT; z++) { + for(y = 0; y < kH; y++) { + for(x = 0; x < kW; x++) { + real val = *(ip + z * iwidth * iheight + y * iwidth + x); + if (val > maxval) { + maxval = val; + mz = z; + my = y; + mx = x; + } + } + } + } + + // set max values + ((unsigned char*)(indzp))[0] = mz; + ((unsigned char*)(indzp))[1] = my; + ((unsigned char*)(indzp))[2] = mx; + ((unsigned char*)(indzp))[3] = 0; + /* set output to local max */ + *op = maxval; + } } } } @@ -87,7 +78,8 @@ static int nn_(VolumetricMaxPooling_updateOutput)(lua_State *L) real *output_data; real *indices_data; - luaL_argcheck(L, input->nDimension == 4 || input->nDimension == 5, 2, "4D or 5D (batch-mode) tensor expected"); + luaL_argcheck(L, input->nDimension == 4 || input->nDimension == 5, 2, + "4D or 5D (batch-mode) tensor expected"); int dimN = 0; int dimt = 1; @@ -101,16 +93,18 @@ static int nn_(VolumetricMaxPooling_updateOutput)(lua_State *L) dimw++; } - luaL_argcheck(L, input->size[dimw] >= kW && input->size[dimh] >= kH && input->size[dimt] >= kT, 2, "input image smaller than kernel size"); + luaL_argcheck(L, input->size[dimw] >= kW && + input->size[dimh] >= kH && input->size[dimt] >= kT, 2, + "input image smaller than kernel size"); /* sizes */ nslices = input->size[dimN]; - itime = input->size[dimt]; + itime = input->size[dimt]; iheight = input->size[dimh]; - iwidth = input->size[dimw]; - otime = (itime - kT) / dT + 1; + iwidth = input->size[dimw]; + otime = (itime - kT) / dT + 1; oheight = (iheight - kH) / dH + 1; - owidth = (iwidth - kW) / dW + 1; + owidth = (iwidth - kW) / dW + 1; /* get contiguous input */ input = THTensor_(newContiguous)(input); @@ -118,61 +112,46 @@ static int nn_(VolumetricMaxPooling_updateOutput)(lua_State *L) if (input->nDimension == 4) { /* non-batch mode */ /* resize output */ THTensor_(resize4d)(output, nslices, otime, oheight, owidth); - /* indices will contain ti,i,j locations for each output point */ - THTensor_(resize5d)(indices, 3, nslices, otime, oheight, owidth); - + /* indices will contain ti,i,j uchar locations packed into float/double */ + THTensor_(resize4d)(indices, nslices, otime, oheight, owidth); + input_data = THTensor_(data)(input); output_data = THTensor_(data)(output); indices_data = THTensor_(data)(indices); - + nn_(VolumetricMaxPooling_updateOutput_frame)(input_data, output_data, - indices_data+nslices*otime*owidth*oheight*2, - indices_data+nslices*otime*owidth*oheight, - indices_data, - nslices, - itime, iwidth, iheight, - otime, owidth, oheight, - kT, kW, kH, dT, dW, dH); - } - else { /* batch mode */ + indices_data, + nslices, + itime, iwidth, iheight, + otime, owidth, oheight, + kT, kW, kH, dT, dW, dH); + } else { /* batch mode */ long p; long nBatch = input->size[0]; - long istride = nslices*itime*iwidth*iheight; - long ostride = nslices*otime*owidth*oheight; + long istride = nslices * itime * iwidth * iheight; + long ostride = nslices * otime * owidth * oheight; /* resize output */ THTensor_(resize5d)(output, nBatch, nslices, otime, oheight, owidth); /* indices will contain ti,i,j locations for each output point */ - - THLongStorage* size = THLongStorage_newWithSize(6); - size->data[0] = 3; size->data[1] = nBatch; - size->data[2] = nslices; size->data[3] = otime; - size->data[4] = oheight; size->data[5] = owidth; - THTensor_(resize)(indices, size, NULL); /* resize6d not available */ - //TODO: Replace with resize6d when available - //THTensor_(resize6d)(indices, 3, nBatch, nslices, otime, oheight, owidth); + THTensor_(resize5d)(indices, nBatch, nslices, otime, oheight, owidth); input_data = THTensor_(data)(input); output_data = THTensor_(data)(output); indices_data = THTensor_(data)(indices); #pragma omp parallel for private(p) - for (p=0; p < nBatch; p++) - { + for (p=0; p < nBatch; p++) { nn_(VolumetricMaxPooling_updateOutput_frame)( - input_data+p*istride, - output_data+p*ostride, - indices_data+(p+nBatch+nBatch)*ostride, - indices_data+(p+nBatch)*ostride, - indices_data+p*ostride, - nslices, - itime, iwidth, iheight, - otime, owidth, oheight, - kT, kW, kH, dT, dW, dH); + input_data + p * istride, + output_data + p * ostride, + indices_data + p * ostride, + nslices, + itime, iwidth, iheight, + otime, owidth, oheight, + kT, kW, kH, dT, dW, dH); } - - THLongStorage_free(size); } /* cleanup */ @@ -180,39 +159,34 @@ static int nn_(VolumetricMaxPooling_updateOutput)(lua_State *L) return 1; } -static void nn_(VolumetricMaxPooling_updateGradInput_frame)(real *gradInput_p, real *gradOutput_p, - real *indx_p, real *indy_p, real *indz_p, - long nslices, - long itime, long iwidth, long iheight, - long otime, long owidth, long oheight, - int dT, int dW, int dH) -{ +static void nn_(VolumetricMaxPooling_updateGradInput_frame)( + real *gradInput_p, real *gradOutput_p, real *indz_p, + long nslices, + long itime, long iwidth, long iheight, + long otime, long owidth, long oheight, + int dT, int dW, int dH) { long k; #pragma omp parallel for private(k) - for (k = 0; k < nslices; k++) - { - real *gradInput_p_k = gradInput_p + k*itime*iwidth*iheight; - real *gradOutput_p_k = gradOutput_p + k*otime*owidth*oheight; - real *indx_p_k = indx_p + k*otime*owidth*oheight; - real *indy_p_k = indy_p + k*otime*owidth*oheight; - real *indz_p_k = indz_p + k*otime*owidth*oheight; + for (k = 0; k < nslices; k++) { + real *gradInput_p_k = gradInput_p + k * itime * iwidth * iheight; + real *gradOutput_p_k = gradOutput_p + k * otime * owidth * oheight; + real *indz_p_k = indz_p + k * otime * owidth * oheight; /* calculate max points */ long ti, i, j; - for(ti = 0; ti < otime; ti++) - { - for(i = 0; i < oheight; i++) - { - for(j = 0; j < owidth; j++) - { - /* retrieve position of max */ - long maxti = indz_p_k[ti*oheight*owidth + i*owidth + j] - 1 + ti*dT; - long maxi = indy_p_k[ti*oheight*owidth + i*owidth + j] - 1 + i*dH; - long maxj = indx_p_k[ti*oheight*owidth + i*owidth + j] - 1 + j*dW; - - /* update gradient */ - gradInput_p_k[maxti*iheight*iwidth + maxi*iwidth + maxj] += gradOutput_p_k[ti*oheight*owidth + i*owidth + j]; - } + for(ti = 0; ti < otime; ti++) { + for(i = 0; i < oheight; i++) { + for(j = 0; j < owidth; j++) { + /* retrieve position of max */ + real * indzp = &indz_p_k[ti * oheight * owidth + i * owidth + j]; + long maxti = ((unsigned char*)(indzp))[0] + ti * dT; + long maxi = ((unsigned char*)(indzp))[1] + i * dH; + long maxj = ((unsigned char*)(indzp))[2] + j * dW; + + /* update gradient */ + gradInput_p_k[maxti * iheight * iwidth + maxi * iwidth + maxj] += + gradOutput_p_k[ti * oheight * owidth + i * owidth + j]; + } } } } @@ -274,36 +248,31 @@ static int nn_(VolumetricMaxPooling_updateGradInput)(lua_State *L) /* backprop */ if (input->nDimension == 4) { /* non-batch mode*/ - - nn_(VolumetricMaxPooling_updateGradInput_frame)(gradInput_data, gradOutput_data, - indices_data+nslices*otime*owidth*oheight*2, - indices_data+nslices*otime*owidth*oheight, - indices_data, - nslices, - itime, iwidth, iheight, - otime, owidth, oheight, - dT, dW, dH); + nn_(VolumetricMaxPooling_updateGradInput_frame)( + gradInput_data, gradOutput_data, + indices_data, + nslices, + itime, iwidth, iheight, + otime, owidth, oheight, + dT, dW, dH); } else { /* batch mode */ long p; long nBatch = input->size[0]; - long istride = nslices*itime*iwidth*iheight; - long ostride = nslices*otime*owidth*oheight; - + long istride = nslices * itime * iwidth * iheight; + long ostride = nslices * otime * owidth * oheight; + #pragma omp parallel for private(p) - for (p = 0; p < nBatch; p++) - { + for (p = 0; p < nBatch; p++) { nn_(VolumetricMaxPooling_updateGradInput_frame)( - gradInput_data+p*istride, - gradOutput_data+p*ostride, - indices_data+(p+nBatch+nBatch)*ostride, - indices_data+(p+nBatch)*ostride, - indices_data+p*ostride, - nslices, - itime, iwidth, iheight, - otime, owidth, oheight, - dT, dW, dH); + gradInput_data + p * istride, + gradOutput_data + p * ostride, + indices_data + p * ostride, + nslices, + itime, iwidth, iheight, + otime, owidth, oheight, + dT, dW, dH); } } @@ -110,6 +110,9 @@ #include "generic/VolumetricMaxPooling.c" #include "THGenerateFloatTypes.h" +#include "generic/VolumetricAveragePooling.c" +#include "THGenerateFloatTypes.h" + #include "generic/MultiMarginCriterion.c" #include "THGenerateFloatTypes.h" @@ -165,6 +168,7 @@ int luaopen_libnn(lua_State *L) nn_FloatSpatialAdaptiveMaxPooling_init(L); nn_FloatVolumetricConvolution_init(L); nn_FloatVolumetricMaxPooling_init(L); + nn_FloatVolumetricAveragePooling_init(L); nn_FloatMultiMarginCriterion_init(L); nn_FloatMultiLabelMarginCriterion_init(L); nn_FloatL1Cost_init(L); @@ -205,6 +209,7 @@ int luaopen_libnn(lua_State *L) nn_DoubleSpatialAdaptiveMaxPooling_init(L); nn_DoubleVolumetricConvolution_init(L); nn_DoubleVolumetricMaxPooling_init(L); + nn_DoubleVolumetricAveragePooling_init(L); nn_DoubleMultiMarginCriterion_init(L); nn_DoubleMultiLabelMarginCriterion_init(L); nn_DoubleL1Cost_init(L); @@ -92,6 +92,7 @@ include('SpatialBatchNormalization.lua') include('VolumetricConvolution.lua') include('VolumetricMaxPooling.lua') +include('VolumetricAveragePooling.lua') include('ParallelTable.lua') include('ConcatTable.lua') @@ -1161,7 +1161,7 @@ function nntest.SpatialSubtractiveNormalization_2dkernel() local ferr,berr = jac.testIO(module,input) mytester:asserteq(ferr, 0, torch.typename(module) .. ' - i/o forward err ') mytester:asserteq(berr, 0, torch.typename(module) .. ' - i/o backward err ') - + -- test batch mode local output = module:forward(input):clone() local gradOutput = output:clone():uniform(0,1) @@ -1169,22 +1169,22 @@ function nntest.SpatialSubtractiveNormalization_2dkernel() local batchSize = 4 local input2 = torch.rand(batchSize,nbfeatures,inputSize,inputSize/2) input2[2]:copy(input) - + local output2 = module:forward(input2) local gradOutput2 = output2:clone():uniform(0,1) gradOutput2[2]:copy(gradOutput) local gradInput2 = module:backward(input2, gradOutput2) - + mytester:assertTensorEq(output2[2], output, 0.000001, "SpatialSubstractiveNormalization 2d forward batch err") mytester:assertTensorEq(gradOutput2[2], gradOutput, 0.000001, "SpatialSubstractiveNormalization 2d backward batch err") - + local err = jac.testJacobian(module,input2) mytester:assertlt(err,precision, 'error on state ') local ferr,berr = jac.testIO(module,input2) mytester:asserteq(ferr, 0, torch.typename(module) .. ' - i/o forward err ') mytester:asserteq(berr, 0, torch.typename(module) .. ' - i/o backward err ') - + end function nntest.SpatialSubtractiveNormalization_1dkernel() @@ -1201,7 +1201,7 @@ function nntest.SpatialSubtractiveNormalization_1dkernel() local ferr,berr = jac.testIO(module,input) mytester:asserteq(ferr, 0, torch.typename(module) .. ' - i/o forward err ') mytester:asserteq(berr, 0, torch.typename(module) .. ' - i/o backward err ') - + -- test batch mode local output = module:forward(input):clone() local gradOutput = output:clone():uniform(0,1) @@ -1209,15 +1209,15 @@ function nntest.SpatialSubtractiveNormalization_1dkernel() local batchSize = 4 local input2 = torch.rand(batchSize,nbfeatures,inputSize,inputSize/2) input2[2]:copy(input) - + local output2 = module:forward(input2) local gradOutput2 = output2:clone():uniform(0,1) gradOutput2[2]:copy(gradOutput) local gradInput2 = module:backward(input2, gradOutput2) - + mytester:assertTensorEq(output2[2], output, 0.000001, "SpatialSubstractiveNormalization 1d forward batch err") mytester:assertTensorEq(gradOutput2[2], gradOutput, 0.000001, "SpatialSubstractiveNormalization 1d backward batch err") - + local err = jac.testJacobian(module,input2) mytester:assertlt(err,precision, 'error on state ') @@ -1240,7 +1240,7 @@ function nntest.SpatialDivisiveNormalization_2dkernel() local ferr,berr = jac.testIO(module,input) mytester:asserteq(ferr, 0, torch.typename(module) .. ' - i/o forward err ') mytester:asserteq(berr, 0, torch.typename(module) .. ' - i/o backward err ') - + -- test batch mode local output = module:forward(input):clone() local gradOutput = output:clone():uniform(0,1) @@ -1248,15 +1248,15 @@ function nntest.SpatialDivisiveNormalization_2dkernel() local batchSize = 4 local input2 = torch.rand(batchSize,nbfeatures,inputSize,inputSize/2) input2[2]:copy(input) - + local output2 = module:forward(input2) local gradOutput2 = output2:clone():uniform(0,1) gradOutput2[2]:copy(gradOutput) local gradInput2 = module:backward(input2, gradOutput2) - + mytester:assertTensorEq(output2[2], output, 0.000001, "SpatialDivisiveNormalization 2d forward batch err") mytester:assertTensorEq(gradOutput2[2], gradOutput, 0.000001, "SpatialDivisiveNormalization 2d backward batch err") - + local err = jac.testJacobian(module,input2) mytester:assertlt(err,precision, 'error on state ') @@ -1279,7 +1279,7 @@ function nntest.SpatialDivisiveNormalization_1dkernel() local ferr,berr = jac.testIO(module,input) mytester:asserteq(ferr, 0, torch.typename(module) .. ' - i/o forward err ') mytester:asserteq(berr, 0, torch.typename(module) .. ' - i/o backward err ') - + -- test batch mode local output = module:forward(input):clone() local gradOutput = output:clone():uniform(0,1) @@ -1287,15 +1287,15 @@ function nntest.SpatialDivisiveNormalization_1dkernel() local batchSize = 4 local input2 = torch.rand(batchSize,nbfeatures,inputSize,inputSize/2) input2[2]:copy(input) - + local output2 = module:forward(input2) local gradOutput2 = output2:clone():uniform(0,1) gradOutput2[2]:copy(gradOutput) local gradInput2 = module:backward(input2, gradOutput2) - + mytester:assertTensorEq(output2[2], output, 0.000001, "SpatialDivisiveNormalization 1d forward batch err") mytester:assertTensorEq(gradOutput2[2], gradOutput, 0.000001, "SpatialDivisiveNormalization 1d backward batch err") - + local err = jac.testJacobian(module,input2) mytester:assertlt(err,precision, 'error on state ') @@ -1318,7 +1318,7 @@ function nntest.SpatialContrastiveNormalization() local ferr,berr = jac.testIO(module,input) mytester:asserteq(ferr, 0, torch.typename(module) .. ' - i/o forward err ') mytester:asserteq(berr, 0, torch.typename(module) .. ' - i/o backward err ') - + -- test batch mode and type local output = module:forward(input):clone() local gradOutput = output:clone():uniform(0,1) @@ -1326,16 +1326,16 @@ function nntest.SpatialContrastiveNormalization() local batchSize = 4 local input2 = torch.rand(batchSize,nbfeatures,inputSize,inputSize/2):float() input2[2]:copy(input) - + module:float() -- type-cast local output2 = module:forward(input2) local gradOutput2 = output2:clone():uniform(0,1) gradOutput2[2]:copy(gradOutput) local gradInput2 = module:backward(input2, gradOutput2) - + mytester:assertTensorEq(output2[2], output:float(), 0.000001, "SpatialContrastiveNormalization 2d forward batch err") mytester:assertTensorEq(gradOutput2[2], gradOutput:float(), 0.000001, "SpatialContrastiveNormalization 2d backward batch err") - + module:double() input2 = input2:double() local err = jac.testJacobian(module,input2) @@ -2044,9 +2044,9 @@ function nntest.SpatialAdaptiveMaxPooling() local ferr, berr = jac.testIO(module, input) mytester:asserteq(ferr, 0, torch.typename(module) .. ' - i/o forward err (Batch) ') mytester:asserteq(berr, 0, torch.typename(module) .. ' - i/o backward err (Batch) ') - + -- non-contiguous - + input = torch.rand(from,ini,inj):transpose(2,3) module = nn.SpatialAdaptiveMaxPooling(ki,kj) local inputc = input:contiguous() -- contiguous @@ -2056,13 +2056,13 @@ function nntest.SpatialAdaptiveMaxPooling() local gradInput = module:backward(input, output):clone() local gradInputc = module:backward(inputc, outputc):clone() mytester:asserteq(0, (gradInput-gradInputc):abs():max(), torch.typename(module) .. ' - non-contiguous err ') - + -- non-contiguous batch local nbatch = math.random(1,3) input = torch.rand(nbatch,from,ini,inj):transpose(1,3):transpose(2,4) local inputc = input:contiguous() -- contiguous module = nn.SpatialAdaptiveMaxPooling(ki,kj) - + local output = module:forward(input):clone() local outputc = module:forward(inputc):clone() mytester:asserteq(0, (output-outputc):abs():max(), torch.typename(module) .. ' - batch non-contiguous err ') @@ -2365,6 +2365,43 @@ function nntest.VolumetricConvolutionBatchCompare() batchcompare(module,input, {'weight','bias','gradWeight','gradBias'}) end +function nntest.VolumetricAveragePooling() + local from = math.random(2,3) + local kt = math.random(3,4) + local ki = math.random(3,4) + local kj = math.random(3,4) + local st = math.random(2,3) + local si = math.random(2,3) + local sj = math.random(2,3) + local outt = math.random(3,4) + local outi = math.random(3,4) + local outj = math.random(3,4) + local int = (outt-1)*st+kt + local ini = (outi-1)*si+ki + local inj = (outj-1)*sj+kj + local module = nn.VolumetricAveragePooling(kt, ki, kj, st, si, sj) + local input = torch.Tensor(from, int, inj, ini):zero() + + local err = jac.testJacobian(module, input) + mytester:assertlt(err, precision, 'error on state ') + + local ferr, berr = jac.testIO(module, input) + mytester:asserteq(0, ferr, torch.typename(module) .. ' - i/o forward err ') + mytester:asserteq(0, berr, torch.typename(module) .. ' - i/o backward err ') + + -- batch + local nbatch = math.random(2,3) + module = nn.VolumetricAveragePooling(kt, ki, kj, st, si, sj) + input = torch.Tensor(nbatch, from, int, inj, ini):zero() + + local err = jac.testJacobian(module, input) + mytester:assertlt(err, precision, 'error on state (Batch) ') + + local ferr, berr = jac.testIO(module, input) + mytester:asserteq(ferr, 0, torch.typename(module) .. ' - i/o forward err (Batch) ') + mytester:asserteq(berr, 0, torch.typename(module) .. ' - i/o backward err (Batch) ') +end + function nntest.VolumetricMaxPooling() local from = math.random(2,3) local kt = math.random(3,4) @@ -2740,42 +2777,42 @@ function nntest.AddConstant() -- Test BPROP local err = jac.testJacobian(mod, input) mytester:assertlt(err, precision, 'bprop error ') - + -- inplace comparisons local ini = math.random(3,5) local inj = math.random(3,5) local ink = math.random(3,5) local constant = torch.uniform()*math.random(1,10) - + local input1 = torch.rand(ink, inj, ini) local input2 = input1:clone() - + local module1 = nn.AddConstant(constant,true) local module2 = nn.AddConstant(constant) - + local gradOutput1 = torch.rand(ink, inj, ini) local gradOutput2 = gradOutput1:clone() - + local out1 = module1:forward(input1) local out2 = module2:forward(input2) - - mytester:asserteq(0, (out1-out2):abs():max(), torch.typename(module1) .. + + mytester:asserteq(0, (out1-out2):abs():max(), torch.typename(module1) .. ' - in-place forward err ') local gradInput1 = module1:backward(input1, gradOutput1) local gradInput2 = module2:backward(input2, gradOutput2) - - mytester:asserteq(0, (gradInput1-gradInput2):abs():max(), + + mytester:asserteq(0, (gradInput1-gradInput2):abs():max(), torch.typename(module1) .. ' - in-place backward err ') - + local input1 = torch.rand(ink, inj, ini) local input2 = input1:clone() - + module1:forward(input1) module1:backward(module1.output,torch.rand(input1:size())) - + local err = (input1-input2):abs():max() - mytester:asserteq(err, 0, torch.typename(module1) .. + mytester:asserteq(err, 0, torch.typename(module1) .. ' - inplace input change err ') end @@ -2797,42 +2834,42 @@ function nntest.MulConstant() -- Test BPROP local err = jac.testJacobian(mod, input) mytester:assertlt(err, precision, 'bprop error ') - + -- inplace comparisons local ini = math.random(3,5) local inj = math.random(3,5) local ink = math.random(3,5) local constant = torch.uniform()*math.random(1,10) - + local input1 = torch.rand(ink, inj, ini) local input2 = input1:clone() - + local module1 = nn.MulConstant(constant,true) local module2 = nn.MulConstant(constant) - + local gradOutput1 = torch.rand(ink, inj, ini) local gradOutput2 = gradOutput1:clone() - + local out1 = module1:forward(input1) local out2 = module2:forward(input2) - - mytester:asserteq(0, (out1-out2):abs():max(), torch.typename(module1) .. + + mytester:asserteq(0, (out1-out2):abs():max(), torch.typename(module1) .. ' - in-place forward err ') local gradInput1 = module1:backward(input1, gradOutput1) local gradInput2 = module2:backward(input2, gradOutput2) - - mytester:asserteq(0, (gradInput1-gradInput2):abs():max(), + + mytester:asserteq(0, (gradInput1-gradInput2):abs():max(), torch.typename(module1) .. ' - in-place backward err ') - + local input1 = torch.rand(ink, inj, ini) local input2 = input1:clone() - + module1:forward(input1) module1:backward(module1.output,torch.rand(input1:size())) - + local err = (input1-input2):abs():max() - mytester:assertalmosteq(err, 0, 1e-15, torch.typename(module1) .. + mytester:assertalmosteq(err, 0, 1e-15, torch.typename(module1) .. ' - inplace input change err ') end @@ -3563,10 +3600,10 @@ function nntest.Replicate() mytester:assertTensorEq(vOutput1, expected1, precision, 'Wrong tiling of data when replicating vector.') mytester:assertTensorEq(vOutput2, expected2, precision, 'Wrong tiling of data when replicating vector.') - + -- batch mode local vector = torch.rand(4,3) - + local r1 = nn.Replicate(2, 1, 1) local r2 = nn.Replicate(2, 2, 1) |