#ifndef TH_GENERIC_FILE
#define TH_GENERIC_FILE "generic/SpatialMaxPooling.c"
#else

/*
 * Number of pooling windows along one spatial dimension.
 *
 *   isize     - input extent along the dimension
 *   ksize     - kernel extent
 *   stride    - step between consecutive windows (must be > 0)
 *   pad       - padding applied on each side
 *   ceil_mode - non-zero to round the window count up, zero to round it down
 *
 * The quotient is computed with exact integer arithmetic; going through
 * float would lose precision once the operands exceed 2^24.
 */
static long nn_(SpatialMaxPooling_outputSize)(long isize, int ksize, int stride,
                                              int pad, int ceil_mode)
{
  long span = isize - ksize + 2 * (long)pad;
  long osize = span / stride;

  /* C integer division truncates toward zero: that is already floor() for a
     positive span and ceil() for a negative one, so only the opposite case
     needs adjusting when the division is inexact. */
  if (span % stride != 0)
  {
    if (ceil_mode && span > 0)
      ++osize;
    else if (!ceil_mode && span < 0)
      --osize;
  }
  ++osize;

  /* Ensure that the last pooling starts inside the image. This must also be
     done without padding: in ceil mode with stride > kernel the last window
     could otherwise lie entirely outside the input. */
  if ((osize - 1) * stride >= isize + pad)
    --osize;

  return osize;
}

/*
 * Max-pool every slice of one frame.
 *
 * input_p holds nslices planes of iheight x iwidth values, output_p and ind_p
 * hold nslices planes of oheight x owidth values; all are indexed row by row.
 * For each output position the maximum of the (clipped) kH x kW input window
 * is written to output_p, and the 1-based offset of that maximum inside its
 * input plane is written to ind_p.
 *
 * The caller must guarantee that every window overlaps the input (see
 * nn_(SpatialMaxPooling_outputSize)).
 */
static void nn_(SpatialMaxPooling_updateOutput_frame)(real *input_p, real *output_p,
                                                      real *ind_p,
                                                      long nslices,
                                                      long iwidth, long iheight,
                                                      long owidth, long oheight,
                                                      int kW, int kH, int dW, int dH,
                                                      int padW, int padH)
{
  long k;
#pragma omp parallel for private(k)
  for (k = 0; k < nslices; k++)
  {
    /* loop over output */
    long i, j;
    real *ip = input_p + k*iwidth*iheight;
    for (i = 0; i < oheight; i++)
    {
      for (j = 0; j < owidth; j++)
      {
        /* window bounds, clipped to the input with integer arithmetic */
        long hstart = i * dH - padH;
        long wstart = j * dW - padW;
        long hend = hstart + kH;
        long wend = wstart + kW;
        if (hend > iheight)
          hend = iheight;
        if (wend > iwidth)
          wend = iwidth;
        if (hstart < 0)
          hstart = 0;
        if (wstart < 0)
          wstart = 0;

        /* local pointers */
        real *op = output_p + k*owidth*oheight + i*owidth + j;
        real *indp = ind_p + k*owidth*oheight + i*owidth + j;

        /* compute local max: start from the first element of the window so
           that the stored index stays valid even when no element compares
           greater than -THInf (window holding only -inf or NaN values) */
        long maxindex = hstart*iwidth + wstart;
        real maxval = -THInf;
        long tcntr = 0;
        long x, y;
        for (y = hstart; y < hend; y++)
        {
          for (x = wstart; x < wend; x++)
          {
            tcntr = y*iwidth + x;
            real val = *(ip + tcntr);
            if (val > maxval)
            {
              maxval = val;
              maxindex = tcntr;
            }
          }
        }

        /* set output to local max */
        *op = maxval;

        /* store location of max (1-based) */
        *indp = maxindex + 1;
      }
    }
  }
}

/*
 * Lua entry point: forward pass.
 *
 * Stack: 1 = module table (fields kW, kH, dW, dH, padW, padH, ceil_mode,
 * indices, output), 2 = input tensor (3D, or 4D in batch mode).
 * Resizes and fills the module's `output` and `indices` tensors.
 * Returns 1 (the number of Lua results reported).
 */
static int nn_(SpatialMaxPooling_updateOutput)(lua_State *L)
{
  THTensor *input = luaT_checkudata(L, 2, torch_Tensor);
  int kW = luaT_getfieldcheckint(L, 1, "kW");
  int kH = luaT_getfieldcheckint(L, 1, "kH");
  int dW = luaT_getfieldcheckint(L, 1, "dW");
  int dH = luaT_getfieldcheckint(L, 1, "dH");
  int padW = luaT_getfieldcheckint(L, 1, "padW");
  int padH = luaT_getfieldcheckint(L, 1, "padH");
  int ceil_mode = luaT_getfieldcheckboolean(L,1,"ceil_mode");
  THTensor *indices = luaT_getfieldcheckudata(L, 1, "indices", torch_Tensor);
  THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_Tensor);
  int dimw = 2;
  int dimh = 1;
  long nbatch = 1;
  long nslices;
  long iheight;
  long iwidth;
  long oheight;
  long owidth;
  real *input_data;
  real *output_data;
  real *indices_data;

  luaL_argcheck(L, input->nDimension == 3 || input->nDimension == 4 , 2, "3D or 4D (batch mode) tensor expected");

  /* a zero stride would divide by zero below, and a non-positive kernel
     would produce empty pooling windows */
  luaL_argcheck(L, kW > 0 && kH > 0 && dW > 0 && dH > 0, 1, "kernel size and stride must be positive");

  if (input->nDimension == 4)
  {
    nbatch = input->size[0];
    dimw++;
    dimh++;
  }
  luaL_argcheck(L, input->size[dimw] >= kW - padW && input->size[dimh] >= kH - padH, 2, "input image smaller than kernel size");

  luaL_argcheck(L, kW/2 >= padW && kH/2 >= padH, 2, "pad should be smaller than half of kernel size");

  /* sizes */
  nslices = input->size[dimh-1];
  iheight = input->size[dimh];
  iwidth = input->size[dimw];
  oheight = nn_(SpatialMaxPooling_outputSize)(iheight, kH, dH, padH, ceil_mode);
  owidth = nn_(SpatialMaxPooling_outputSize)(iwidth, kW, dW, padW, ceil_mode);

  /* get contiguous input (done after all argument checks so that a failed
     check cannot leak the new reference) */
  input = THTensor_(newContiguous)(input);

  /* resize output */
  if (input->nDimension == 3)
  {
    THTensor_(resize3d)(output, nslices, oheight, owidth);
    /* indices will contain the locations for each output point */
    THTensor_(resize3d)(indices, nslices, oheight, owidth);

    input_data = THTensor_(data)(input);
    output_data = THTensor_(data)(output);
    indices_data = THTensor_(data)(indices);

    nn_(SpatialMaxPooling_updateOutput_frame)(input_data, output_data,
                                              indices_data,
                                              nslices,
                                              iwidth, iheight,
                                              owidth, oheight,
                                              kW, kH, dW, dH,
                                              padW, padH);
  }
  else
  {
    long p;

    THTensor_(resize4d)(output, nbatch, nslices, oheight, owidth);
    /* indices will contain the locations for each output point */
    THTensor_(resize4d)(indices, nbatch, nslices, oheight, owidth);

    input_data = THTensor_(data)(input);
    output_data = THTensor_(data)(output);
    indices_data = THTensor_(data)(indices);

    /* one frame per batch element; each frame owns a disjoint part of the
       output and indices buffers */
#pragma omp parallel for private(p)
    for (p = 0; p < nbatch; p++)
    {
      nn_(SpatialMaxPooling_updateOutput_frame)(input_data+p*nslices*iwidth*iheight,
                                                output_data+p*nslices*owidth*oheight,
                                                indices_data+p*nslices*owidth*oheight,
                                                nslices,
                                                iwidth, iheight,
                                                owidth, oheight,
                                                kW, kH, dW, dH,
                                                padW, padH);
    }
  }

  /* cleanup */
  THTensor_(free)(input);
  return 1;
}

/*
 * Back-propagate one frame.
 *
 * For every output position, adds its gradOutput value to the gradInput
 * element designated by the matching (1-based) entry of ind_p. gradInput_p
 * must have been zeroed by the caller, and ind_p must hold indices that are
 * valid offsets into an iheight x iwidth plane, as written by the forward
 * pass. dW and dH are accepted but not used.
 */
static void nn_(SpatialMaxPooling_updateGradInput_frame)(real *gradInput_p, real *gradOutput_p,
                                                         real *ind_p,
                                                         long nslices,
                                                         long iwidth, long iheight,
                                                         long owidth, long oheight,
                                                         int dW, int dH)
{
  long k;
#pragma omp parallel for private(k)
  for (k = 0; k < nslices; k++)
  {
    real *gradInput_p_k = gradInput_p + k*iwidth*iheight;
    real *gradOutput_p_k = gradOutput_p + k*owidth*oheight;
    real *ind_p_k = ind_p + k*owidth*oheight;

    /* calculate max points */
    long i, j;
    for (i = 0; i < oheight; i++)
    {
      for (j = 0; j < owidth; j++)
      {
        /* retrieve position of max (stored 1-based) */
        long maxp = ind_p_k[i*owidth + j] - 1;
        /* update gradient; accumulate because windows may overlap */
        gradInput_p_k[maxp] += gradOutput_p_k[i*owidth + j];
      }
    }
  }
}

/*
 * Lua entry point: backward pass.
 *
 * Stack: 1 = module table (fields dW, dH, indices, gradInput),
 * 2 = input tensor, 3 = gradOutput tensor.
 * Resizes the module's `gradInput` like the input, zeroes it and scatters
 * gradOutput into it using the indices recorded by the forward pass.
 * Returns 1 (the number of Lua results reported).
 */
static int nn_(SpatialMaxPooling_updateGradInput)(lua_State *L)
{
  THTensor *input = luaT_checkudata(L, 2, torch_Tensor);
  THTensor *gradOutput = luaT_checkudata(L, 3, torch_Tensor);
  int dW = luaT_getfieldcheckint(L, 1, "dW");
  int dH = luaT_getfieldcheckint(L, 1, "dH");
  THTensor *indices = luaT_getfieldcheckudata(L, 1, "indices", torch_Tensor);
  THTensor *gradInput = luaT_getfieldcheckudata(L, 1, "gradInput", torch_Tensor);
  int dimw = 2;
  int dimh = 1;
  long nbatch = 1;
  /* long, like the forward pass and the frame helper, so that tensor sizes
     are not narrowed */
  long nslices;
  long iheight;
  long iwidth;
  long oheight;
  long owidth;
  real *gradInput_data;
  real *gradOutput_data;
  real *indices_data;

  /* get contiguous gradOutput */
  gradOutput = THTensor_(newContiguous)(gradOutput);

  /* resize */
  THTensor_(resizeAs)(gradInput, input);
  THTensor_(zero)(gradInput);

  if (input->nDimension == 4)
  {
    nbatch = input->size[0];
    dimw++;
    dimh++;
  }

  /* sizes */
  nslices = input->size[dimh-1];
  iheight = input->size[dimh];
  iwidth = input->size[dimw];
  oheight = gradOutput->size[dimh];
  owidth = gradOutput->size[dimw];

  /* get raw pointers */
  gradInput_data = THTensor_(data)(gradInput);
  gradOutput_data = THTensor_(data)(gradOutput);
  indices_data = THTensor_(data)(indices);

  /* backprop */
  if (input->nDimension == 3)
  {
    nn_(SpatialMaxPooling_updateGradInput_frame)(gradInput_data, gradOutput_data,
                                                 indices_data,
                                                 nslices,
                                                 iwidth, iheight,
                                                 owidth, oheight,
                                                 dW, dH);
  }
  else
  {
    long p;
#pragma omp parallel for private(p)
    for (p = 0; p < nbatch; p++)
    {
      nn_(SpatialMaxPooling_updateGradInput_frame)(gradInput_data+p*nslices*iwidth*iheight,
                                                   gradOutput_data+p*nslices*owidth*oheight,
                                                   indices_data+p*nslices*owidth*oheight,
                                                   nslices,
                                                   iwidth, iheight,
                                                   owidth, oheight,
                                                   dW, dH);
    }
  }

  /* cleanup */
  THTensor_(free)(gradOutput);

  return 1;
}

/* Lua-visible functions exported by this file. */
static const struct luaL_Reg nn_(SpatialMaxPooling__) [] = {
  {"SpatialMaxPooling_updateOutput", nn_(SpatialMaxPooling_updateOutput)},
  {"SpatialMaxPooling_updateGradInput", nn_(SpatialMaxPooling_updateGradInput)},
  {NULL, NULL}
};

/*
 * Pushes the torch_Tensor metatable, registers the functions above through
 * luaT_registeratname under the name "nn", then pops the metatable again.
 */
static void nn_(SpatialMaxPooling_init)(lua_State *L)
{
  luaT_pushmetatable(L, torch_Tensor);
  luaT_registeratname(L, nn_(SpatialMaxPooling__), "nn");
  lua_pop(L,1);
}

#endif