#ifndef TH_GENERIC_FILE #define TH_GENERIC_FILE "generic/SpatialAveragePooling.c" #else static int nn_(SpatialAveragePooling_updateOutput)(lua_State *L) { THTensor *input = luaT_checkudata(L, 2, torch_Tensor); int kW = luaT_getfieldcheckint(L, 1, "kW"); int kH = luaT_getfieldcheckint(L, 1, "kH"); int dW = luaT_getfieldcheckint(L, 1, "dW"); int dH = luaT_getfieldcheckint(L, 1, "dH"); THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_Tensor); real *output_data; real *input_data; int dimw = 2; int dimh = 1; int dimc = 0; long nbatch = 1; long inputWidth; long inputHeight; long outputWidth; long outputHeight; long nInputPlane; // number of channels (or colors) long k; luaL_argcheck(L, input->nDimension == 3 || input->nDimension == 4, 2, "3D or 4D(batch mode) tensor expected"); if (input->nDimension == 4) { nbatch = input->size[0]; dimw++; dimh++; dimc++; } inputWidth = input->size[dimw]; inputHeight = input->size[dimh]; nInputPlane = input->size[dimc]; outputWidth = (inputWidth - kW) / dW + 1; outputHeight = (inputHeight - kH) / dH + 1; luaL_argcheck(L, inputWidth >= kW && inputHeight >= kH, 2, "input image smaller than kernel size"); if (input->nDimension == 3) THTensor_(resize3d)(output, nInputPlane, outputHeight, outputWidth); else THTensor_(resize4d)(output, input->size[0], nInputPlane, outputHeight, outputWidth); input = THTensor_(newContiguous)(input); luaL_argcheck(L, THTensor_(isContiguous)(output), 1, ""); input_data = THTensor_(data)(input); output_data = THTensor_(data)(output); #pragma omp parallel for private(k) for(k = 0; k < nInputPlane; k++) { long p; for(p = 0; p < nbatch; p++) { long xx, yy; /* For all output pixels... */ real *ptr_output = output_data + p*nInputPlane*outputWidth*outputHeight + k*outputWidth*outputHeight; long i; for(i = 0; i < outputWidth*outputHeight; i++) ptr_output[i] = 0; for(yy = 0; yy < outputHeight; yy++) { for(xx = 0; xx < outputWidth; xx++) { /* Compute the mean of the input image... */ real *ptr_input = input_data + p*nInputPlane*inputWidth*inputHeight + k*inputWidth*inputHeight + yy*dH*inputWidth+xx*dW; real sum = 0; long kx, ky; for(ky = 0; ky < kH; ky++) { for(kx = 0; kx < kW; kx++) sum += ptr_input[kx]; ptr_input += inputWidth; /* next input line */ } /* Update output */ *ptr_output++ += sum/(kW*kH); } } } } THTensor_(free)(input); return 1; } static int nn_(SpatialAveragePooling_updateGradInput)(lua_State *L) { THTensor *input = luaT_checkudata(L, 2, torch_Tensor); THTensor *gradOutput = luaT_checkudata(L, 3, torch_Tensor); int kW = luaT_getfieldcheckint(L, 1, "kW"); int kH = luaT_getfieldcheckint(L, 1, "kH"); int dW = luaT_getfieldcheckint(L, 1, "dW"); int dH = luaT_getfieldcheckint(L, 1, "dH"); THTensor *gradInput = luaT_getfieldcheckudata(L, 1, "gradInput", torch_Tensor); int dimw = 2; int dimh = 1; int dimc = 0; long nbatch = 1; long inputWidth; long inputHeight; long outputWidth; long outputHeight; long nInputPlane; // number of channels (or colors) real *gradOutput_data; real *input_data, *gradInput_data; long k; if (input->nDimension == 4) { nbatch = input->size[0]; dimw++; dimh++; dimc++; } inputWidth = input->size[dimw]; inputHeight = input->size[dimh]; nInputPlane = input->size[dimc]; outputWidth = (inputWidth - kW) / dW + 1; outputHeight = (inputHeight - kH) / dH + 1; input_data = THTensor_(data)(input); THTensor_(resizeAs)(gradInput, input); input = THTensor_(newContiguous)(input); gradOutput = THTensor_(newContiguous)(gradOutput); luaL_argcheck(L, THTensor_(isContiguous)(gradInput), 1, ""); gradInput_data = THTensor_(data)(gradInput); gradOutput_data = THTensor_(data)(gradOutput); #pragma omp parallel for private(k) for(k = 0; k < nInputPlane; k++) { long p; for(p = 0; p < nbatch; p++) { real *ptr_gradOutput = gradOutput_data + p*nInputPlane*outputHeight*outputWidth + k*outputWidth*outputHeight; long xx, yy; real* ptr_gi = gradInput_data + p*nInputPlane*inputWidth*inputHeight + k*inputWidth*inputHeight; long i; for(i=0; i