#ifndef TH_GENERIC_FILE #define TH_GENERIC_FILE "generic/VolumetricAveragePooling.c" #else static void nn_(VolumetricAveragePooling_updateOutput_frame)( real *input_p, real *output_p, long nslices, long itime, long iwidth, long iheight, long otime, long owidth, long oheight, int kT, int kW, int kH, int dT, int dW, int dH) { long k; #pragma omp parallel for private(k) for (k = 0; k < nslices; k++) { /* loop over output */ long i, j, ti; for(ti = 0; ti < otime; ti++) { for(i = 0; i < oheight; i++) { for(j = 0; j < owidth; j++) { /* local pointers */ real *ip = input_p + k * itime * iwidth * iheight + ti * iwidth * iheight * dT + i * iwidth * dH + j * dW; real *op = output_p + k * otime * owidth * oheight + ti * owidth * oheight + i * owidth + j; /* compute local sum: */ real sum = 0.0; int x,y,z; for(z=0; z < kT; z++) { for(y = 0; y < kH; y++) { for(x = 0; x < kW; x++) { sum += *(ip + z * iwidth * iheight + y * iwidth + x); } } } /* set output to local max */ *op = sum / (kT * kW * kH); } } } } } static int nn_(VolumetricAveragePooling_updateOutput)(lua_State *L) { THTensor *input = luaT_checkudata(L, 2, torch_Tensor); int kT = luaT_getfieldcheckint(L, 1, "kT"); int kW = luaT_getfieldcheckint(L, 1, "kW"); int kH = luaT_getfieldcheckint(L, 1, "kH"); int dT = luaT_getfieldcheckint(L, 1, "dT"); int dW = luaT_getfieldcheckint(L, 1, "dW"); int dH = luaT_getfieldcheckint(L, 1, "dH"); THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_Tensor); long nslices; long itime; long iheight; long iwidth; long otime; long oheight; long owidth; real *input_data; real *output_data; luaL_argcheck(L, input->nDimension == 4 || input->nDimension == 5, 2, "4D or 5D (batch-mode) tensor expected"); int dimN = 0; int dimt = 1; int dimh = 2; int dimw = 3; if (input->nDimension == 5) { dimN++; dimt++; dimh++; dimw++; } luaL_argcheck(L, input->size[dimw] >= kW && input->size[dimh] >= kH && input->size[dimt] >= kT, 2, "input image smaller than kernel size"); /* sizes */ nslices = input->size[dimN]; itime = input->size[dimt]; iheight = input->size[dimh]; iwidth = input->size[dimw]; otime = (itime - kT) / dT + 1; oheight = (iheight - kH) / dH + 1; owidth = (iwidth - kW) / dW + 1; /* get contiguous input */ input = THTensor_(newContiguous)(input); if (input->nDimension == 4) { /* non-batch mode */ /* resize output */ THTensor_(resize4d)(output, nslices, otime, oheight, owidth); input_data = THTensor_(data)(input); output_data = THTensor_(data)(output); nn_(VolumetricAveragePooling_updateOutput_frame)(input_data, output_data, nslices, itime, iwidth, iheight, otime, owidth, oheight, kT, kW, kH, dT, dW, dH); } else { /* batch mode */ long p; long nBatch = input->size[0]; long istride = nslices * itime * iwidth * iheight; long ostride = nslices * otime * owidth * oheight; /* resize output */ THTensor_(resize5d)(output, nBatch, nslices, otime, oheight, owidth); input_data = THTensor_(data)(input); output_data = THTensor_(data)(output); #pragma omp parallel for private(p) for (p=0; p < nBatch; p++) { nn_(VolumetricAveragePooling_updateOutput_frame)( input_data + p * istride, output_data + p * ostride, nslices, itime, iwidth, iheight, otime, owidth, oheight, kT, kW, kH, dT, dW, dH); } } /* cleanup */ THTensor_(free)(input); return 1; } static void nn_(VolumetricAveragePooling_updateGradInput_frame)( real *gradInput_p, real *gradOutput_p, long nslices, long itime, long iwidth, long iheight, long otime, long owidth, long oheight, int kT, int kW, int kH, int dT, int dW, int dH) { long k; #pragma omp parallel for private(k) for (k = 0; k < nslices; k++) { /* loop over output */ long i, j, ti; for(ti = 0; ti < otime; ti++) { for(i = 0; i < oheight; i++) { for(j = 0; j < owidth; j++) { /* local pointers */ real *ip = gradInput_p + k * itime * iwidth * iheight + ti * iwidth * iheight * dT + i * iwidth * dH + j * dW; real *op = gradOutput_p + k * otime * owidth * oheight + ti * owidth * oheight + i * owidth + j; /* scatter gradients out to footprint: */ real val = *op / (kT * kW * kH); int x,y,z; for(z=0; z < kT; z++) { for(y = 0; y < kH; y++) { for(x = 0; x < kW; x++) { *(ip + z * iwidth * iheight + y * iwidth + x) += val; } } } } } } } } static int nn_(VolumetricAveragePooling_updateGradInput)(lua_State *L) { THTensor *input = luaT_checkudata(L, 2, torch_Tensor); THTensor *gradOutput = luaT_checkudata(L, 3, torch_Tensor); int dT = luaT_getfieldcheckint(L, 1, "dT"); int dW = luaT_getfieldcheckint(L, 1, "dW"); int dH = luaT_getfieldcheckint(L, 1, "dH"); int kT = luaT_getfieldcheckint(L, 1, "kT"); int kW = luaT_getfieldcheckint(L, 1, "kW"); int kH = luaT_getfieldcheckint(L, 1, "kH"); THTensor *gradInput = luaT_getfieldcheckudata(L, 1, "gradInput", torch_Tensor); int nslices; int itime; int iheight; int iwidth; int otime; int oheight; int owidth; real *gradInput_data; real *gradOutput_data; int dimN = 0; int dimt = 1; int dimh = 2; int dimw = 3; /* get contiguous gradOutput */ gradOutput = THTensor_(newContiguous)(gradOutput); /* resize */ THTensor_(resizeAs)(gradInput, input); THTensor_(zero)(gradInput); if (input->nDimension == 5) { dimN++; dimt++; dimh++; dimw++; } /* sizes */ nslices = input->size[dimN]; itime = input->size[dimt]; iheight = input->size[dimh]; iwidth = input->size[dimw]; otime = gradOutput->size[dimt]; oheight = gradOutput->size[dimh]; owidth = gradOutput->size[dimw]; /* get raw pointers */ gradInput_data = THTensor_(data)(gradInput); gradOutput_data = THTensor_(data)(gradOutput); /* backprop */ if (input->nDimension == 4) { /* non-batch mode*/ nn_(VolumetricAveragePooling_updateGradInput_frame)( gradInput_data, gradOutput_data, nslices, itime, iwidth, iheight, otime, owidth, oheight, kT, kW, kH, dT, dW, dH); } else { /* batch mode */ long p; long nBatch = input->size[0]; long istride = nslices * itime * iwidth * iheight; long ostride = nslices * otime * owidth * oheight; #pragma omp parallel for private(p) for (p = 0; p < nBatch; p++) { nn_(VolumetricAveragePooling_updateGradInput_frame)( gradInput_data + p * istride, gradOutput_data + p * ostride, nslices, itime, iwidth, iheight, otime, owidth, oheight, kT, kW, kH, dT, dW, dH); } } /* cleanup */ THTensor_(free)(gradOutput); return 1; } static const struct luaL_Reg nn_(VolumetricAveragePooling__) [] = { {"VolumetricAveragePooling_updateOutput", nn_(VolumetricAveragePooling_updateOutput)}, {"VolumetricAveragePooling_updateGradInput", nn_(VolumetricAveragePooling_updateGradInput)}, {NULL, NULL} }; static void nn_(VolumetricAveragePooling_init)(lua_State *L) { luaT_pushmetatable(L, torch_Tensor); luaT_registeratname(L, nn_(VolumetricAveragePooling__), "nn"); lua_pop(L,1); } #endif