From e96b6054090a06a2e63a1d564b8b150a55a1e465 Mon Sep 17 00:00:00 2001 From: Ronan Collobert Date: Tue, 8 Oct 2013 12:20:36 +0200 Subject: pkg/nn: C89 --- generic/SoftMax.c | 4 +- generic/SpatialConvolution.c | 117 ++++++++++++++----------- generic/SpatialConvolutionMM.c | 38 +++++---- generic/SpatialConvolutionMap.c | 166 +++++++++++++++++++++++------------- generic/SpatialFullConvolution.c | 117 ++++++++++++++----------- generic/SpatialFullConvolutionMap.c | 165 ++++++++++++++++++++++------------- generic/SpatialMaxPooling.c | 109 +++++++++++++---------- generic/SpatialSubSampling.c | 84 ++++++++++++------ generic/TemporalMaxPooling.c | 71 +++++++++------ generic/VolumetricConvolution.c | 62 ++++++++------ 10 files changed, 566 insertions(+), 367 deletions(-) diff --git a/generic/SoftMax.c b/generic/SoftMax.c index fd73b3e..bddb70d 100644 --- a/generic/SoftMax.c +++ b/generic/SoftMax.c @@ -31,11 +31,13 @@ static int nn_(SoftMax_updateOutput)(lua_State *L) for(t = 0; t < nframe; t++) { real inputMax = -THInf; + accreal sum; + for(d = 0; d < dim; d++) { if (input_data[d] >= inputMax) inputMax = input_data[d]; } - accreal sum = 0; + sum = 0; for(d = 0; d < dim; d++) { real z = THExpMinusApprox(inputMax - input_data[d]); output_data[d] = z; diff --git a/generic/SpatialConvolution.c b/generic/SpatialConvolution.c index bfe5698..bcadf38 100644 --- a/generic/SpatialConvolution.c +++ b/generic/SpatialConvolution.c @@ -12,70 +12,79 @@ static int nn_(SpatialConvolution_updateOutput)(lua_State *L) THTensor *bias = luaT_getfieldcheckudata(L, 1, "bias", torch_Tensor); THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_Tensor); - luaL_argcheck(L, input->nDimension == 3 || input->nDimension == 4, 2, "3D or 4D(batch mode) tensor expected"); - int dimw = 2; int dimh = 1; + + luaL_argcheck(L, input->nDimension == 3 || input->nDimension == 4, 2, "3D or 4D(batch mode) tensor expected"); + if (input->nDimension == 4) { dimw++; dimh++; } - long nOutputPlane = 
weight->size[0]; - long kW = weight->size[3]; - long kH = weight->size[2]; - long inputWidth = input->size[dimw]; - long inputHeight = input->size[dimh]; - long outputWidth = (inputWidth - kW) / dW + 1; - long outputHeight = (inputHeight - kH) / dH + 1; - - if (input->nDimension == 3) { - THTensor_(resize3d)(output, nOutputPlane, outputHeight, outputWidth); - /* add bias */ - long i; - /*THTensor *outn = THTensor_(new)();*/ - real* bias_data = THTensor_(data)(bias); - real* output_data = THTensor_(data)(output); -#pragma omp parallel for private(i) - for (i=0; i<bias->size[0]; i++) + long nOutputPlane = weight->size[0]; + long kW = weight->size[3]; + long kH = weight->size[2]; + long inputWidth = input->size[dimw]; + long inputHeight = input->size[dimh]; + long outputWidth = (inputWidth - kW) / dW + 1; + long outputHeight = (inputHeight - kH) / dH + 1; + + if (input->nDimension == 3) { - /*THTensor_(select)(outn,output,0,i);*/ - /*TH_TENSOR_APPLY(real,outn, *outn_data = bias_data[i];);*/ - real *ptr_output = output_data + i*outputWidth*outputHeight; - long j; - for(j = 0; j < outputWidth*outputHeight; j++) - ptr_output[j] = bias_data[i]; - } - /*THTensor_(free)(outn);*/ - - /* do convolutions */ - THTensor_(conv2Dmv)(output, 1.0, 1.0, input, weight, dH, dW, "V","X"); - } - else - { - THTensor_(resize4d)(output, input->size[0], nOutputPlane, outputHeight, outputWidth); + long i; + real* bias_data; + real* output_data; - real* bias_data = THTensor_(data)(bias); - real* output_data = THTensor_(data)(output); + THTensor_(resize3d)(output, nOutputPlane, outputHeight, outputWidth); + /* add bias */ + bias_data = THTensor_(data)(bias); + output_data = THTensor_(data)(output); - long p; -#pragma omp parallel for private(p) - for (p=0; p<input->size[0]; p++) - { - /* BIAS */ - long i; +#pragma omp parallel for private(i) for (i=0; i<bias->size[0]; i++) { - real *ptr_output = output_data + p*nOutputPlane*outputWidth*outputHeight + i*outputWidth*outputHeight; +
/*THTensor_(select)(outn,output,0,i);*/ + /*TH_TENSOR_APPLY(real,outn, *outn_data = bias_data[i];);*/ + real *ptr_output = output_data + i*outputWidth*outputHeight; long j; for(j = 0; j < outputWidth*outputHeight; j++) ptr_output[j] = bias_data[i]; } + /*THTensor_(free)(outn);*/ + + /* do convolutions */ + THTensor_(conv2Dmv)(output, 1.0, 1.0, input, weight, dH, dW, "V","X"); } + else + { + real* bias_data; + real* output_data; + long p; - /* do convolutions */ - THTensor_(conv2Dmm)(output, 1.0, 1.0, input, weight, dH, dW, "V","X"); + THTensor_(resize4d)(output, input->size[0], nOutputPlane, outputHeight, outputWidth); + + bias_data = THTensor_(data)(bias); + output_data = THTensor_(data)(output); + +#pragma omp parallel for private(p) + for (p=0; p<input->size[0]; p++) + { + /* BIAS */ + long i; + for (i=0; i<bias->size[0]; i++) + { + real *ptr_output = output_data + p*nOutputPlane*outputWidth*outputHeight + i*outputWidth*outputHeight; + long j; + for(j = 0; j < outputWidth*outputHeight; j++) + ptr_output[j] = bias_data[i]; + } + } + + /* do convolutions */ + THTensor_(conv2Dmm)(output, 1.0, 1.0, input, weight, dH, dW, "V","X"); + } } return 1; } @@ -92,10 +101,12 @@ static int nn_(SpatialConvolution_updateGradInput)(lua_State *L) THTensor *weight = luaT_getfieldcheckudata(L, 1, "weight", torch_Tensor); THTensor *gradInput = luaT_getfieldcheckudata(L, 1, "gradInput", torch_Tensor); + THTensor *tweight; + THArgCheck( nOutputPlane == gradOutput->size[input->nDimension == 4 ?
1 : 0], 1, "Number of output features is not equal to nOutputPlane" ); /* gradient to input */ - THTensor *tweight = THTensor_(newTranspose)(weight,0,1); + tweight = THTensor_(newTranspose)(weight,0,1); if (input->nDimension == 3) { @@ -122,11 +133,15 @@ static int nn_(SpatialConvolution_accGradParameters)(lua_State *L) THTensor *gradWeight = luaT_getfieldcheckudata(L, 1, "gradWeight", torch_Tensor); THTensor *gradBias = luaT_getfieldcheckudata(L, 1, "gradBias", torch_Tensor); - THArgCheck( nOutputPlane == gradOutput->size[input->nDimension == 4 ? 1 : 0], 1, "Number of output features is not equal to nOutputPlane" ); - int dimw = 2; int dimh = 1; + real *gradBias_data; + real *gradOutput_data; + long noutSlice; + + THArgCheck( nOutputPlane == gradOutput->size[input->nDimension == 4 ? 1 : 0], 1, "Number of output features is not equal to nOutputPlane" ); + if (input->nDimension == 4) { dimw++; @@ -134,9 +149,9 @@ static int nn_(SpatialConvolution_accGradParameters)(lua_State *L) } /* gradient to bias */ - real *gradBias_data = THTensor_(data)(gradBias); - real *gradOutput_data = THTensor_(data)(gradOutput); - long noutSlice = gradOutput->size[dimh]*gradOutput->size[dimw]; + gradBias_data = THTensor_(data)(gradBias); + gradOutput_data = THTensor_(data)(gradOutput); + noutSlice = gradOutput->size[dimh]*gradOutput->size[dimw]; /*THTensor* gradOutSlice = THTensor_(new)();*/ if (input->nDimension == 3) diff --git a/generic/SpatialConvolutionMM.c b/generic/SpatialConvolutionMM.c index a8014f7..d4fd953 100644 --- a/generic/SpatialConvolutionMM.c +++ b/generic/SpatialConvolutionMM.c @@ -61,12 +61,13 @@ static void nn_(SpatialConvolutionMM_updateOutput_frame)(THTensor *input, THTens long nOutputPlane, long outputWidth, long outputHeight) { long i; + THTensor *output2d; nn_(unfolded_copy)(finput, input, kW, kH, nInputPlane, inputWidth, inputHeight, outputWidth, outputHeight); - THTensor *output2d = THTensor_(newWithStorage2d)(output->storage, output->storageOffset, - 
nOutputPlane, -1, - outputHeight*outputWidth, -1); + output2d = THTensor_(newWithStorage2d)(output->storage, output->storageOffset, + nOutputPlane, -1, + outputHeight*outputWidth, -1); for(i = 0; i < nOutputPlane; i++) THVector_(fill)(output->storage->data+output->storageOffset+output->stride[0]*i, THTensor_(get1d)(bias, i), outputHeight*outputWidth); @@ -87,23 +88,31 @@ static int nn_(SpatialConvolutionMM_updateOutput)(lua_State *L) THTensor *bias = luaT_getfieldcheckudata(L, 1, "bias", torch_Tensor); THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_Tensor); - luaL_argcheck(L, input->nDimension == 3 || input->nDimension == 4, 2, "3D or 4D(batch mode) tensor expected"); - int dimf = 0; int dimw = 2; int dimh = 1; + + long nInputPlane; + long inputWidth; + long inputHeight; + long nOutputPlane; + long outputWidth; + long outputHeight; + + luaL_argcheck(L, input->nDimension == 3 || input->nDimension == 4, 2, "3D or 4D(batch mode) tensor expected"); + if (input->nDimension == 4) { dimf++; dimw++; dimh++; } - long nInputPlane = input->size[dimf]; - long inputWidth = input->size[dimw]; - long inputHeight = input->size[dimh]; - long nOutputPlane = weight->size[0]; - long outputWidth = (inputWidth - kW) + 1; - long outputHeight = (inputHeight - kH) + 1; + nInputPlane = input->size[dimf]; + inputWidth = input->size[dimw]; + inputHeight = input->size[dimh]; + nOutputPlane = weight->size[0]; + outputWidth = (inputWidth - kW) + 1; + outputHeight = (inputHeight - kH) + 1; if(input->nDimension == 3) { @@ -126,7 +135,7 @@ static int nn_(SpatialConvolutionMM_updateOutput)(lua_State *L) THStorage_(clearFlag)(input->storage, TH_STORAGE_REFCOUNTED); THStorage_(clearFlag)(output->storage, TH_STORAGE_REFCOUNTED); THStorage_(clearFlag)(finput->storage, TH_STORAGE_REFCOUNTED); -// mkl_set_num_threads(1); + #pragma omp parallel for private(t) for(t = 0; t < T; t++) { @@ -147,7 +156,6 @@ static int nn_(SpatialConvolutionMM_updateOutput)(lua_State *L) 
THStorage_(setFlag)(output->storage, TH_STORAGE_REFCOUNTED); THStorage_(setFlag)(finput->storage, TH_STORAGE_REFCOUNTED); } -// mkl_set_num_threads(4); return 1; } @@ -227,15 +235,15 @@ static void nn_(SpatialConvolutionMM_accGradParameters_frame)(THTensor *gradOutp real scale) { long i; - + THTensor *gradOutputPlane = THTensor_(new)(); THTensor *gradOutput2d = THTensor_(newWithStorage2d)(gradOutput->storage, gradOutput->storageOffset, gradOutput->size[0], -1, gradOutput->size[1]*gradOutput->size[2], -1); + THTensor_(transpose)(finput, finput, 0, 1); THTensor_(addmm)(gradWeight, 1, gradWeight, scale, gradOutput2d, finput); THTensor_(transpose)(finput, finput, 0, 1); - THTensor *gradOutputPlane = THTensor_(new)(); for(i = 0; i < gradBias->size[0]; i++) { long k; diff --git a/generic/SpatialConvolutionMap.c b/generic/SpatialConvolutionMap.c index a1d20bc..eed0ca9 100644 --- a/generic/SpatialConvolutionMap.c +++ b/generic/SpatialConvolutionMap.c @@ -17,6 +17,24 @@ static int nn_(SpatialConvolutionMap_updateOutput)(lua_State *L) THTensor *bias = luaT_getfieldcheckudata(L, 1, "bias", torch_Tensor); THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_Tensor); + real *input_data; + real *output_data; + real *weight_data; + real *bias_data; + real *connTable_data; + + long input_h; + long input_w; + long output_h; + long output_w; + long weight_h; + long weight_w; + + long p, k; + int nweight; + + + luaL_argcheck(L, input->nDimension == 3, 2, "3D tensor expected"); luaL_argcheck(L, input->size[0] >= nInputPlane, 2, "invalid number of input planes"); luaL_argcheck(L, input->size[2] >= kW && input->size[1] >= kH, 2, "input image smaller than kernel size"); @@ -25,39 +43,37 @@ static int nn_(SpatialConvolutionMap_updateOutput)(lua_State *L) (input->size[1] - kH) / dH + 1, (input->size[2] - kW) / dW + 1); - // contiguous + /* contiguous */ input = THTensor_(newContiguous)(input); output = THTensor_(newContiguous)(output); - // get raw pointers - real *input_data = 
THTensor_(data)(input); - real *output_data = THTensor_(data)(output); - real *weight_data = THTensor_(data)(weight); - real *bias_data = THTensor_(data)(bias); - real *connTable_data = THTensor_(data)(connTable); - - // and dims - long input_h = input->size[1]; - long input_w = input->size[2]; - long output_h = output->size[1]; - long output_w = output->size[2]; - long weight_h = weight->size[1]; - long weight_w = weight->size[2]; + /* get raw pointers */ + input_data = THTensor_(data)(input); + output_data = THTensor_(data)(output); + weight_data = THTensor_(data)(weight); + bias_data = THTensor_(data)(bias); + connTable_data = THTensor_(data)(connTable); + + /* and dims */ + input_h = input->size[1]; + input_w = input->size[2]; + output_h = output->size[1]; + output_w = output->size[2]; + weight_h = weight->size[1]; + weight_w = weight->size[2]; - long p; #pragma omp parallel for private(p) for (p = 0; p < nOutputPlane; p++) { - // add bias + /* add bias */ real *ptr_output = output_data + p*output_w*output_h; long j; for(j = 0; j < output_h*output_w; j++) ptr_output[j] = bias_data[p]; - // convolve all maps - int nweight = connTable->size[0]; - long k; + /* convolve all maps */ + nweight = connTable->size[0]; for (k = 0; k < nweight; k++) { - // get offsets for input/output + /* get offsets for input/output */ int o = (int)connTable_data[k*2+1]-1; int i = (int)connTable_data[k*2+0]-1; @@ -72,7 +88,7 @@ static int nn_(SpatialConvolutionMap_updateOutput)(lua_State *L) } } - // clean up + /* clean up */ THTensor_(free)(input); THTensor_(free)(output); @@ -91,34 +107,47 @@ static int nn_(SpatialConvolutionMap_updateGradInput)(lua_State *L) THTensor *weight = luaT_getfieldcheckudata(L, 1, "weight", torch_Tensor); THTensor *gradInput = luaT_getfieldcheckudata(L, 1, "gradInput", torch_Tensor); - // contiguous + real *gradInput_data; + real *gradOutput_data; + real *weight_data; + real *connTable_data; + + long input_h; + long input_w; + long output_h; + long output_w; 
+ long weight_h; + long weight_w; + + long p; + + /* contiguous */ gradInput = THTensor_(newContiguous)(gradInput); gradOutput = THTensor_(newContiguous)(gradOutput); - // Resize/Zero + /* Resize/Zero */ THTensor_(resizeAs)(gradInput, input); THTensor_(zero)(gradInput); - // get raw pointers - real *gradInput_data = THTensor_(data)(gradInput); - real *gradOutput_data = THTensor_(data)(gradOutput); - real *weight_data = THTensor_(data)(weight); - real *connTable_data = THTensor_(data)(connTable); + /* get raw pointers */ + gradInput_data = THTensor_(data)(gradInput); + gradOutput_data = THTensor_(data)(gradOutput); + weight_data = THTensor_(data)(weight); + connTable_data = THTensor_(data)(connTable); - // and dims - long input_h = input->size[1]; - long input_w = input->size[2]; - long output_h = gradOutput->size[1]; - long output_w = gradOutput->size[2]; - long weight_h = weight->size[1]; - long weight_w = weight->size[2]; + /* and dims */ + input_h = input->size[1]; + input_w = input->size[2]; + output_h = gradOutput->size[1]; + output_w = gradOutput->size[2]; + weight_h = weight->size[1]; + weight_w = weight->size[2]; - long p; #pragma omp parallel for private(p) for(p = 0; p < nInputPlane; p++) { long k; - // backward all + /* backward all */ int nkernel = connTable->size[0]; for(k = 0; k < nkernel; k++) { @@ -126,7 +155,7 @@ static int nn_(SpatialConvolutionMap_updateGradInput)(lua_State *L) int i = (int)connTable_data[k*2+0]-1; if (i == p) { - // gradient to input + /* gradient to input */ THTensor_(fullConv2Dptr)(gradInput_data + i*input_w*input_h, 1.0, gradOutput_data + o*output_w*output_h, output_h, output_w, @@ -136,7 +165,7 @@ static int nn_(SpatialConvolutionMap_updateGradInput)(lua_State *L) } } - // clean up + /* clean up */ THTensor_(free)(gradInput); THTensor_(free)(gradOutput); @@ -157,26 +186,41 @@ static int nn_(SpatialConvolutionMap_accGradParameters)(lua_State *L) THTensor *gradWeight = luaT_getfieldcheckudata(L, 1, "gradWeight", torch_Tensor); 
THTensor *gradBias = luaT_getfieldcheckudata(L, 1, "gradBias", torch_Tensor); - // contiguous + real *input_data; + real *gradOutput_data; + real *gradWeight_data; + real *gradBias_data; + + /* and dims */ + long input_h; + long input_w; + long output_h; + long output_w; + long weight_h; + long weight_w; + + long k; + int nkernel; + + /* contiguous */ input = THTensor_(newContiguous)(input); gradOutput = THTensor_(newContiguous)(gradOutput); - // get raw pointers - real *input_data = THTensor_(data)(input); - real *gradOutput_data = THTensor_(data)(gradOutput); - real *gradWeight_data = THTensor_(data)(gradWeight); - real *gradBias_data = THTensor_(data)(gradBias); - - // and dims - long input_h = input->size[1]; - long input_w = input->size[2]; - long output_h = gradOutput->size[1]; - long output_w = gradOutput->size[2]; - long weight_h = weight->size[1]; - long weight_w = weight->size[2]; - - // gradients wrt bias - long k; + /* get raw pointers */ + input_data = THTensor_(data)(input); + gradOutput_data = THTensor_(data)(gradOutput); + gradWeight_data = THTensor_(data)(gradWeight); + gradBias_data = THTensor_(data)(gradBias); + + /* and dims */ + input_h = input->size[1]; + input_w = input->size[2]; + output_h = gradOutput->size[1]; + output_w = gradOutput->size[2]; + weight_h = weight->size[1]; + weight_w = weight->size[2]; + + /* gradients wrt bias */ #pragma omp parallel for private(k) for(k = 0; k < nOutputPlane; k++) { real *ptr_gradOutput = gradOutput_data + k*output_w*output_h; @@ -185,15 +229,15 @@ static int nn_(SpatialConvolutionMap_accGradParameters)(lua_State *L) gradBias_data[k] += scale*ptr_gradOutput[l]; } - // gradients wrt weight - int nkernel = connTable->size[0]; + /* gradients wrt weight */ + nkernel = connTable->size[0]; #pragma omp parallel for private(k) for(k = 0; k < nkernel; k++) { int o = (int)THTensor_(get2d)(connTable,k,1)-1; int i = (int)THTensor_(get2d)(connTable,k,0)-1; - // gradient to kernel + /* gradient to kernel */ 
THTensor_(validXCorr2DRevptr)(gradWeight_data + k*weight_w*weight_h, scale, input_data + i*input_w*input_h, input_h, input_w, @@ -201,7 +245,7 @@ static int nn_(SpatialConvolutionMap_accGradParameters)(lua_State *L) dH, dW); } - // clean up + /* clean up */ THTensor_(free)(input); THTensor_(free)(gradOutput); return 0; diff --git a/generic/SpatialFullConvolution.c b/generic/SpatialFullConvolution.c index cb2e340..3b55297 100644 --- a/generic/SpatialFullConvolution.c +++ b/generic/SpatialFullConvolution.c @@ -12,69 +12,80 @@ static int nn_(SpatialFullConvolution_updateOutput)(lua_State *L) THTensor *bias = luaT_getfieldcheckudata(L, 1, "bias", torch_Tensor); THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_Tensor); - luaL_argcheck(L, input->nDimension == 3 || input->nDimension == 4, 2, "3D or 4D(batch mode) tensor expected"); int dimw = 2; int dimh = 1; + + luaL_argcheck(L, input->nDimension == 3 || input->nDimension == 4, 2, "3D or 4D(batch mode) tensor expected"); if (input->nDimension == 4) { dimw++; dimh++; } - long nOutputPlane = weight->size[1]; - long kW = weight->size[3]; - long kH = weight->size[2]; - long inputWidth = input->size[dimw]; - long inputHeight = input->size[dimh]; - long outputWidth = (inputWidth - 1) * dW + kW; - long outputHeight = (inputHeight - 1) * dH + kH; - - if (input->nDimension == 3) - { - THTensor_(resize3d)(output, nOutputPlane, outputHeight, outputWidth); - /* add bias */ - long i; - real* bias_data = THTensor_(data)(bias); - real* output_data = THTensor_(data)(output); -#pragma omp parallel for private(i) - for (i=0; i<bias->size[0]; i++) - { - /*THTensor_(select)(outn,output,0,i);*/ - /*TH_TENSOR_APPLY(real,outn, *outn_data = bias_data[i];);*/ - real *ptr_output = output_data + i*outputWidth*outputHeight; - long j; - for(j = 0; j < outputWidth*outputHeight; j++) - ptr_output[j] = bias_data[i]; - } - - /* do convolutions */ - THTensor *tweight = THTensor_(newTranspose)(weight,0,1); - THTensor_(conv2Dmv)(output, 1.0, 1.0,
input, tweight, dH, dW, "F", "C"); - THTensor_(free)(tweight); - } - else { - THTensor_(resize4d)(output, input->size[0], nOutputPlane, outputHeight, outputWidth); - real* bias_data = THTensor_(data)(bias); - real* output_data = THTensor_(data)(output); - - long p; -#pragma omp parallel for private(p) - for (p=0; p<input->size[0]; p++) + long nOutputPlane = weight->size[1]; + long kW = weight->size[3]; + long kH = weight->size[2]; + long inputWidth = input->size[dimw]; + long inputHeight = input->size[dimh]; + long outputWidth = (inputWidth - 1) * dW + kW; + long outputHeight = (inputHeight - 1) * dH + kH; + + if (input->nDimension == 3) { - /* BIAS */ long i; + real* bias_data; + real* output_data; + + THTensor_(resize3d)(output, nOutputPlane, outputHeight, outputWidth); + /* add bias */ + bias_data = THTensor_(data)(bias); + output_data = THTensor_(data)(output); +#pragma omp parallel for private(i) for (i=0; i<bias->size[0]; i++) { - real *ptr_output = output_data + p*nOutputPlane*outputWidth*outputHeight + i*outputWidth*outputHeight; + real *ptr_output = output_data + i*outputWidth*outputHeight; long j; for(j = 0; j < outputWidth*outputHeight; j++) ptr_output[j] = bias_data[i]; } + + /* do convolutions */ + { + THTensor *tweight = THTensor_(newTranspose)(weight,0,1); + THTensor_(conv2Dmv)(output, 1.0, 1.0, input, tweight, dH, dW, "F", "C"); + THTensor_(free)(tweight); + } + } + else + { + real* bias_data; + real* output_data; + long p; + + THTensor_(resize4d)(output, input->size[0], nOutputPlane, outputHeight, outputWidth); + bias_data = THTensor_(data)(bias); + output_data = THTensor_(data)(output); + +#pragma omp parallel for private(p) + for (p=0; p<input->size[0]; p++) + { + /* BIAS */ + long i; + for (i=0; i<bias->size[0]; i++) + { + real *ptr_output = output_data + p*nOutputPlane*outputWidth*outputHeight + i*outputWidth*outputHeight; + long j; + for(j = 0; j < outputWidth*outputHeight; j++) + ptr_output[j] = bias_data[i]; + } + } + /* do convolutions */ + { + THTensor *tweight =
THTensor_(newTranspose)(weight,0,1); + THTensor_(conv2Dmm)(output, 1.0, 1.0, input, tweight, dH, dW, "F", "C"); + THTensor_(free)(tweight); + } } - /* do convolutions */ - THTensor *tweight = THTensor_(newTranspose)(weight,0,1); - THTensor_(conv2Dmm)(output, 1.0, 1.0, input, tweight, dH, dW, "F", "C"); - THTensor_(free)(tweight); } return 1; } @@ -120,20 +131,26 @@ static int nn_(SpatialFullConvolution_accGradParameters)(lua_State *L) THTensor *gradBias = luaT_getfieldcheckudata(L, 1, "gradBias", torch_Tensor); long nOutputPlane = weight->size[1]; - THArgCheck( nOutputPlane == gradOutput->size[input->nDimension == 4 ? 1 : 0], 1, "Number of output features is not equal to nOutputPlane" ); int dimw = 2; int dimh = 1; + real *gradBias_data; + real *gradOutput_data; + long noutSlice; + + THArgCheck( nOutputPlane == gradOutput->size[input->nDimension == 4 ? 1 : 0], 1, "Number of output features is not equal to nOutputPlane" ); + + if (input->nDimension == 4) { dimw++; dimh++; } /* gradient to bias */ - real *gradBias_data = THTensor_(data)(gradBias); - real *gradOutput_data = THTensor_(data)(gradOutput); - long noutSlice = gradOutput->size[dimh]*gradOutput->size[dimw]; + gradBias_data = THTensor_(data)(gradBias); + gradOutput_data = THTensor_(data)(gradOutput); + noutSlice = gradOutput->size[dimh]*gradOutput->size[dimw]; /*THTensor* gradOutSlice = THTensor_(new)();*/ if (input->nDimension == 3) diff --git a/generic/SpatialFullConvolutionMap.c b/generic/SpatialFullConvolutionMap.c index 8a5d9df..9d5cff2 100644 --- a/generic/SpatialFullConvolutionMap.c +++ b/generic/SpatialFullConvolutionMap.c @@ -18,6 +18,21 @@ static int nn_(SpatialFullConvolutionMap_updateOutput)(lua_State *L) THTensor *bias = luaT_getfieldcheckudata(L, 1, "bias", torch_Tensor); THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_Tensor); + real *input_data; + real *output_data; + real *weight_data; + real *bias_data; + real *connTable_data; + + long input_h; + long input_w; + long 
output_h; + long output_w; + long weight_h; + long weight_w; + + long p; + luaL_argcheck(L, input->nDimension == 3, 2, "3D tensor expected"); luaL_argcheck(L, input->size[0] >= nInputPlane, 2, "invalid number of input planes"); @@ -26,39 +41,40 @@ static int nn_(SpatialFullConvolutionMap_updateOutput)(lua_State *L) (input->size[1] - 1) * dH + kH, (input->size[2] - 1) * dW + kW); - // contiguous + /* contiguous */ input = THTensor_(newContiguous)(input); output = THTensor_(newContiguous)(output); - // get raw pointers - real *input_data = THTensor_(data)(input); - real *output_data = THTensor_(data)(output); - real *weight_data = THTensor_(data)(weight); - real *bias_data = THTensor_(data)(bias); - real *connTable_data = THTensor_(data)(connTable); - - // and dims - long input_h = input->size[1]; - long input_w = input->size[2]; - long output_h = output->size[1]; - long output_w = output->size[2]; - long weight_h = weight->size[1]; - long weight_w = weight->size[2]; + /* get raw pointers */ + input_data = THTensor_(data)(input); + output_data = THTensor_(data)(output); + weight_data = THTensor_(data)(weight); + bias_data = THTensor_(data)(bias); + connTable_data = THTensor_(data)(connTable); + + /* and dims */ + input_h = input->size[1]; + input_w = input->size[2]; + output_h = output->size[1]; + output_w = output->size[2]; + weight_h = weight->size[1]; + weight_w = weight->size[2]; - long p; #pragma omp parallel for private(p) for (p = 0; p < nOutputPlane; p++) { - // add bias + /* add bias */ real *ptr_output = output_data + p*output_w*output_h; long j; + int nweight; + long k; + for(j = 0; j < output_h*output_w; j++) ptr_output[j] = bias_data[p]; - // convolve all maps - int nweight = connTable->size[0]; - long k; + /* convolve all maps */ + nweight = connTable->size[0]; for (k = 0; k < nweight; k++) { - // get offsets for input/output + /* get offsets for input/output */ int o = (int)connTable_data[k*2+1]-1; int i = (int)connTable_data[k*2+0]-1; @@ -73,7 +89,7 
@@ static int nn_(SpatialFullConvolutionMap_updateOutput)(lua_State *L) } } - // clean up + /* clean up */ THTensor_(free)(input); THTensor_(free)(output); @@ -92,34 +108,47 @@ static int nn_(SpatialFullConvolutionMap_updateGradInput)(lua_State *L) THTensor *weight = luaT_getfieldcheckudata(L, 1, "weight", torch_Tensor); THTensor *gradInput = luaT_getfieldcheckudata(L, 1, "gradInput", torch_Tensor); - // contiguous + real *gradInput_data; + real *gradOutput_data; + real *weight_data; + real *connTable_data; + + long input_h; + long input_w; + long output_h; + long output_w; + long weight_h; + long weight_w; + + long p; + + /* contiguous */ gradInput = THTensor_(newContiguous)(gradInput); gradOutput = THTensor_(newContiguous)(gradOutput); - // Resize/Zero + /* Resize/Zero */ THTensor_(resizeAs)(gradInput, input); THTensor_(zero)(gradInput); - // get raw pointers - real *gradInput_data = THTensor_(data)(gradInput); - real *gradOutput_data = THTensor_(data)(gradOutput); - real *weight_data = THTensor_(data)(weight); - real *connTable_data = THTensor_(data)(connTable); + /* get raw pointers */ + gradInput_data = THTensor_(data)(gradInput); + gradOutput_data = THTensor_(data)(gradOutput); + weight_data = THTensor_(data)(weight); + connTable_data = THTensor_(data)(connTable); - // and dims - long input_h = input->size[1]; - long input_w = input->size[2]; - long output_h = gradOutput->size[1]; - long output_w = gradOutput->size[2]; - long weight_h = weight->size[1]; - long weight_w = weight->size[2]; + /* and dims */ + input_h = input->size[1]; + input_w = input->size[2]; + output_h = gradOutput->size[1]; + output_w = gradOutput->size[2]; + weight_h = weight->size[1]; + weight_w = weight->size[2]; - long p; #pragma omp parallel for private(p) for(p = 0; p < nInputPlane; p++) { long k; - // backward all + /* backward all */ int nkernel = connTable->size[0]; for(k = 0; k < nkernel; k++) { @@ -127,7 +156,7 @@ static int 
nn_(SpatialFullConvolutionMap_updateGradInput)(lua_State *L) int i = (int)connTable_data[k*2+0]-1; if (i == p) { - // gradient to input + /* gradient to input */ THTensor_(validXCorr2Dptr)(gradInput_data + i*input_w*input_h, 1.0, gradOutput_data + o*output_w*output_h, output_h, output_w, @@ -137,7 +166,7 @@ static int nn_(SpatialFullConvolutionMap_updateGradInput)(lua_State *L) } } - // clean up + /* clean up */ THTensor_(free)(gradInput); THTensor_(free)(gradOutput); @@ -158,26 +187,40 @@ static int nn_(SpatialFullConvolutionMap_accGradParameters)(lua_State *L) THTensor *gradWeight = luaT_getfieldcheckudata(L, 1, "gradWeight", torch_Tensor); THTensor *gradBias = luaT_getfieldcheckudata(L, 1, "gradBias", torch_Tensor); - // contiguous + real *input_data; + real *gradOutput_data; + real *gradWeight_data; + real *gradBias_data; + + long input_h; + long input_w; + long output_h; + long output_w; + long weight_h; + long weight_w; + + long k; + int nkernel; + + /* contiguous */ input = THTensor_(newContiguous)(input); gradOutput = THTensor_(newContiguous)(gradOutput); - // get raw pointers - real *input_data = THTensor_(data)(input); - real *gradOutput_data = THTensor_(data)(gradOutput); - real *gradWeight_data = THTensor_(data)(gradWeight); - real *gradBias_data = THTensor_(data)(gradBias); - - // and dims - long input_h = input->size[1]; - long input_w = input->size[2]; - long output_h = gradOutput->size[1]; - long output_w = gradOutput->size[2]; - long weight_h = weight->size[1]; - long weight_w = weight->size[2]; - - // gradients wrt bias - long k; + /* get raw pointers */ + input_data = THTensor_(data)(input); + gradOutput_data = THTensor_(data)(gradOutput); + gradWeight_data = THTensor_(data)(gradWeight); + gradBias_data = THTensor_(data)(gradBias); + + /* and dims */ + input_h = input->size[1]; + input_w = input->size[2]; + output_h = gradOutput->size[1]; + output_w = gradOutput->size[2]; + weight_h = weight->size[1]; + weight_w = weight->size[2]; + + /* 
gradients wrt bias */ #pragma omp parallel for private(k) for(k = 0; k < nOutputPlane; k++) { real *ptr_gradOutput = gradOutput_data + k*output_w*output_h; @@ -186,15 +229,15 @@ static int nn_(SpatialFullConvolutionMap_accGradParameters)(lua_State *L) gradBias_data[k] += scale*ptr_gradOutput[l]; } - // gradients wrt weight - int nkernel = connTable->size[0]; + /* gradients wrt weight */ + nkernel = connTable->size[0]; #pragma omp parallel for private(k) for(k = 0; k < nkernel; k++) { int o = (int)THTensor_(get2d)(connTable,k,1)-1; int i = (int)THTensor_(get2d)(connTable,k,0)-1; - // gradient to kernel + /* gradient to kernel */ THTensor_(validXCorr2DRevptr)(gradWeight_data + k*weight_w*weight_h, scale, gradOutput_data + o*output_w*output_h, output_h, output_w, @@ -202,7 +245,7 @@ static int nn_(SpatialFullConvolutionMap_accGradParameters)(lua_State *L) dH, dW); } - // clean up + /* clean up */ THTensor_(free)(input); THTensor_(free)(gradOutput); return 0; diff --git a/generic/SpatialMaxPooling.c b/generic/SpatialMaxPooling.c index 7faa0ee..8dd04c9 100644 --- a/generic/SpatialMaxPooling.c +++ b/generic/SpatialMaxPooling.c @@ -13,19 +13,19 @@ static void nn_(SpatialMaxPooling_updateOutput_frame)(real *input_p, real *outpu #pragma omp parallel for private(k) for (k = 0; k < nslices; k++) { - // loop over output + /* loop over output */ long i, j; for(i = 0; i < oheight; i++) { for(j = 0; j < owidth; j++) { - // local pointers + /* local pointers */ real *ip = input_p + k*iwidth*iheight + i*iwidth*dH + j*dW; real *op = output_p + k*owidth*oheight + i*owidth + j; real *indyp = indy_p + k*owidth*oheight + i*owidth + j; real *indxp = indx_p + k*owidth*oheight + i*owidth + j; - // compute local max: + /* compute local max: */ long maxindex = -1; real maxval = -THInf; long tcntr = 0; @@ -44,10 +44,10 @@ static void nn_(SpatialMaxPooling_updateOutput_frame)(real *input_p, real *outpu } } - // set output to local max + /* set output to local max */ *op = maxval; - // store 
location of max (x,y) + /* store location of max (x,y) */ *indyp = (int)(maxindex / kW)+1; *indxp = (maxindex % kW) +1; } @@ -64,11 +64,21 @@ static int nn_(SpatialMaxPooling_updateOutput)(lua_State *L) int dH = luaT_getfieldcheckint(L, 1, "dH"); THTensor *indices = luaT_getfieldcheckudata(L, 1, "indices", torch_Tensor); THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_Tensor); - - luaL_argcheck(L, input->nDimension == 3 || input->nDimension == 4 , 2, "3D or 4D (batch mode) tensor expected"); int dimw = 2; int dimh = 1; long nbatch = 1; + long nslices; + long iheight; + long iwidth; + long oheight; + long owidth; + real *input_data; + real *output_data; + real *indices_data; + + + luaL_argcheck(L, input->nDimension == 3 || input->nDimension == 4 , 2, "3D or 4D (batch mode) tensor expected"); + if (input->nDimension == 4) { nbatch = input->size[0]; @@ -77,26 +87,26 @@ static int nn_(SpatialMaxPooling_updateOutput)(lua_State *L) } luaL_argcheck(L, input->size[dimw] >= kW && input->size[dimh] >= kH, 2, "input image smaller than kernel size"); - // sizes - long nslices = input->size[dimh-1]; - long iheight = input->size[dimh]; - long iwidth = input->size[dimw]; - long oheight = (iheight - kH) / dH + 1; - long owidth = (iwidth - kW) / dW + 1; + /* sizes */ + nslices = input->size[dimh-1]; + iheight = input->size[dimh]; + iwidth = input->size[dimw]; + oheight = (iheight - kH) / dH + 1; + owidth = (iwidth - kW) / dW + 1; - // get contiguous input + /* get contiguous input */ input = THTensor_(newContiguous)(input); - // resize output + /* resize output */ if (input->nDimension == 3) { THTensor_(resize3d)(output, nslices, oheight, owidth); - // indices will contain i,j locations for each output point + /* indices will contain i,j locations for each output point */ THTensor_(resize4d)(indices, 2, nslices, oheight, owidth); - real *input_data = THTensor_(data)(input); - real *output_data = THTensor_(data)(output); - real *indices_data = 
THTensor_(data)(indices); + input_data = THTensor_(data)(input); + output_data = THTensor_(data)(output); + indices_data = THTensor_(data)(indices); nn_(SpatialMaxPooling_updateOutput_frame)(input_data, output_data, indices_data+nslices*owidth*oheight, indices_data, @@ -107,15 +117,16 @@ static int nn_(SpatialMaxPooling_updateOutput)(lua_State *L) } else { + long p; + THTensor_(resize4d)(output, nbatch, nslices, oheight, owidth); - // indices will contain i,j locations for each output point + /* indices will contain i,j locations for each output point */ THTensor_(resize5d)(indices, 2, nbatch, nslices, oheight, owidth); - real *input_data = THTensor_(data)(input); - real *output_data = THTensor_(data)(output); - real *indices_data = THTensor_(data)(indices); + input_data = THTensor_(data)(input); + output_data = THTensor_(data)(output); + indices_data = THTensor_(data)(indices); - long p; #pragma omp parallel for private(p) for (p = 0; p < nbatch; p++) { @@ -128,7 +139,7 @@ static int nn_(SpatialMaxPooling_updateOutput)(lua_State *L) } } - // cleanup + /* cleanup */ THTensor_(free)(input); return 1; } @@ -149,17 +160,17 @@ static void nn_(SpatialMaxPooling_updateGradInput_frame)(real *gradInput_p, real real *indx_p_k = indx_p + k*owidth*oheight; real *indy_p_k = indy_p + k*owidth*oheight; - // calculate max points + /* calculate max points */ long i, j; for(i = 0; i < oheight; i++) { for(j = 0; j < owidth; j++) { - // retrieve position of max + /* retrieve position of max */ long maxi = indy_p_k[i*owidth + j] - 1 + i*dH; long maxj = indx_p_k[i*owidth + j] - 1 + j*dW; - // update gradient + /* update gradient */ gradInput_p_k[maxi*iwidth + maxj] += gradOutput_p_k[i*owidth + j]; } } @@ -174,36 +185,44 @@ static int nn_(SpatialMaxPooling_updateGradInput)(lua_State *L) int dH = luaT_getfieldcheckint(L, 1, "dH"); THTensor *indices = luaT_getfieldcheckudata(L, 1, "indices", torch_Tensor); THTensor *gradInput = luaT_getfieldcheckudata(L, 1, "gradInput", torch_Tensor); + 
int dimw = 2; + int dimh = 1; + long nbatch = 1; + int nslices; + int iheight; + int iwidth; + int oheight; + int owidth; + real *gradInput_data; + real *gradOutput_data; + real *indices_data; - // get contiguous gradOutput + /* get contiguous gradOutput */ gradOutput = THTensor_(newContiguous)(gradOutput); - // resize + /* resize */ THTensor_(resizeAs)(gradInput, input); THTensor_(zero)(gradInput); - int dimw = 2; - int dimh = 1; - long nbatch = 1; if (input->nDimension == 4) { nbatch = input->size[0]; dimw++; dimh++; } - // sizes - int nslices = input->size[dimh-1]; - int iheight = input->size[dimh]; - int iwidth = input->size[dimw]; - int oheight = gradOutput->size[dimh]; - int owidth = gradOutput->size[dimw]; + /* sizes */ + nslices = input->size[dimh-1]; + iheight = input->size[dimh]; + iwidth = input->size[dimw]; + oheight = gradOutput->size[dimh]; + owidth = gradOutput->size[dimw]; - // get raw pointers - real *gradInput_data = THTensor_(data)(gradInput); - real *gradOutput_data = THTensor_(data)(gradOutput); - real *indices_data = THTensor_(data)(indices); + /* get raw pointers */ + gradInput_data = THTensor_(data)(gradInput); + gradOutput_data = THTensor_(data)(gradOutput); + indices_data = THTensor_(data)(indices); - // backprop + /* backprop */ if (input->nDimension == 3) { nn_(SpatialMaxPooling_updateGradInput_frame)(gradInput_data, gradOutput_data, @@ -228,7 +247,7 @@ static int nn_(SpatialMaxPooling_updateGradInput)(lua_State *L) } } - // cleanup + /* cleanup */ THTensor_(free)(gradOutput); return 1; diff --git a/generic/SpatialSubSampling.c b/generic/SpatialSubSampling.c index ed9c059..912592c 100644 --- a/generic/SpatialSubSampling.c +++ b/generic/SpatialSubSampling.c @@ -20,21 +20,29 @@ static int nn_(SpatialSubSampling_updateOutput)(lua_State *L) real *output_data; real *input_data; - luaL_argcheck(L, input->nDimension == 3 || input->nDimension == 4, 2, "3D or 4D(batch mode) tensor expected"); - int dimw = 2; int dimh = 1; long nbatch = 1; + + 
long inputWidth; + long inputHeight; + long outputWidth; + long outputHeight; + + long k; + + luaL_argcheck(L, input->nDimension == 3 || input->nDimension == 4, 2, "3D or 4D(batch mode) tensor expected"); + if (input->nDimension == 4) { nbatch = input->size[0]; dimw++; dimh++; } - long inputWidth = input->size[dimw]; - long inputHeight = input->size[dimh]; - long outputWidth = (inputWidth - kW) / dW + 1; - long outputHeight = (inputHeight - kH) / dH + 1; + inputWidth = input->size[dimw]; + inputHeight = input->size[dimh]; + outputWidth = (inputWidth - kW) / dW + 1; + outputHeight = (inputHeight - kH) / dH + 1; luaL_argcheck(L, input->size[dimh-1] == nInputPlane, 2, "invalid number of input planes"); luaL_argcheck(L, inputWidth >= kW && inputHeight >= kH, 2, "input image smaller than kernel size"); @@ -48,7 +56,6 @@ static int nn_(SpatialSubSampling_updateOutput)(lua_State *L) input_data = THTensor_(data)(input); output_data = THTensor_(data)(output); - long k; #pragma omp parallel for private(k) for(k = 0; k < nInputPlane; k++) { @@ -70,7 +77,7 @@ static int nn_(SpatialSubSampling_updateOutput)(lua_State *L) { for(xx = 0; xx < outputWidth; xx++) { - // Compute the mean of the input image... + /* Compute the mean of the input image... 
*/ real *ptr_input = input_data + p*nInputPlane*inputWidth*inputHeight + k*inputWidth*inputHeight + yy*dH*inputWidth+xx*dW; real sum = 0; long kx, ky; @@ -79,9 +86,9 @@ static int nn_(SpatialSubSampling_updateOutput)(lua_State *L) { for(kx = 0; kx < kW; kx++) sum += ptr_input[kx]; - ptr_input += inputWidth; // next input line + ptr_input += inputWidth; /* next input line */ } - // Update output + /* Update output */ *ptr_output++ += the_weight*sum; } } @@ -108,20 +115,31 @@ static int nn_(SpatialSubSampling_updateGradInput)(lua_State *L) int dimw = 2; int dimh = 1; long nbatch = 1; + + long inputWidth; + long inputHeight; + long outputWidth; + long outputHeight; + + real *weight_data; + real *gradOutput_data; + real *input_data, *gradInput_data; + + long k; + if (input->nDimension == 4) { nbatch = input->size[0]; dimw++; dimh++; } - long inputWidth = input->size[dimw]; - long inputHeight = input->size[dimh]; - long outputWidth = (inputWidth - kW) / dW + 1; - long outputHeight = (inputHeight - kH) / dH + 1; + inputWidth = input->size[dimw]; + inputHeight = input->size[dimh]; + outputWidth = (inputWidth - kW) / dW + 1; + outputHeight = (inputHeight - kH) / dH + 1; - real *weight_data = THTensor_(data)(weight); - real *gradOutput_data = THTensor_(data)(gradOutput); - real *input_data, *gradInput_data; + weight_data = THTensor_(data)(weight); + gradOutput_data = THTensor_(data)(gradOutput); input_data = THTensor_(data)(input); @@ -129,7 +147,6 @@ static int nn_(SpatialSubSampling_updateGradInput)(lua_State *L) gradInput_data = THTensor_(data)(gradInput); gradOutput_data = THTensor_(data)(gradOutput); - long k; #pragma omp parallel for private(k) for(k = 0; k < nInputPlane; k++) { @@ -184,26 +201,37 @@ static int nn_(SpatialSubSampling_accGradParameters)(lua_State *L) long nbatch = 1; long dimw = 2; long dimh = 1; + + long inputWidth; + long inputHeight; + long outputWidth; + long outputHeight; + + real *gradWeight_data; + real *gradBias_data; + real *gradOutput_data; + 
real *input_data; + + long k; + if (input->nDimension == 4) { dimw++; dimh++; nbatch = input->size[0]; } - long inputWidth = input->size[dimw]; - long inputHeight = input->size[dimh]; - long outputWidth = (inputWidth - kW) / dW + 1; - long outputHeight = (inputHeight - kH) / dH + 1; + inputWidth = input->size[dimw]; + inputHeight = input->size[dimh]; + outputWidth = (inputWidth - kW) / dW + 1; + outputHeight = (inputHeight - kH) / dH + 1; - real *gradWeight_data = THTensor_(data)(gradWeight); - real *gradBias_data = THTensor_(data)(gradBias); - real *gradOutput_data = THTensor_(data)(gradOutput); - real *input_data; + gradWeight_data = THTensor_(data)(gradWeight); + gradBias_data = THTensor_(data)(gradBias); + gradOutput_data = THTensor_(data)(gradOutput); input = THTensor_(newContiguous)(input); input_data = THTensor_(data)(input); - long k; #pragma omp parallel for private(k) for(k = 0; k < nInputPlane; k++) { @@ -213,9 +241,9 @@ static int nn_(SpatialSubSampling_accGradParameters)(lua_State *L) real *ptr_gradOutput = gradOutput_data + p*nInputPlane*outputHeight*outputWidth + k*outputWidth*outputHeight; real sum; long xx, yy; + long i; sum = 0; - long i; for(i = 0; i < outputWidth*outputHeight; i++) sum += ptr_gradOutput[i]; gradBias_data[k] += scale*sum; diff --git a/generic/TemporalMaxPooling.c b/generic/TemporalMaxPooling.c index 0111cb5..5e269cf 100644 --- a/generic/TemporalMaxPooling.c +++ b/generic/TemporalMaxPooling.c @@ -10,29 +10,38 @@ static int nn_(TemporalMaxPooling_updateOutput)(lua_State *L) THTensor *indices = luaT_getfieldcheckudata(L, 1, "indices", torch_Tensor); THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_Tensor); + long niframe; + long framesize; + long noframe; + + real *input_data; + real *output_data; + real *indices_data; + + long t, x, y; + luaL_argcheck(L, input->nDimension == 2, 2, "2D tensor expected"); luaL_argcheck(L, input->size[0] >= kW, 2, "input sequence smaller than kernel size"); - // sizes - long niframe = 
input->size[0]; - long framesize = input->size[1]; - long noframe = (niframe - kW) / dW + 1; + /* sizes */ + niframe = input->size[0]; + framesize = input->size[1]; + noframe = (niframe - kW) / dW + 1; - // get contiguous input + /* get contiguous input */ input = THTensor_(newContiguous)(input); - // resize output + /* resize output */ THTensor_(resize2d)(output, noframe, framesize); - // indices will contain index locations for each output point + /* indices will contain index locations for each output point */ THTensor_(resize2d)(indices, noframe, framesize); - // get raw pointers - real *input_data = THTensor_(data)(input); - real *output_data = THTensor_(data)(output); - real *indices_data = THTensor_(data)(indices); + /* get raw pointers */ + input_data = THTensor_(data)(input); + output_data = THTensor_(data)(output); + indices_data = THTensor_(data)(indices); - long t, x, y; for(t = 0; t < noframe; t++) { real *ip = input_data + t*framesize*dW; @@ -41,7 +50,7 @@ static int nn_(TemporalMaxPooling_updateOutput)(lua_State *L) #pragma omp parallel for private(y) for(y = 0; y < framesize; y++) { - // compute local max: + /* compute local max: */ long maxindex = -1; real maxval = -THInf; for(x = 0; x < kW; x++) @@ -54,13 +63,13 @@ static int nn_(TemporalMaxPooling_updateOutput)(lua_State *L) } } - // set output to local max + /* set output to local max */ op[y] = maxval; xp[y] = (real)maxindex; } } - // cleanup + /* cleanup */ THTensor_(free)(input); return 1; @@ -74,23 +83,31 @@ static int nn_(TemporalMaxPooling_updateGradInput)(lua_State *L) THTensor *indices = luaT_getfieldcheckudata(L, 1, "indices", torch_Tensor); THTensor *gradInput = luaT_getfieldcheckudata(L, 1, "gradInput", torch_Tensor); - // get contiguous gradOutput + int noframe; + long framesize; + + real *gradInput_data; + real *gradOutput_data; + real *indices_data; + + long t, y; + + /* get contiguous gradOutput */ gradOutput = THTensor_(newContiguous)(gradOutput); - // resize and zero + /* resize 
and zero */ THTensor_(resizeAs)(gradInput, input); THTensor_(zero)(gradInput); - // sizes - int noframe = gradOutput->size[0]; - long framesize = gradOutput->size[1]; + /* sizes */ + noframe = gradOutput->size[0]; + framesize = gradOutput->size[1]; - // get raw pointers - real *gradInput_data = THTensor_(data)(gradInput); - real *gradOutput_data = THTensor_(data)(gradOutput); - real *indices_data = THTensor_(data)(indices); + /* get raw pointers */ + gradInput_data = THTensor_(data)(gradInput); + gradOutput_data = THTensor_(data)(gradOutput); + indices_data = THTensor_(data)(indices); - long t, y; for(t = 0; t < noframe; t++) { real *gip = gradInput_data + t*framesize*dW; @@ -99,13 +116,13 @@ static int nn_(TemporalMaxPooling_updateGradInput)(lua_State *L) #pragma omp parallel for private(y) for(y = 0; y < framesize; y++) { - // compute local max: + /* compute local max: */ long maxindex = (long)xp[y]; gip[maxindex*framesize+y] += gop[y]; } } - // cleanup + /* cleanup */ THTensor_(free)(gradOutput); return 1; diff --git a/generic/VolumetricConvolution.c b/generic/VolumetricConvolution.c index 6e0b6d8..feeaf05 100644 --- a/generic/VolumetricConvolution.c +++ b/generic/VolumetricConvolution.c @@ -15,30 +15,33 @@ static int nn_(VolumetricConvolution_updateOutput)(lua_State *L) luaL_argcheck(L, input->nDimension == 4, 2, "4D tensor expected"); - long nOutputPlane = weight->size[0]; - long kT = weight->size[2]; - long kH = weight->size[3]; - long kW = weight->size[4]; - long inputDepth = input->size[1]; - long inputHeight = input->size[2]; - long inputWidth = input->size[3]; - long outputDepth = (inputDepth - kT) / dT + 1; - long outputWidth = (inputWidth - kW) / dW + 1; - long outputHeight = (inputHeight - kH) / dH + 1; - - THTensor_(resize4d)(output, nOutputPlane, outputDepth, outputHeight, outputWidth); - - /* add bias */ - long i; - THTensor *outn = THTensor_(new)(); - for (i=0; i<bias->size[0]; i++) { - THTensor_(select)(outn,output,0,i); - THTensor_(fill)(outn, 
THTensor_(get1d)(bias, i)); - } - THTensor_(free)(outn); + { + long nOutputPlane = weight->size[0]; + long kT = weight->size[2]; + long kH = weight->size[3]; + long kW = weight->size[4]; + long inputDepth = input->size[1]; + long inputHeight = input->size[2]; + long inputWidth = input->size[3]; + long outputDepth = (inputDepth - kT) / dT + 1; + long outputWidth = (inputWidth - kW) / dW + 1; + long outputHeight = (inputHeight - kH) / dH + 1; + THTensor *outn = THTensor_(new)(); + long i; + + THTensor_(resize4d)(output, nOutputPlane, outputDepth, outputHeight, outputWidth); + + /* add bias */ + for (i=0; i<bias->size[0]; i++) { + THTensor_(select)(outn,output,0,i); + THTensor_(fill)(outn, THTensor_(get1d)(bias, i)); + } - /* do convolutions */ - THTensor_(conv3Dmv)(output, 1.0, 1.0, input, weight, dT, dH, dW, "V", "X"); + THTensor_(free)(outn); + + /* do convolutions */ + THTensor_(conv3Dmv)(output, 1.0, 1.0, input, weight, dT, dH, dW, "V", "X"); + } return 1; } @@ -54,11 +57,12 @@ static int nn_(VolumetricConvolution_updateGradInput)(lua_State *L) THTensor *weight = luaT_getfieldcheckudata(L, 1, "weight", torch_Tensor); THTensor *gradInput = luaT_getfieldcheckudata(L, 1, "gradInput", torch_Tensor); + THTensor *tweight; THArgCheck( nOutputPlane == gradOutput->size[0], 1, "Number of output features is not equal to nOutputPlane" ); /* gradient to input */ - THTensor *tweight = THTensor_(newTranspose)(weight,0,1); + tweight = THTensor_(newTranspose)(weight,0,1); THTensor_(conv3Dmv)(gradInput, 0.0, 1.0, gradOutput, tweight, dT, dH, dW, "F", "C"); THTensor_(free)(tweight); @@ -77,14 +81,16 @@ static int nn_(VolumetricConvolution_accGradParameters)(lua_State *L) THTensor *gradWeight = luaT_getfieldcheckudata(L, 1, "gradWeight", torch_Tensor); THTensor *gradBias = luaT_getfieldcheckudata(L, 1, "gradBias", torch_Tensor); - - THArgCheck( nOutputPlane == gradOutput->size[0], 1, "Number of output features is not equal to nOutputPlane" ); long k; + real *gradBias_data; + THTensor* 
gradOutSlice; + + THArgCheck( nOutputPlane == gradOutput->size[0], 1, "Number of output features is not equal to nOutputPlane" ); /* gradient to bias */ - real *gradBias_data = THTensor_(data)(gradBias); - THTensor* gradOutSlice = THTensor_(new)(); + gradBias_data = THTensor_(data)(gradBias); + gradOutSlice = THTensor_(new)(); for(k = 0; k < nOutputPlane; k++) { THTensor_(select)(gradOutSlice, gradOutput, 0, k); -- cgit v1.2.3