From 4b3a22b5fc63bce5d7db6d53a587777d7c27ea97 Mon Sep 17 00:00:00 2001
From: Volodymyr Mnih
Date: Fri, 21 Jun 2013 11:36:31 +0100
Subject: Added minibatch support to MarginRankingCriterion and PairwiseDistance.

---
Review notes (text between "---" and the first "diff --git" is not applied):
 * MarginRankingCriterion:updateGradInput - the scalar branch computes dist
   from input[1]-input[2], as updateOutput does; it previously indexed a Lua
   number with [1], which raises an error.
 * PairwiseDistance:updateOutput - the 1D branch resizes self.output to one
   element before storing the distance (self.output is now created empty).
 * PairwiseDistance:updateOutput - the 2D branch takes abs() of the difference
   before pow(), so odd norms no longer let signed terms cancel.
 All three changes keep the hunk line counts unchanged.

 MarginRankingCriterion.lua | 66 +++++++++++++++++++++++++++++++++++++++-------
 PairwiseDistance.lua       | 52 ++++++++++++++++++++++++++++--------
 2 files changed, 97 insertions(+), 21 deletions(-)

diff --git a/MarginRankingCriterion.lua b/MarginRankingCriterion.lua
index ec85fb9..5012c2a 100644
--- a/MarginRankingCriterion.lua
+++ b/MarginRankingCriterion.lua
@@ -8,18 +8,64 @@ function MarginRankingCriterion:__init(margin)
 end
 
 function MarginRankingCriterion:updateOutput(input,y)
-   self.output=math.max(0, -y*(input[1][1]-input[2][1]) + self.margin )
+   if type(input[1]) == "number" then
+      self.output=math.max(0, -y*(input[1]-input[2]) + self.margin )
+   else
+      if type(self.output) == "number" then
+         self.output = input[1]:clone()
+      end
+      self.output = self.output or input[1]:clone()
+      self.output:resizeAs(input[1])
+      self.output:copy(input[1])
+
+      self.output:add(-1, input[2])
+      self.output:mul(-y)
+      self.output:add(self.margin)
+
+      self.mask = self.mask or self.output:clone()
+      self.mask:resizeAs(self.output)
+      self.mask:copy(self.output)
+
+      self.mask:ge(self.output, 0.0)
+      self.output:cmul(self.mask)
+   end
+
    return self.output
 end
 
 function MarginRankingCriterion:updateGradInput(input, y)
-   local dist = -y*(input[1][1]-input[2][1]) + self.margin
-   if dist < 0 then
-      self.gradInput[1][1]=0;
-      self.gradInput[2][1]=0;
-   else
-      self.gradInput[1][1]=-y
-      self.gradInput[2][1]=y
-   end
-   return self.gradInput
+   if type(input[1]) == "number" then
+      local dist = -y*(input[1]-input[2]) + self.margin
+      if dist < 0 then
+         self.gradInput[1][1]=0;
+         self.gradInput[2][1]=0;
+      else
+         self.gradInput[1][1]=-y
+         self.gradInput[2][1]=y
+      end
+   else
+      self.dist = self.dist or input[1].new()
+      self.dist = self.dist:resizeAs(input[1]):copy(input[1])
+      local dist = self.dist
+
+      dist:add(-1, input[2])
+      dist:mul(-y)
+      dist:add(self.margin)
+
+      self.mask = self.mask or input[1].new()
+      self.mask = self.mask:resizeAs(input[1]):copy(dist)
+      local mask = self.mask
+
+      mask:ge(dist, 0)
+
+      self.gradInput[1]:resize(dist:size())
+      self.gradInput[2]:resize(dist:size())
+
+      self.gradInput[1]:copy(mask)
+      self.gradInput[1]:mul(-y)
+      self.gradInput[2]:copy(mask)
+      self.gradInput[2]:mul(y)
+
+   end
+   return self.gradInput
 end
diff --git a/PairwiseDistance.lua b/PairwiseDistance.lua
index 638c58f..f108b97 100644
--- a/PairwiseDistance.lua
+++ b/PairwiseDistance.lua
@@ -5,12 +5,29 @@ function PairwiseDistance:__init(p)
 
    -- state
    self.gradInput = {torch.Tensor(), torch.Tensor()}
-   self.output = torch.Tensor(1)
+   self.output = torch.Tensor()
    self.norm=p
 end
 
 function PairwiseDistance:updateOutput(input)
-   self.output[1]=input[1]:dist(input[2],self.norm);
+   if input[1]:dim() == 1 then
+      self.output:resize(1):fill(input[1]:dist(input[2],self.norm))
+   elseif input[1]:dim() == 2 then
+      self.diff = self.diff or input[1].new()
+      self.diff:resizeAs(input[1])
+
+      local diff = self.diff:zero()
+      --local diff = torch.add(input[1], -1, input[2])
+      diff:add(input[1], -1, input[2]):abs()
+
+      self.output:resize(input[1]:size(1))
+      self.output:zero()
+      self.output:add(diff:pow(self.norm):sum(2))
+      self.output:pow(1./self.norm)
+   else
+      error('input must be vector or matrix')
+   end
+
    return self.output
 end
 
@@ -20,14 +37,27 @@ local function mathsign(x)
 end
 
 function PairwiseDistance:updateGradInput(input, gradOutput)
-   self.gradInput[1]:resizeAs(input[1])
-   self.gradInput[2]:resizeAs(input[2])
-   self.gradInput[1]:copy(input[1])
-   self.gradInput[1]:add(-1, input[2])
-   if self.norm==1 then
+    self.gradInput[1]:resize(input[1]:size())
+    self.gradInput[2]:resize(input[2]:size())
+    self.gradInput[1]:copy(input[1])
+    self.gradInput[1]:add(-1, input[2])
+    if self.norm==1 then
       self.gradInput[1]:apply(mathsign)
-   end
-   self.gradInput[1]:mul(gradOutput[1]);
-   self.gradInput[2]:zero():add(-1, self.gradInput[1])
-   return self.gradInput
+    end
+    if input[1]:dim() == 1 then
+        self.gradInput[1]:mul(gradOutput[1])
+    elseif input[1]:dim() == 2 then
+        self.grad = self.grad or gradOutput.new()
+        self.ones = self.ones or gradOutput.new()
+
+        self.grad:resizeAs(input[1]):zero()
+        self.ones:resize(input[1]:size(2)):fill(1)
+
+        self.grad:addr(gradOutput, self.ones)
+        self.gradInput[1]:cmul(self.grad)
+    else
+        error('input must be vector or matrix')
+    end
+    self.gradInput[2]:zero():add(-1, self.gradInput[1])
+    return self.gradInput
 end
-- 
cgit v1.2.3


From e96b6054090a06a2e63a1d564b8b150a55a1e465 Mon Sep 17 00:00:00 2001
From: Ronan Collobert
Date: Tue, 8 Oct 2013 12:20:36 +0200
Subject: pkg/nn: C89

---
 generic/SoftMax.c                   |   4 +-
 generic/SpatialConvolution.c        | 117 ++++++++++++++-----------
 generic/SpatialConvolutionMM.c      |  38 +++++----
 generic/SpatialConvolutionMap.c     | 166 +++++++++++++++++++++++-------------
 generic/SpatialFullConvolution.c    | 117 ++++++++++++++-----------
 generic/SpatialFullConvolutionMap.c | 165 ++++++++++++++++++++++-------------
 generic/SpatialMaxPooling.c         | 109 +++++++++++++----------
 generic/SpatialSubSampling.c        |  84 ++++++++++++------
 generic/TemporalMaxPooling.c        |  71 +++++++++------
 generic/VolumetricConvolution.c     |  62 ++++++++------
 10 files changed, 566 insertions(+), 367 deletions(-)

diff --git a/generic/SoftMax.c b/generic/SoftMax.c
index fd73b3e..bddb70d 100644
--- a/generic/SoftMax.c
+++ b/generic/SoftMax.c
@@ -31,11 +31,13 @@ static int nn_(SoftMax_updateOutput)(lua_State *L)
   for(t = 0; t < nframe; t++)
   {
     real inputMax = -THInf;
+    accreal sum;
+
     for(d = 0; d < dim; d++) {
       if (input_data[d] >= inputMax) inputMax = input_data[d];
     }
 
-    accreal sum = 0;
+    sum = 0;
     for(d = 0; d < dim; d++) {
       real z = THExpMinusApprox(inputMax - input_data[d]);
       output_data[d] = z;
diff --git a/generic/SpatialConvolution.c b/generic/SpatialConvolution.c
index bfe5698..bcadf38 100644
--- a/generic/SpatialConvolution.c
+++ b/generic/SpatialConvolution.c
@@ -12,70
+12,79 @@ static int nn_(SpatialConvolution_updateOutput)(lua_State *L)
   THTensor *bias = luaT_getfieldcheckudata(L, 1, "bias", torch_Tensor);
   THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_Tensor);
 
-  luaL_argcheck(L, input->nDimension == 3 || input->nDimension == 4, 2, "3D or 4D(batch mode) tensor expected");
-
   int dimw = 2;
   int dimh = 1;
+
+  luaL_argcheck(L, input->nDimension == 3 || input->nDimension == 4, 2, "3D or 4D(batch mode) tensor expected");
+
   if (input->nDimension == 4) {
     dimw++;
     dimh++;
   }
 
-  long nOutputPlane = weight->size[0];
-  long kW = weight->size[3];
-  long kH = weight->size[2];
-  long inputWidth = input->size[dimw];
-  long inputHeight = input->size[dimh];
-  long outputWidth = (inputWidth - kW) / dW + 1;
-  long outputHeight = (inputHeight - kH) / dH + 1;
-
-  if (input->nDimension == 3)
   {
-    THTensor_(resize3d)(output, nOutputPlane, outputHeight, outputWidth);
-    /* add bias */
-    long i;
-    /*THTensor *outn = THTensor_(new)();*/
-    real* bias_data = THTensor_(data)(bias);
-    real* output_data = THTensor_(data)(output);
-#pragma omp parallel for private(i)
-    for (i=0; i<bias->size[0]; i++)
+    long nOutputPlane = weight->size[0];
+    long kW = weight->size[3];
+    long kH = weight->size[2];
+    long inputWidth = input->size[dimw];
+    long inputHeight = input->size[dimh];
+    long outputWidth = (inputWidth - kW) / dW + 1;
+    long outputHeight = (inputHeight - kH) / dH + 1;
+
+    if (input->nDimension == 3)
     {
-      /*THTensor_(select)(outn,output,0,i);*/
-      /*TH_TENSOR_APPLY(real,outn, *outn_data = bias_data[i];);*/
-      real *ptr_output = output_data + i*outputWidth*outputHeight;
-      long j;
-      for(j = 0; j < outputWidth*outputHeight; j++)
-        ptr_output[j] = bias_data[i];
-    }
-    /*THTensor_(free)(outn);*/
-
-    /* do convolutions */
-    THTensor_(conv2Dmv)(output, 1.0, 1.0, input, weight, dH, dW, "V","X");
-  }
-  else
-  {
-    THTensor_(resize4d)(output, input->size[0], nOutputPlane, outputHeight, outputWidth);
+      long i;
+      real* bias_data;
+      real* output_data;
 
-    real* bias_data = THTensor_(data)(bias);
-    real* output_data = THTensor_(data)(output);
+      THTensor_(resize3d)(output, nOutputPlane, outputHeight, outputWidth);
+      /* add bias */
+      bias_data = THTensor_(data)(bias);
+      output_data = THTensor_(data)(output);
 
-    long p;
-#pragma omp parallel for private(p)
-    for (p=0; p<input->size[0]; p++)
-    {
-      /* BIAS */
-      long i;
+#pragma omp parallel for private(i)
       for (i=0; i<bias->size[0]; i++)
       {
-        real *ptr_output = output_data + p*nOutputPlane*outputWidth*outputHeight + i*outputWidth*outputHeight;
+        /*THTensor_(select)(outn,output,0,i);*/
+        /*TH_TENSOR_APPLY(real,outn, *outn_data = bias_data[i];);*/
+        real *ptr_output = output_data + i*outputWidth*outputHeight;
         long j;
         for(j = 0; j < outputWidth*outputHeight; j++)
           ptr_output[j] = bias_data[i];
       }
+      /*THTensor_(free)(outn);*/
+
+      /* do convolutions */
+      THTensor_(conv2Dmv)(output, 1.0, 1.0, input, weight, dH, dW, "V","X");
     }
+    else
+    {
+      real* bias_data;
+      real* output_data;
+      long p;
 
-    /* do convolutions */
-    THTensor_(conv2Dmm)(output, 1.0, 1.0, input, weight, dH, dW, "V","X");
+      THTensor_(resize4d)(output, input->size[0], nOutputPlane, outputHeight, outputWidth);
+
+      bias_data = THTensor_(data)(bias);
+      output_data = THTensor_(data)(output);
+
+#pragma omp parallel for private(p)
+      for (p=0; p<input->size[0]; p++)
+      {
+        /* BIAS */
+        long i;
+        for (i=0; i<bias->size[0]; i++)
+        {
+          real *ptr_output = output_data + p*nOutputPlane*outputWidth*outputHeight + i*outputWidth*outputHeight;
+          long j;
+          for(j = 0; j < outputWidth*outputHeight; j++)
+            ptr_output[j] = bias_data[i];
+        }
+      }
+
+      /* do convolutions */
+      THTensor_(conv2Dmm)(output, 1.0, 1.0, input, weight, dH, dW, "V","X");
+    }
   }
   return 1;
 }
@@ -92,10 +101,12 @@ static int nn_(SpatialConvolution_updateGradInput)(lua_State *L)
   THTensor *weight = luaT_getfieldcheckudata(L, 1, "weight", torch_Tensor);
   THTensor *gradInput = luaT_getfieldcheckudata(L, 1, "gradInput", torch_Tensor);
 
+  THTensor *tweight;
+
   THArgCheck( nOutputPlane == gradOutput->size[input->nDimension == 4 ?
1 : 0], 1, "Number of output features is not equal to nOutputPlane" ); /* gradient to input */ - THTensor *tweight = THTensor_(newTranspose)(weight,0,1); + tweight = THTensor_(newTranspose)(weight,0,1); if (input->nDimension == 3) { @@ -122,11 +133,15 @@ static int nn_(SpatialConvolution_accGradParameters)(lua_State *L) THTensor *gradWeight = luaT_getfieldcheckudata(L, 1, "gradWeight", torch_Tensor); THTensor *gradBias = luaT_getfieldcheckudata(L, 1, "gradBias", torch_Tensor); - THArgCheck( nOutputPlane == gradOutput->size[input->nDimension == 4 ? 1 : 0], 1, "Number of output features is not equal to nOutputPlane" ); - int dimw = 2; int dimh = 1; + real *gradBias_data; + real *gradOutput_data; + long noutSlice; + + THArgCheck( nOutputPlane == gradOutput->size[input->nDimension == 4 ? 1 : 0], 1, "Number of output features is not equal to nOutputPlane" ); + if (input->nDimension == 4) { dimw++; @@ -134,9 +149,9 @@ static int nn_(SpatialConvolution_accGradParameters)(lua_State *L) } /* gradient to bias */ - real *gradBias_data = THTensor_(data)(gradBias); - real *gradOutput_data = THTensor_(data)(gradOutput); - long noutSlice = gradOutput->size[dimh]*gradOutput->size[dimw]; + gradBias_data = THTensor_(data)(gradBias); + gradOutput_data = THTensor_(data)(gradOutput); + noutSlice = gradOutput->size[dimh]*gradOutput->size[dimw]; /*THTensor* gradOutSlice = THTensor_(new)();*/ if (input->nDimension == 3) diff --git a/generic/SpatialConvolutionMM.c b/generic/SpatialConvolutionMM.c index a8014f7..d4fd953 100644 --- a/generic/SpatialConvolutionMM.c +++ b/generic/SpatialConvolutionMM.c @@ -61,12 +61,13 @@ static void nn_(SpatialConvolutionMM_updateOutput_frame)(THTensor *input, THTens long nOutputPlane, long outputWidth, long outputHeight) { long i; + THTensor *output2d; nn_(unfolded_copy)(finput, input, kW, kH, nInputPlane, inputWidth, inputHeight, outputWidth, outputHeight); - THTensor *output2d = THTensor_(newWithStorage2d)(output->storage, output->storageOffset, - 
nOutputPlane, -1, - outputHeight*outputWidth, -1); + output2d = THTensor_(newWithStorage2d)(output->storage, output->storageOffset, + nOutputPlane, -1, + outputHeight*outputWidth, -1); for(i = 0; i < nOutputPlane; i++) THVector_(fill)(output->storage->data+output->storageOffset+output->stride[0]*i, THTensor_(get1d)(bias, i), outputHeight*outputWidth); @@ -87,23 +88,31 @@ static int nn_(SpatialConvolutionMM_updateOutput)(lua_State *L) THTensor *bias = luaT_getfieldcheckudata(L, 1, "bias", torch_Tensor); THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_Tensor); - luaL_argcheck(L, input->nDimension == 3 || input->nDimension == 4, 2, "3D or 4D(batch mode) tensor expected"); - int dimf = 0; int dimw = 2; int dimh = 1; + + long nInputPlane; + long inputWidth; + long inputHeight; + long nOutputPlane; + long outputWidth; + long outputHeight; + + luaL_argcheck(L, input->nDimension == 3 || input->nDimension == 4, 2, "3D or 4D(batch mode) tensor expected"); + if (input->nDimension == 4) { dimf++; dimw++; dimh++; } - long nInputPlane = input->size[dimf]; - long inputWidth = input->size[dimw]; - long inputHeight = input->size[dimh]; - long nOutputPlane = weight->size[0]; - long outputWidth = (inputWidth - kW) + 1; - long outputHeight = (inputHeight - kH) + 1; + nInputPlane = input->size[dimf]; + inputWidth = input->size[dimw]; + inputHeight = input->size[dimh]; + nOutputPlane = weight->size[0]; + outputWidth = (inputWidth - kW) + 1; + outputHeight = (inputHeight - kH) + 1; if(input->nDimension == 3) { @@ -126,7 +135,7 @@ static int nn_(SpatialConvolutionMM_updateOutput)(lua_State *L) THStorage_(clearFlag)(input->storage, TH_STORAGE_REFCOUNTED); THStorage_(clearFlag)(output->storage, TH_STORAGE_REFCOUNTED); THStorage_(clearFlag)(finput->storage, TH_STORAGE_REFCOUNTED); -// mkl_set_num_threads(1); + #pragma omp parallel for private(t) for(t = 0; t < T; t++) { @@ -147,7 +156,6 @@ static int nn_(SpatialConvolutionMM_updateOutput)(lua_State *L) 
THStorage_(setFlag)(output->storage, TH_STORAGE_REFCOUNTED);
     THStorage_(setFlag)(finput->storage, TH_STORAGE_REFCOUNTED);
   }
-// mkl_set_num_threads(4);
 
   return 1;
 }
@@ -227,15 +235,15 @@ static void nn_(SpatialConvolutionMM_accGradParameters_frame)(THTensor *gradOutp
                                                              real scale)
 {
   long i;
-
+  THTensor *gradOutputPlane = THTensor_(new)();
   THTensor *gradOutput2d = THTensor_(newWithStorage2d)(gradOutput->storage, gradOutput->storageOffset,
                                                        gradOutput->size[0], -1,
                                                        gradOutput->size[1]*gradOutput->size[2], -1);
+
   THTensor_(transpose)(finput, finput, 0, 1);
   THTensor_(addmm)(gradWeight, 1, gradWeight, scale, gradOutput2d, finput);
   THTensor_(transpose)(finput, finput, 0, 1);
 
-  THTensor *gradOutputPlane = THTensor_(new)();
   for(i = 0; i < gradBias->size[0]; i++)
   {
     long k;
diff --git a/generic/SpatialConvolutionMap.c b/generic/SpatialConvolutionMap.c
index a1d20bc..eed0ca9 100644
--- a/generic/SpatialConvolutionMap.c
+++ b/generic/SpatialConvolutionMap.c
@@ -17,6 +17,24 @@ static int nn_(SpatialConvolutionMap_updateOutput)(lua_State *L)
   THTensor *bias = luaT_getfieldcheckudata(L, 1, "bias", torch_Tensor);
   THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_Tensor);
 
+  real *input_data;
+  real *output_data;
+  real *weight_data;
+  real *bias_data;
+  real *connTable_data;
+
+  long input_h;
+  long input_w;
+  long output_h;
+  long output_w;
+  long weight_h;
+  long weight_w;
+
+  long p, k;
+  int nweight;
+
+
+
   luaL_argcheck(L, input->nDimension == 3, 2, "3D tensor expected");
   luaL_argcheck(L, input->size[0] >= nInputPlane, 2, "invalid number of input planes");
   luaL_argcheck(L, input->size[2] >= kW && input->size[1] >= kH, 2, "input image smaller than kernel size");
@@ -25,39 +43,37 @@ static int nn_(SpatialConvolutionMap_updateOutput)(lua_State *L)
                     (input->size[1] - kH) / dH + 1,
                     (input->size[2] - kW) / dW + 1);
 
-  // contiguous
+  /* contiguous */
   input = THTensor_(newContiguous)(input);
   output = THTensor_(newContiguous)(output);
 
-  // get raw pointers
-  real *input_data = THTensor_(data)(input);
-  real *output_data = THTensor_(data)(output);
-  real *weight_data = THTensor_(data)(weight);
-  real *bias_data = THTensor_(data)(bias);
-  real *connTable_data = THTensor_(data)(connTable);
-
-  // and dims
-  long input_h = input->size[1];
-  long input_w = input->size[2];
-  long output_h = output->size[1];
-  long output_w = output->size[2];
-  long weight_h = weight->size[1];
-  long weight_w = weight->size[2];
+  /* get raw pointers */
+  input_data = THTensor_(data)(input);
+  output_data = THTensor_(data)(output);
+  weight_data = THTensor_(data)(weight);
+  bias_data = THTensor_(data)(bias);
+  connTable_data = THTensor_(data)(connTable);
+
+  /* and dims */
+  input_h = input->size[1];
+  input_w = input->size[2];
+  output_h = output->size[1];
+  output_w = output->size[2];
+  weight_h = weight->size[1];
+  weight_w = weight->size[2];
 
-  long p;
-#pragma omp parallel for private(p)
+#pragma omp parallel for private(p, k, nweight)
   for (p = 0; p < nOutputPlane; p++) {
-    // add bias
+    /* add bias */
     real *ptr_output = output_data + p*output_w*output_h;
     long j;
     for(j = 0; j < output_h*output_w; j++)
       ptr_output[j] = bias_data[p];
 
-    // convolve all maps
-    int nweight = connTable->size[0];
-    long k;
+    /* convolve all maps */
+    nweight = connTable->size[0];
     for (k = 0; k < nweight; k++) {
-      // get offsets for input/output
+      /* get offsets for input/output */
       int o = (int)connTable_data[k*2+1]-1;
       int i = (int)connTable_data[k*2+0]-1;
 
@@ -72,7 +88,7 @@ static int nn_(SpatialConvolutionMap_updateOutput)(lua_State *L)
     }
   }
 
-  // clean up
+  /* clean up */
   THTensor_(free)(input);
   THTensor_(free)(output);
 
@@ -91,34 +107,47 @@ static int nn_(SpatialConvolutionMap_updateGradInput)(lua_State *L)
   THTensor *weight = luaT_getfieldcheckudata(L, 1, "weight", torch_Tensor);
   THTensor *gradInput = luaT_getfieldcheckudata(L, 1, "gradInput", torch_Tensor);
 
-  // contiguous
+  real *gradInput_data;
+  real *gradOutput_data;
+  real *weight_data;
+  real *connTable_data;
+
+  long input_h;
+  long input_w;
+  long output_h;
+  long output_w;
+ long weight_h; + long weight_w; + + long p; + + /* contiguous */ gradInput = THTensor_(newContiguous)(gradInput); gradOutput = THTensor_(newContiguous)(gradOutput); - // Resize/Zero + /* Resize/Zero */ THTensor_(resizeAs)(gradInput, input); THTensor_(zero)(gradInput); - // get raw pointers - real *gradInput_data = THTensor_(data)(gradInput); - real *gradOutput_data = THTensor_(data)(gradOutput); - real *weight_data = THTensor_(data)(weight); - real *connTable_data = THTensor_(data)(connTable); + /* get raw pointers */ + gradInput_data = THTensor_(data)(gradInput); + gradOutput_data = THTensor_(data)(gradOutput); + weight_data = THTensor_(data)(weight); + connTable_data = THTensor_(data)(connTable); - // and dims - long input_h = input->size[1]; - long input_w = input->size[2]; - long output_h = gradOutput->size[1]; - long output_w = gradOutput->size[2]; - long weight_h = weight->size[1]; - long weight_w = weight->size[2]; + /* and dims */ + input_h = input->size[1]; + input_w = input->size[2]; + output_h = gradOutput->size[1]; + output_w = gradOutput->size[2]; + weight_h = weight->size[1]; + weight_w = weight->size[2]; - long p; #pragma omp parallel for private(p) for(p = 0; p < nInputPlane; p++) { long k; - // backward all + /* backward all */ int nkernel = connTable->size[0]; for(k = 0; k < nkernel; k++) { @@ -126,7 +155,7 @@ static int nn_(SpatialConvolutionMap_updateGradInput)(lua_State *L) int i = (int)connTable_data[k*2+0]-1; if (i == p) { - // gradient to input + /* gradient to input */ THTensor_(fullConv2Dptr)(gradInput_data + i*input_w*input_h, 1.0, gradOutput_data + o*output_w*output_h, output_h, output_w, @@ -136,7 +165,7 @@ static int nn_(SpatialConvolutionMap_updateGradInput)(lua_State *L) } } - // clean up + /* clean up */ THTensor_(free)(gradInput); THTensor_(free)(gradOutput); @@ -157,26 +186,41 @@ static int nn_(SpatialConvolutionMap_accGradParameters)(lua_State *L) THTensor *gradWeight = luaT_getfieldcheckudata(L, 1, "gradWeight", torch_Tensor); 
THTensor *gradBias = luaT_getfieldcheckudata(L, 1, "gradBias", torch_Tensor); - // contiguous + real *input_data; + real *gradOutput_data; + real *gradWeight_data; + real *gradBias_data; + + /* and dims */ + long input_h; + long input_w; + long output_h; + long output_w; + long weight_h; + long weight_w; + + long k; + int nkernel; + + /* contiguous */ input = THTensor_(newContiguous)(input); gradOutput = THTensor_(newContiguous)(gradOutput); - // get raw pointers - real *input_data = THTensor_(data)(input); - real *gradOutput_data = THTensor_(data)(gradOutput); - real *gradWeight_data = THTensor_(data)(gradWeight); - real *gradBias_data = THTensor_(data)(gradBias); - - // and dims - long input_h = input->size[1]; - long input_w = input->size[2]; - long output_h = gradOutput->size[1]; - long output_w = gradOutput->size[2]; - long weight_h = weight->size[1]; - long weight_w = weight->size[2]; - - // gradients wrt bias - long k; + /* get raw pointers */ + input_data = THTensor_(data)(input); + gradOutput_data = THTensor_(data)(gradOutput); + gradWeight_data = THTensor_(data)(gradWeight); + gradBias_data = THTensor_(data)(gradBias); + + /* and dims */ + input_h = input->size[1]; + input_w = input->size[2]; + output_h = gradOutput->size[1]; + output_w = gradOutput->size[2]; + weight_h = weight->size[1]; + weight_w = weight->size[2]; + + /* gradients wrt bias */ #pragma omp parallel for private(k) for(k = 0; k < nOutputPlane; k++) { real *ptr_gradOutput = gradOutput_data + k*output_w*output_h; @@ -185,15 +229,15 @@ static int nn_(SpatialConvolutionMap_accGradParameters)(lua_State *L) gradBias_data[k] += scale*ptr_gradOutput[l]; } - // gradients wrt weight - int nkernel = connTable->size[0]; + /* gradients wrt weight */ + nkernel = connTable->size[0]; #pragma omp parallel for private(k) for(k = 0; k < nkernel; k++) { int o = (int)THTensor_(get2d)(connTable,k,1)-1; int i = (int)THTensor_(get2d)(connTable,k,0)-1; - // gradient to kernel + /* gradient to kernel */ 
THTensor_(validXCorr2DRevptr)(gradWeight_data + k*weight_w*weight_h,
                                   scale,
                                   input_data + i*input_w*input_h, input_h, input_w,
@@ -201,7 +245,7 @@ static int nn_(SpatialConvolutionMap_accGradParameters)(lua_State *L)
                                   dH, dW);
   }
 
-  // clean up
+  /* clean up */
   THTensor_(free)(input);
   THTensor_(free)(gradOutput);
   return 0;
diff --git a/generic/SpatialFullConvolution.c b/generic/SpatialFullConvolution.c
index cb2e340..3b55297 100644
--- a/generic/SpatialFullConvolution.c
+++ b/generic/SpatialFullConvolution.c
@@ -12,69 +12,80 @@ static int nn_(SpatialFullConvolution_updateOutput)(lua_State *L)
   THTensor *bias = luaT_getfieldcheckudata(L, 1, "bias", torch_Tensor);
   THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_Tensor);
 
-  luaL_argcheck(L, input->nDimension == 3 || input->nDimension == 4, 2, "3D or 4D(batch mode) tensor expected");
   int dimw = 2;
   int dimh = 1;
+
+  luaL_argcheck(L, input->nDimension == 3 || input->nDimension == 4, 2, "3D or 4D(batch mode) tensor expected");
   if (input->nDimension == 4) {
     dimw++;
     dimh++;
   }
 
-  long nOutputPlane = weight->size[1];
-  long kW = weight->size[3];
-  long kH = weight->size[2];
-  long inputWidth = input->size[dimw];
-  long inputHeight = input->size[dimh];
-  long outputWidth = (inputWidth - 1) * dW + kW;
-  long outputHeight = (inputHeight - 1) * dH + kH;
-
-  if (input->nDimension == 3)
-  {
-    THTensor_(resize3d)(output, nOutputPlane, outputHeight, outputWidth);
-    /* add bias */
-    long i;
-    real* bias_data = THTensor_(data)(bias);
-    real* output_data = THTensor_(data)(output);
-#pragma omp parallel for private(i)
-    for (i=0; i<bias->size[0]; i++)
-    {
-      /*THTensor_(select)(outn,output,0,i);*/
-      /*TH_TENSOR_APPLY(real,outn, *outn_data = bias_data[i];);*/
-      real *ptr_output = output_data + i*outputWidth*outputHeight;
-      long j;
-      for(j = 0; j < outputWidth*outputHeight; j++)
-        ptr_output[j] = bias_data[i];
-    }
-
-    /* do convolutions */
-    THTensor *tweight = THTensor_(newTranspose)(weight,0,1);
-    THTensor_(conv2Dmv)(output, 1.0, 1.0, input, tweight, dH, dW, "F", "C");
-    THTensor_(free)(tweight);
-  }
-  else
   {
-    THTensor_(resize4d)(output, input->size[0], nOutputPlane, outputHeight, outputWidth);
-    real* bias_data = THTensor_(data)(bias);
-    real* output_data = THTensor_(data)(output);
-
-    long p;
-#pragma omp parallel for private(p)
-    for (p=0; p<input->size[0]; p++)
+    long nOutputPlane = weight->size[1];
+    long kW = weight->size[3];
+    long kH = weight->size[2];
+    long inputWidth = input->size[dimw];
+    long inputHeight = input->size[dimh];
+    long outputWidth = (inputWidth - 1) * dW + kW;
+    long outputHeight = (inputHeight - 1) * dH + kH;
+
+    if (input->nDimension == 3)
     {
-      /* BIAS */
       long i;
+      real* bias_data;
+      real* output_data;
+
+      THTensor_(resize3d)(output, nOutputPlane, outputHeight, outputWidth);
+      /* add bias */
+      bias_data = THTensor_(data)(bias);
+      output_data = THTensor_(data)(output);
+#pragma omp parallel for private(i)
       for (i=0; i<bias->size[0]; i++)
       {
-        real *ptr_output = output_data + p*nOutputPlane*outputWidth*outputHeight + i*outputWidth*outputHeight;
+        real *ptr_output = output_data + i*outputWidth*outputHeight;
         long j;
         for(j = 0; j < outputWidth*outputHeight; j++)
           ptr_output[j] = bias_data[i];
       }
+
+      /* do convolutions */
+      {
+        THTensor *tweight = THTensor_(newTranspose)(weight,0,1);
+        THTensor_(conv2Dmv)(output, 1.0, 1.0, input, tweight, dH, dW, "F", "C");
+        THTensor_(free)(tweight);
+      }
+    }
+    else
+    {
+      real* bias_data;
+      real* output_data;
+      long p;
+
+      THTensor_(resize4d)(output, input->size[0], nOutputPlane, outputHeight, outputWidth);
+      bias_data = THTensor_(data)(bias);
+      output_data = THTensor_(data)(output);
+
+#pragma omp parallel for private(p)
+      for (p=0; p<input->size[0]; p++)
+      {
+        /* BIAS */
+        long i;
+        for (i=0; i<bias->size[0]; i++)
+        {
+          real *ptr_output = output_data + p*nOutputPlane*outputWidth*outputHeight + i*outputWidth*outputHeight;
+          long j;
+          for(j = 0; j < outputWidth*outputHeight; j++)
+            ptr_output[j] = bias_data[i];
+        }
+      }
+      /* do convolutions */
+      {
+        THTensor *tweight =
THTensor_(newTranspose)(weight,0,1); + THTensor_(conv2Dmm)(output, 1.0, 1.0, input, tweight, dH, dW, "F", "C"); + THTensor_(free)(tweight); + } } - /* do convolutions */ - THTensor *tweight = THTensor_(newTranspose)(weight,0,1); - THTensor_(conv2Dmm)(output, 1.0, 1.0, input, tweight, dH, dW, "F", "C"); - THTensor_(free)(tweight); } return 1; } @@ -120,20 +131,26 @@ static int nn_(SpatialFullConvolution_accGradParameters)(lua_State *L) THTensor *gradBias = luaT_getfieldcheckudata(L, 1, "gradBias", torch_Tensor); long nOutputPlane = weight->size[1]; - THArgCheck( nOutputPlane == gradOutput->size[input->nDimension == 4 ? 1 : 0], 1, "Number of output features is not equal to nOutputPlane" ); int dimw = 2; int dimh = 1; + real *gradBias_data; + real *gradOutput_data; + long noutSlice; + + THArgCheck( nOutputPlane == gradOutput->size[input->nDimension == 4 ? 1 : 0], 1, "Number of output features is not equal to nOutputPlane" ); + + if (input->nDimension == 4) { dimw++; dimh++; } /* gradient to bias */ - real *gradBias_data = THTensor_(data)(gradBias); - real *gradOutput_data = THTensor_(data)(gradOutput); - long noutSlice = gradOutput->size[dimh]*gradOutput->size[dimw]; + gradBias_data = THTensor_(data)(gradBias); + gradOutput_data = THTensor_(data)(gradOutput); + noutSlice = gradOutput->size[dimh]*gradOutput->size[dimw]; /*THTensor* gradOutSlice = THTensor_(new)();*/ if (input->nDimension == 3) diff --git a/generic/SpatialFullConvolutionMap.c b/generic/SpatialFullConvolutionMap.c index 8a5d9df..9d5cff2 100644 --- a/generic/SpatialFullConvolutionMap.c +++ b/generic/SpatialFullConvolutionMap.c @@ -18,6 +18,21 @@ static int nn_(SpatialFullConvolutionMap_updateOutput)(lua_State *L) THTensor *bias = luaT_getfieldcheckudata(L, 1, "bias", torch_Tensor); THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_Tensor); + real *input_data; + real *output_data; + real *weight_data; + real *bias_data; + real *connTable_data; + + long input_h; + long input_w; + long 
output_h; + long output_w; + long weight_h; + long weight_w; + + long p; + luaL_argcheck(L, input->nDimension == 3, 2, "3D tensor expected"); luaL_argcheck(L, input->size[0] >= nInputPlane, 2, "invalid number of input planes"); @@ -26,39 +41,40 @@ static int nn_(SpatialFullConvolutionMap_updateOutput)(lua_State *L) (input->size[1] - 1) * dH + kH, (input->size[2] - 1) * dW + kW); - // contiguous + /* contiguous */ input = THTensor_(newContiguous)(input); output = THTensor_(newContiguous)(output); - // get raw pointers - real *input_data = THTensor_(data)(input); - real *output_data = THTensor_(data)(output); - real *weight_data = THTensor_(data)(weight); - real *bias_data = THTensor_(data)(bias); - real *connTable_data = THTensor_(data)(connTable); - - // and dims - long input_h = input->size[1]; - long input_w = input->size[2]; - long output_h = output->size[1]; - long output_w = output->size[2]; - long weight_h = weight->size[1]; - long weight_w = weight->size[2]; + /* get raw pointers */ + input_data = THTensor_(data)(input); + output_data = THTensor_(data)(output); + weight_data = THTensor_(data)(weight); + bias_data = THTensor_(data)(bias); + connTable_data = THTensor_(data)(connTable); + + /* and dims */ + input_h = input->size[1]; + input_w = input->size[2]; + output_h = output->size[1]; + output_w = output->size[2]; + weight_h = weight->size[1]; + weight_w = weight->size[2]; - long p; #pragma omp parallel for private(p) for (p = 0; p < nOutputPlane; p++) { - // add bias + /* add bias */ real *ptr_output = output_data + p*output_w*output_h; long j; + int nweight; + long k; + for(j = 0; j < output_h*output_w; j++) ptr_output[j] = bias_data[p]; - // convolve all maps - int nweight = connTable->size[0]; - long k; + /* convolve all maps */ + nweight = connTable->size[0]; for (k = 0; k < nweight; k++) { - // get offsets for input/output + /* get offsets for input/output */ int o = (int)connTable_data[k*2+1]-1; int i = (int)connTable_data[k*2+0]-1; @@ -73,7 +89,7 
@@ static int nn_(SpatialFullConvolutionMap_updateOutput)(lua_State *L) } } - // clean up + /* clean up */ THTensor_(free)(input); THTensor_(free)(output); @@ -92,34 +108,47 @@ static int nn_(SpatialFullConvolutionMap_updateGradInput)(lua_State *L) THTensor *weight = luaT_getfieldcheckudata(L, 1, "weight", torch_Tensor); THTensor *gradInput = luaT_getfieldcheckudata(L, 1, "gradInput", torch_Tensor); - // contiguous + real *gradInput_data; + real *gradOutput_data; + real *weight_data; + real *connTable_data; + + long input_h; + long input_w; + long output_h; + long output_w; + long weight_h; + long weight_w; + + long p; + + /* contiguous */ gradInput = THTensor_(newContiguous)(gradInput); gradOutput = THTensor_(newContiguous)(gradOutput); - // Resize/Zero + /* Resize/Zero */ THTensor_(resizeAs)(gradInput, input); THTensor_(zero)(gradInput); - // get raw pointers - real *gradInput_data = THTensor_(data)(gradInput); - real *gradOutput_data = THTensor_(data)(gradOutput); - real *weight_data = THTensor_(data)(weight); - real *connTable_data = THTensor_(data)(connTable); + /* get raw pointers */ + gradInput_data = THTensor_(data)(gradInput); + gradOutput_data = THTensor_(data)(gradOutput); + weight_data = THTensor_(data)(weight); + connTable_data = THTensor_(data)(connTable); - // and dims - long input_h = input->size[1]; - long input_w = input->size[2]; - long output_h = gradOutput->size[1]; - long output_w = gradOutput->size[2]; - long weight_h = weight->size[1]; - long weight_w = weight->size[2]; + /* and dims */ + input_h = input->size[1]; + input_w = input->size[2]; + output_h = gradOutput->size[1]; + output_w = gradOutput->size[2]; + weight_h = weight->size[1]; + weight_w = weight->size[2]; - long p; #pragma omp parallel for private(p) for(p = 0; p < nInputPlane; p++) { long k; - // backward all + /* backward all */ int nkernel = connTable->size[0]; for(k = 0; k < nkernel; k++) { @@ -127,7 +156,7 @@ static int 
nn_(SpatialFullConvolutionMap_updateGradInput)(lua_State *L) int i = (int)connTable_data[k*2+0]-1; if (i == p) { - // gradient to input + /* gradient to input */ THTensor_(validXCorr2Dptr)(gradInput_data + i*input_w*input_h, 1.0, gradOutput_data + o*output_w*output_h, output_h, output_w, @@ -137,7 +166,7 @@ static int nn_(SpatialFullConvolutionMap_updateGradInput)(lua_State *L) } } - // clean up + /* clean up */ THTensor_(free)(gradInput); THTensor_(free)(gradOutput); @@ -158,26 +187,40 @@ static int nn_(SpatialFullConvolutionMap_accGradParameters)(lua_State *L) THTensor *gradWeight = luaT_getfieldcheckudata(L, 1, "gradWeight", torch_Tensor); THTensor *gradBias = luaT_getfieldcheckudata(L, 1, "gradBias", torch_Tensor); - // contiguous + real *input_data; + real *gradOutput_data; + real *gradWeight_data; + real *gradBias_data; + + long input_h; + long input_w; + long output_h; + long output_w; + long weight_h; + long weight_w; + + long k; + int nkernel; + + /* contiguous */ input = THTensor_(newContiguous)(input); gradOutput = THTensor_(newContiguous)(gradOutput); - // get raw pointers - real *input_data = THTensor_(data)(input); - real *gradOutput_data = THTensor_(data)(gradOutput); - real *gradWeight_data = THTensor_(data)(gradWeight); - real *gradBias_data = THTensor_(data)(gradBias); - - // and dims - long input_h = input->size[1]; - long input_w = input->size[2]; - long output_h = gradOutput->size[1]; - long output_w = gradOutput->size[2]; - long weight_h = weight->size[1]; - long weight_w = weight->size[2]; - - // gradients wrt bias - long k; + /* get raw pointers */ + input_data = THTensor_(data)(input); + gradOutput_data = THTensor_(data)(gradOutput); + gradWeight_data = THTensor_(data)(gradWeight); + gradBias_data = THTensor_(data)(gradBias); + + /* and dims */ + input_h = input->size[1]; + input_w = input->size[2]; + output_h = gradOutput->size[1]; + output_w = gradOutput->size[2]; + weight_h = weight->size[1]; + weight_w = weight->size[2]; + + /* 
gradients wrt bias */ #pragma omp parallel for private(k) for(k = 0; k < nOutputPlane; k++) { real *ptr_gradOutput = gradOutput_data + k*output_w*output_h; @@ -186,15 +229,15 @@ static int nn_(SpatialFullConvolutionMap_accGradParameters)(lua_State *L) gradBias_data[k] += scale*ptr_gradOutput[l]; } - // gradients wrt weight - int nkernel = connTable->size[0]; + /* gradients wrt weight */ + nkernel = connTable->size[0]; #pragma omp parallel for private(k) for(k = 0; k < nkernel; k++) { int o = (int)THTensor_(get2d)(connTable,k,1)-1; int i = (int)THTensor_(get2d)(connTable,k,0)-1; - // gradient to kernel + /* gradient to kernel */ THTensor_(validXCorr2DRevptr)(gradWeight_data + k*weight_w*weight_h, scale, gradOutput_data + o*output_w*output_h, output_h, output_w, @@ -202,7 +245,7 @@ static int nn_(SpatialFullConvolutionMap_accGradParameters)(lua_State *L) dH, dW); } - // clean up + /* clean up */ THTensor_(free)(input); THTensor_(free)(gradOutput); return 0; diff --git a/generic/SpatialMaxPooling.c b/generic/SpatialMaxPooling.c index 7faa0ee..8dd04c9 100644 --- a/generic/SpatialMaxPooling.c +++ b/generic/SpatialMaxPooling.c @@ -13,19 +13,19 @@ static void nn_(SpatialMaxPooling_updateOutput_frame)(real *input_p, real *outpu #pragma omp parallel for private(k) for (k = 0; k < nslices; k++) { - // loop over output + /* loop over output */ long i, j; for(i = 0; i < oheight; i++) { for(j = 0; j < owidth; j++) { - // local pointers + /* local pointers */ real *ip = input_p + k*iwidth*iheight + i*iwidth*dH + j*dW; real *op = output_p + k*owidth*oheight + i*owidth + j; real *indyp = indy_p + k*owidth*oheight + i*owidth + j; real *indxp = indx_p + k*owidth*oheight + i*owidth + j; - // compute local max: + /* compute local max: */ long maxindex = -1; real maxval = -THInf; long tcntr = 0; @@ -44,10 +44,10 @@ static void nn_(SpatialMaxPooling_updateOutput_frame)(real *input_p, real *outpu } } - // set output to local max + /* set output to local max */ *op = maxval; - // store 
location of max (x,y) + /* store location of max (x,y) */ *indyp = (int)(maxindex / kW)+1; *indxp = (maxindex % kW) +1; } @@ -64,11 +64,21 @@ static int nn_(SpatialMaxPooling_updateOutput)(lua_State *L) int dH = luaT_getfieldcheckint(L, 1, "dH"); THTensor *indices = luaT_getfieldcheckudata(L, 1, "indices", torch_Tensor); THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_Tensor); - - luaL_argcheck(L, input->nDimension == 3 || input->nDimension == 4 , 2, "3D or 4D (batch mode) tensor expected"); int dimw = 2; int dimh = 1; long nbatch = 1; + long nslices; + long iheight; + long iwidth; + long oheight; + long owidth; + real *input_data; + real *output_data; + real *indices_data; + + + luaL_argcheck(L, input->nDimension == 3 || input->nDimension == 4 , 2, "3D or 4D (batch mode) tensor expected"); + if (input->nDimension == 4) { nbatch = input->size[0]; @@ -77,26 +87,26 @@ static int nn_(SpatialMaxPooling_updateOutput)(lua_State *L) } luaL_argcheck(L, input->size[dimw] >= kW && input->size[dimh] >= kH, 2, "input image smaller than kernel size"); - // sizes - long nslices = input->size[dimh-1]; - long iheight = input->size[dimh]; - long iwidth = input->size[dimw]; - long oheight = (iheight - kH) / dH + 1; - long owidth = (iwidth - kW) / dW + 1; + /* sizes */ + nslices = input->size[dimh-1]; + iheight = input->size[dimh]; + iwidth = input->size[dimw]; + oheight = (iheight - kH) / dH + 1; + owidth = (iwidth - kW) / dW + 1; - // get contiguous input + /* get contiguous input */ input = THTensor_(newContiguous)(input); - // resize output + /* resize output */ if (input->nDimension == 3) { THTensor_(resize3d)(output, nslices, oheight, owidth); - // indices will contain i,j locations for each output point + /* indices will contain i,j locations for each output point */ THTensor_(resize4d)(indices, 2, nslices, oheight, owidth); - real *input_data = THTensor_(data)(input); - real *output_data = THTensor_(data)(output); - real *indices_data = 
THTensor_(data)(indices); + input_data = THTensor_(data)(input); + output_data = THTensor_(data)(output); + indices_data = THTensor_(data)(indices); nn_(SpatialMaxPooling_updateOutput_frame)(input_data, output_data, indices_data+nslices*owidth*oheight, indices_data, @@ -107,15 +117,16 @@ static int nn_(SpatialMaxPooling_updateOutput)(lua_State *L) } else { + long p; + THTensor_(resize4d)(output, nbatch, nslices, oheight, owidth); - // indices will contain i,j locations for each output point + /* indices will contain i,j locations for each output point */ THTensor_(resize5d)(indices, 2, nbatch, nslices, oheight, owidth); - real *input_data = THTensor_(data)(input); - real *output_data = THTensor_(data)(output); - real *indices_data = THTensor_(data)(indices); + input_data = THTensor_(data)(input); + output_data = THTensor_(data)(output); + indices_data = THTensor_(data)(indices); - long p; #pragma omp parallel for private(p) for (p = 0; p < nbatch; p++) { @@ -128,7 +139,7 @@ static int nn_(SpatialMaxPooling_updateOutput)(lua_State *L) } } - // cleanup + /* cleanup */ THTensor_(free)(input); return 1; } @@ -149,17 +160,17 @@ static void nn_(SpatialMaxPooling_updateGradInput_frame)(real *gradInput_p, real real *indx_p_k = indx_p + k*owidth*oheight; real *indy_p_k = indy_p + k*owidth*oheight; - // calculate max points + /* calculate max points */ long i, j; for(i = 0; i < oheight; i++) { for(j = 0; j < owidth; j++) { - // retrieve position of max + /* retrieve position of max */ long maxi = indy_p_k[i*owidth + j] - 1 + i*dH; long maxj = indx_p_k[i*owidth + j] - 1 + j*dW; - // update gradient + /* update gradient */ gradInput_p_k[maxi*iwidth + maxj] += gradOutput_p_k[i*owidth + j]; } } @@ -174,36 +185,44 @@ static int nn_(SpatialMaxPooling_updateGradInput)(lua_State *L) int dH = luaT_getfieldcheckint(L, 1, "dH"); THTensor *indices = luaT_getfieldcheckudata(L, 1, "indices", torch_Tensor); THTensor *gradInput = luaT_getfieldcheckudata(L, 1, "gradInput", torch_Tensor); + 
int dimw = 2; + int dimh = 1; + long nbatch = 1; + int nslices; + int iheight; + int iwidth; + int oheight; + int owidth; + real *gradInput_data; + real *gradOutput_data; + real *indices_data; - // get contiguous gradOutput + /* get contiguous gradOutput */ gradOutput = THTensor_(newContiguous)(gradOutput); - // resize + /* resize */ THTensor_(resizeAs)(gradInput, input); THTensor_(zero)(gradInput); - int dimw = 2; - int dimh = 1; - long nbatch = 1; if (input->nDimension == 4) { nbatch = input->size[0]; dimw++; dimh++; } - // sizes - int nslices = input->size[dimh-1]; - int iheight = input->size[dimh]; - int iwidth = input->size[dimw]; - int oheight = gradOutput->size[dimh]; - int owidth = gradOutput->size[dimw]; + /* sizes */ + nslices = input->size[dimh-1]; + iheight = input->size[dimh]; + iwidth = input->size[dimw]; + oheight = gradOutput->size[dimh]; + owidth = gradOutput->size[dimw]; - // get raw pointers - real *gradInput_data = THTensor_(data)(gradInput); - real *gradOutput_data = THTensor_(data)(gradOutput); - real *indices_data = THTensor_(data)(indices); + /* get raw pointers */ + gradInput_data = THTensor_(data)(gradInput); + gradOutput_data = THTensor_(data)(gradOutput); + indices_data = THTensor_(data)(indices); - // backprop + /* backprop */ if (input->nDimension == 3) { nn_(SpatialMaxPooling_updateGradInput_frame)(gradInput_data, gradOutput_data, @@ -228,7 +247,7 @@ static int nn_(SpatialMaxPooling_updateGradInput)(lua_State *L) } } - // cleanup + /* cleanup */ THTensor_(free)(gradOutput); return 1; diff --git a/generic/SpatialSubSampling.c b/generic/SpatialSubSampling.c index ed9c059..912592c 100644 --- a/generic/SpatialSubSampling.c +++ b/generic/SpatialSubSampling.c @@ -20,21 +20,29 @@ static int nn_(SpatialSubSampling_updateOutput)(lua_State *L) real *output_data; real *input_data; - luaL_argcheck(L, input->nDimension == 3 || input->nDimension == 4, 2, "3D or 4D(batch mode) tensor expected"); - int dimw = 2; int dimh = 1; long nbatch = 1; + + 
long inputWidth; + long inputHeight; + long outputWidth; + long outputHeight; + + long k; + + luaL_argcheck(L, input->nDimension == 3 || input->nDimension == 4, 2, "3D or 4D(batch mode) tensor expected"); + if (input->nDimension == 4) { nbatch = input->size[0]; dimw++; dimh++; } - long inputWidth = input->size[dimw]; - long inputHeight = input->size[dimh]; - long outputWidth = (inputWidth - kW) / dW + 1; - long outputHeight = (inputHeight - kH) / dH + 1; + inputWidth = input->size[dimw]; + inputHeight = input->size[dimh]; + outputWidth = (inputWidth - kW) / dW + 1; + outputHeight = (inputHeight - kH) / dH + 1; luaL_argcheck(L, input->size[dimh-1] == nInputPlane, 2, "invalid number of input planes"); luaL_argcheck(L, inputWidth >= kW && inputHeight >= kH, 2, "input image smaller than kernel size"); @@ -48,7 +56,6 @@ static int nn_(SpatialSubSampling_updateOutput)(lua_State *L) input_data = THTensor_(data)(input); output_data = THTensor_(data)(output); - long k; #pragma omp parallel for private(k) for(k = 0; k < nInputPlane; k++) { @@ -70,7 +77,7 @@ static int nn_(SpatialSubSampling_updateOutput)(lua_State *L) { for(xx = 0; xx < outputWidth; xx++) { - // Compute the mean of the input image... + /* Compute the mean of the input image... 
*/ real *ptr_input = input_data + p*nInputPlane*inputWidth*inputHeight + k*inputWidth*inputHeight + yy*dH*inputWidth+xx*dW; real sum = 0; long kx, ky; @@ -79,9 +86,9 @@ static int nn_(SpatialSubSampling_updateOutput)(lua_State *L) { for(kx = 0; kx < kW; kx++) sum += ptr_input[kx]; - ptr_input += inputWidth; // next input line + ptr_input += inputWidth; /* next input line */ } - // Update output + /* Update output */ *ptr_output++ += the_weight*sum; } } @@ -108,20 +115,31 @@ static int nn_(SpatialSubSampling_updateGradInput)(lua_State *L) int dimw = 2; int dimh = 1; long nbatch = 1; + + long inputWidth; + long inputHeight; + long outputWidth; + long outputHeight; + + real *weight_data; + real *gradOutput_data; + real *input_data, *gradInput_data; + + long k; + if (input->nDimension == 4) { nbatch = input->size[0]; dimw++; dimh++; } - long inputWidth = input->size[dimw]; - long inputHeight = input->size[dimh]; - long outputWidth = (inputWidth - kW) / dW + 1; - long outputHeight = (inputHeight - kH) / dH + 1; + inputWidth = input->size[dimw]; + inputHeight = input->size[dimh]; + outputWidth = (inputWidth - kW) / dW + 1; + outputHeight = (inputHeight - kH) / dH + 1; - real *weight_data = THTensor_(data)(weight); - real *gradOutput_data = THTensor_(data)(gradOutput); - real *input_data, *gradInput_data; + weight_data = THTensor_(data)(weight); + gradOutput_data = THTensor_(data)(gradOutput); input_data = THTensor_(data)(input); @@ -129,7 +147,6 @@ static int nn_(SpatialSubSampling_updateGradInput)(lua_State *L) gradInput_data = THTensor_(data)(gradInput); gradOutput_data = THTensor_(data)(gradOutput); - long k; #pragma omp parallel for private(k) for(k = 0; k < nInputPlane; k++) { @@ -184,26 +201,37 @@ static int nn_(SpatialSubSampling_accGradParameters)(lua_State *L) long nbatch = 1; long dimw = 2; long dimh = 1; + + long inputWidth; + long inputHeight; + long outputWidth; + long outputHeight; + + real *gradWeight_data; + real *gradBias_data; + real *gradOutput_data; + 
real *input_data; + + long k; + if (input->nDimension == 4) { dimw++; dimh++; nbatch = input->size[0]; } - long inputWidth = input->size[dimw]; - long inputHeight = input->size[dimh]; - long outputWidth = (inputWidth - kW) / dW + 1; - long outputHeight = (inputHeight - kH) / dH + 1; + inputWidth = input->size[dimw]; + inputHeight = input->size[dimh]; + outputWidth = (inputWidth - kW) / dW + 1; + outputHeight = (inputHeight - kH) / dH + 1; - real *gradWeight_data = THTensor_(data)(gradWeight); - real *gradBias_data = THTensor_(data)(gradBias); - real *gradOutput_data = THTensor_(data)(gradOutput); - real *input_data; + gradWeight_data = THTensor_(data)(gradWeight); + gradBias_data = THTensor_(data)(gradBias); + gradOutput_data = THTensor_(data)(gradOutput); input = THTensor_(newContiguous)(input); input_data = THTensor_(data)(input); - long k; #pragma omp parallel for private(k) for(k = 0; k < nInputPlane; k++) { @@ -213,9 +241,9 @@ static int nn_(SpatialSubSampling_accGradParameters)(lua_State *L) real *ptr_gradOutput = gradOutput_data + p*nInputPlane*outputHeight*outputWidth + k*outputWidth*outputHeight; real sum; long xx, yy; + long i; sum = 0; - long i; for(i = 0; i < outputWidth*outputHeight; i++) sum += ptr_gradOutput[i]; gradBias_data[k] += scale*sum; diff --git a/generic/TemporalMaxPooling.c b/generic/TemporalMaxPooling.c index 0111cb5..5e269cf 100644 --- a/generic/TemporalMaxPooling.c +++ b/generic/TemporalMaxPooling.c @@ -10,29 +10,38 @@ static int nn_(TemporalMaxPooling_updateOutput)(lua_State *L) THTensor *indices = luaT_getfieldcheckudata(L, 1, "indices", torch_Tensor); THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_Tensor); + long niframe; + long framesize; + long noframe; + + real *input_data; + real *output_data; + real *indices_data; + + long t, x, y; + luaL_argcheck(L, input->nDimension == 2, 2, "2D tensor expected"); luaL_argcheck(L, input->size[0] >= kW, 2, "input sequence smaller than kernel size"); - // sizes - long niframe = 
input->size[0]; - long framesize = input->size[1]; - long noframe = (niframe - kW) / dW + 1; + /* sizes */ + niframe = input->size[0]; + framesize = input->size[1]; + noframe = (niframe - kW) / dW + 1; - // get contiguous input + /* get contiguous input */ input = THTensor_(newContiguous)(input); - // resize output + /* resize output */ THTensor_(resize2d)(output, noframe, framesize); - // indices will contain index locations for each output point + /* indices will contain index locations for each output point */ THTensor_(resize2d)(indices, noframe, framesize); - // get raw pointers - real *input_data = THTensor_(data)(input); - real *output_data = THTensor_(data)(output); - real *indices_data = THTensor_(data)(indices); + /* get raw pointers */ + input_data = THTensor_(data)(input); + output_data = THTensor_(data)(output); + indices_data = THTensor_(data)(indices); - long t, x, y; for(t = 0; t < noframe; t++) { real *ip = input_data + t*framesize*dW; @@ -41,7 +50,7 @@ static int nn_(TemporalMaxPooling_updateOutput)(lua_State *L) #pragma omp parallel for private(y) for(y = 0; y < framesize; y++) { - // compute local max: + /* compute local max: */ long maxindex = -1; real maxval = -THInf; for(x = 0; x < kW; x++) @@ -54,13 +63,13 @@ static int nn_(TemporalMaxPooling_updateOutput)(lua_State *L) } } - // set output to local max + /* set output to local max */ op[y] = maxval; xp[y] = (real)maxindex; } } - // cleanup + /* cleanup */ THTensor_(free)(input); return 1; @@ -74,23 +83,31 @@ static int nn_(TemporalMaxPooling_updateGradInput)(lua_State *L) THTensor *indices = luaT_getfieldcheckudata(L, 1, "indices", torch_Tensor); THTensor *gradInput = luaT_getfieldcheckudata(L, 1, "gradInput", torch_Tensor); - // get contiguous gradOutput + int noframe; + long framesize; + + real *gradInput_data; + real *gradOutput_data; + real *indices_data; + + long t, y; + + /* get contiguous gradOutput */ gradOutput = THTensor_(newContiguous)(gradOutput); - // resize and zero + /* resize 
and zero */ THTensor_(resizeAs)(gradInput, input); THTensor_(zero)(gradInput); - // sizes - int noframe = gradOutput->size[0]; - long framesize = gradOutput->size[1]; + /* sizes */ + noframe = gradOutput->size[0]; + framesize = gradOutput->size[1]; - // get raw pointers - real *gradInput_data = THTensor_(data)(gradInput); - real *gradOutput_data = THTensor_(data)(gradOutput); - real *indices_data = THTensor_(data)(indices); + /* get raw pointers */ + gradInput_data = THTensor_(data)(gradInput); + gradOutput_data = THTensor_(data)(gradOutput); + indices_data = THTensor_(data)(indices); - long t, y; for(t = 0; t < noframe; t++) { real *gip = gradInput_data + t*framesize*dW; @@ -99,13 +116,13 @@ static int nn_(TemporalMaxPooling_updateGradInput)(lua_State *L) #pragma omp parallel for private(y) for(y = 0; y < framesize; y++) { - // compute local max: + /* compute local max: */ long maxindex = (long)xp[y]; gip[maxindex*framesize+y] += gop[y]; } } - // cleanup + /* cleanup */ THTensor_(free)(gradOutput); return 1; diff --git a/generic/VolumetricConvolution.c b/generic/VolumetricConvolution.c index 6e0b6d8..feeaf05 100644 --- a/generic/VolumetricConvolution.c +++ b/generic/VolumetricConvolution.c @@ -15,30 +15,33 @@ static int nn_(VolumetricConvolution_updateOutput)(lua_State *L) luaL_argcheck(L, input->nDimension == 4, 2, "4D tensor expected"); - long nOutputPlane = weight->size[0]; - long kT = weight->size[2]; - long kH = weight->size[3]; - long kW = weight->size[4]; - long inputDepth = input->size[1]; - long inputHeight = input->size[2]; - long inputWidth = input->size[3]; - long outputDepth = (inputDepth - kT) / dT + 1; - long outputWidth = (inputWidth - kW) / dW + 1; - long outputHeight = (inputHeight - kH) / dH + 1; - - THTensor_(resize4d)(output, nOutputPlane, outputDepth, outputHeight, outputWidth); - - /* add bias */ - long i; - THTensor *outn = THTensor_(new)(); - for (i=0; i<bias->size[0]; i++) { - THTensor_(select)(outn,output,0,i); - THTensor_(fill)(outn,
THTensor_(get1d)(bias, i)); - } - THTensor_(free)(outn); + { + long nOutputPlane = weight->size[0]; + long kT = weight->size[2]; + long kH = weight->size[3]; + long kW = weight->size[4]; + long inputDepth = input->size[1]; + long inputHeight = input->size[2]; + long inputWidth = input->size[3]; + long outputDepth = (inputDepth - kT) / dT + 1; + long outputWidth = (inputWidth - kW) / dW + 1; + long outputHeight = (inputHeight - kH) / dH + 1; + THTensor *outn = THTensor_(new)(); + long i; + + THTensor_(resize4d)(output, nOutputPlane, outputDepth, outputHeight, outputWidth); + + /* add bias */ + for (i=0; i<bias->size[0]; i++) { + THTensor_(select)(outn,output,0,i); + THTensor_(fill)(outn, THTensor_(get1d)(bias, i)); + } - /* do convolutions */ - THTensor_(conv3Dmv)(output, 1.0, 1.0, input, weight, dT, dH, dW, "V", "X"); + THTensor_(free)(outn); + + /* do convolutions */ + THTensor_(conv3Dmv)(output, 1.0, 1.0, input, weight, dT, dH, dW, "V", "X"); + } return 1; } @@ -54,11 +57,12 @@ static int nn_(VolumetricConvolution_updateGradInput)(lua_State *L) THTensor *weight = luaT_getfieldcheckudata(L, 1, "weight", torch_Tensor); THTensor *gradInput = luaT_getfieldcheckudata(L, 1, "gradInput", torch_Tensor); + THTensor *tweight; THArgCheck( nOutputPlane == gradOutput->size[0], 1, "Number of output features is not equal to nOutputPlane" ); /* gradient to input */ - THTensor *tweight = THTensor_(newTranspose)(weight,0,1); + tweight = THTensor_(newTranspose)(weight,0,1); THTensor_(conv3Dmv)(gradInput, 0.0, 1.0, gradOutput, tweight, dT, dH, dW, "F", "C"); THTensor_(free)(tweight); @@ -77,14 +81,16 @@ static int nn_(VolumetricConvolution_accGradParameters)(lua_State *L) THTensor *gradWeight = luaT_getfieldcheckudata(L, 1, "gradWeight", torch_Tensor); THTensor *gradBias = luaT_getfieldcheckudata(L, 1, "gradBias", torch_Tensor); - - THArgCheck( nOutputPlane == gradOutput->size[0], 1, "Number of output features is not equal to nOutputPlane" ); long k; + real *gradBias_data; + THTensor*
gradOutSlice; + + THArgCheck( nOutputPlane == gradOutput->size[0], 1, "Number of output features is not equal to nOutputPlane" ); /* gradient to bias */ - real *gradBias_data = THTensor_(data)(gradBias); - THTensor* gradOutSlice = THTensor_(new)(); + gradBias_data = THTensor_(data)(gradBias); + gradOutSlice = THTensor_(new)(); for(k = 0; k < nOutputPlane; k++) { THTensor_(select)(gradOutSlice, gradOutput, 0, k); -- cgit v1.2.3 From e50360c1ead4565c3a3aa5e60806dc04736af1b3 Mon Sep 17 00:00:00 2001 From: koray kavukcuoglu Date: Thu, 26 Sep 2013 23:19:19 +0100 Subject: remove WeightedMSECriterion test since it fails with an unknown module --- test/test.lua | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/test/test.lua b/test/test.lua index 58a9bd7..1eb92ad 100644 --- a/test/test.lua +++ b/test/test.lua @@ -340,21 +340,21 @@ function nntest.WeightedEuclidean() mytester:asserteq(berr, 0, torch.typename(module) .. ' - i/o backward err ') end -function nntest.WeightedMSECriterion() - local from = math.random(100,200) - local input = torch.Tensor(from):zero() - local target = torch.randn(from) - local weight = torch.randn(from) - local cri = nn.WeightedMSECriterion(weight) - local module = nn.CriterionModule(cri,target) - - local err = jac.testJacobian(module, input) - mytester:assertlt(err, precision, 'error on state ') - - local ferr, berr = jac.testIO(module, input) - mytester:asserteq(0, ferr, torch.typename(module) .. ' - i/o forward err ') - mytester:asserteq(0, berr, torch.typename(module) .. 
' - i/o backward err ') -end +--function nntest.WeightedMSECriterion() +-- local from = math.random(100,200) +-- local input = torch.Tensor(from):zero() +-- local target = torch.randn(from) +-- local weight = torch.randn(from) +-- local cri = nn.WeightedMSECriterion(weight) +-- local module = nn.CriterionModule(cri,target) + +-- local err = jac.testJacobian(module, input) +-- mytester:assertlt(err, precision, 'error on state ') + +-- local ferr, berr = jac.testIO(module, input) +-- mytester:asserteq(0, ferr, torch.typename(module) .. ' - i/o forward err ') +-- mytester:asserteq(0, berr, torch.typename(module) .. ' - i/o backward err ') +--end function nntest.LogSigmoid() local ini = math.random(10,20) -- cgit v1.2.3 From 8c295db66843c1119797ef3390f248af4777ec28 Mon Sep 17 00:00:00 2001 From: koray kavukcuoglu Date: Sun, 13 Oct 2013 18:33:16 +0100 Subject: move k into thread private loop --- generic/SpatialConvolutionMap.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/generic/SpatialConvolutionMap.c b/generic/SpatialConvolutionMap.c index eed0ca9..32c75e5 100644 --- a/generic/SpatialConvolutionMap.c +++ b/generic/SpatialConvolutionMap.c @@ -30,7 +30,7 @@ static int nn_(SpatialConvolutionMap_updateOutput)(lua_State *L) long weight_h; long weight_w; - long p, k; + long p; int nweight; @@ -66,7 +66,7 @@ static int nn_(SpatialConvolutionMap_updateOutput)(lua_State *L) for (p = 0; p < nOutputPlane; p++) { /* add bias */ real *ptr_output = output_data + p*output_w*output_h; - long j; + long j,k; for(j = 0; j < output_h*output_w; j++) ptr_output[j] = bias_data[p]; -- cgit v1.2.3 From bea3665f9dbcd2a5248078d2c23903fcc973c6be Mon Sep 17 00:00:00 2001 From: koray kavukcuoglu Date: Sun, 13 Oct 2013 20:11:41 +0100 Subject: move index declaragtion into pragma --- generic/TemporalMaxPooling.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/generic/TemporalMaxPooling.c b/generic/TemporalMaxPooling.c index 5e269cf..3c0384d 100644 --- 
a/generic/TemporalMaxPooling.c +++ b/generic/TemporalMaxPooling.c @@ -18,7 +18,7 @@ static int nn_(TemporalMaxPooling_updateOutput)(lua_State *L) real *output_data; real *indices_data; - long t, x, y; + long t, y; luaL_argcheck(L, input->nDimension == 2, 2, "2D tensor expected"); luaL_argcheck(L, input->size[0] >= kW, 2, "input sequence smaller than kernel size"); @@ -53,6 +53,7 @@ static int nn_(TemporalMaxPooling_updateOutput)(lua_State *L) /* compute local max: */ long maxindex = -1; real maxval = -THInf; + long x; for(x = 0; x < kW; x++) { real val = ip[x*framesize+y]; -- cgit v1.2.3 From 2a7a45dd813c94fb2831f3197c5f0a388035ab4a Mon Sep 17 00:00:00 2001 From: koray kavukcuoglu Date: Tue, 15 Oct 2013 01:21:26 +0100 Subject: add 3D max pooling --- VolumetricMaxPooling.lua | 37 +++++++ dok/index.dok | 11 ++ generic/VolumetricMaxPooling.c | 233 +++++++++++++++++++++++++++++++++++++++++ init.c | 5 + init.lua | 1 + test/test.lua | 34 +++++- 6 files changed, 317 insertions(+), 4 deletions(-) create mode 100644 VolumetricMaxPooling.lua create mode 100644 generic/VolumetricMaxPooling.c diff --git a/VolumetricMaxPooling.lua b/VolumetricMaxPooling.lua new file mode 100644 index 0000000..2bc7a76 --- /dev/null +++ b/VolumetricMaxPooling.lua @@ -0,0 +1,37 @@ +local VolumetricMaxPooling, parent = torch.class('nn.VolumetricMaxPooling', 'nn.Module') + +function VolumetricMaxPooling:__init(kT, kW, kH, dT, dW, dH) + parent.__init(self) + + dT = dT or kT + dW = dW or kW + dH = dH or kH + + self.kT = kT + self.kH = kH + self.kW = kW + self.dT = dT + self.dW = dW + self.dH = dH + + self.indices = torch.Tensor() +end + +function VolumetricMaxPooling:updateOutput(input) + input.nn.VolumetricMaxPooling_updateOutput(self, input) + return self.output +end + +function VolumetricMaxPooling:updateGradInput(input, gradOutput) + input.nn.VolumetricMaxPooling_updateGradInput(self, input, gradOutput) + return self.gradInput +end + +function VolumetricMaxPooling:empty() + self.gradInput:resize() 
+ self.gradInput:storage():resize(0) + self.output:resize() + self.output:storage():resize(0) + self.indices:resize() + self.indices:storage():resize(0) +end diff --git a/dok/index.dok b/dok/index.dok index d4a0160..858a11a 100644 --- a/dok/index.dok +++ b/dok/index.dok @@ -1436,6 +1436,17 @@ Applies 2D max-pooling operation in ''kWxkH'' regions by step size ''dWxdH'' steps. The number of output features is equal to the number of input planes. +==== VolumetricMaxPooling ==== +{{anchor:nn.VolumetricMaxPooling}} + +<file lua> +module = nn.VolumetricMaxPooling(kT, kW, kH [, dT, dW, dH]) +</file> + +Applies 3D max-pooling operation in ''kTxkWxkH'' regions by step size +''dTxdWxdH'' steps. The number of output features is equal to the number of +input planes. + ==== SpatialSubSampling ==== {{anchor:nn.SpatialSubSampling}} diff --git a/generic/VolumetricMaxPooling.c b/generic/VolumetricMaxPooling.c new file mode 100644 index 0000000..c1534de --- /dev/null +++ b/generic/VolumetricMaxPooling.c @@ -0,0 +1,233 @@ +#ifndef TH_GENERIC_FILE +#define TH_GENERIC_FILE "generic/VolumetricMaxPooling.c" +#else + +static void nn_(VolumetricMaxPooling_updateOutput_frame)(real *input_p, real *output_p, + real *indx_p, real *indy_p, real *indz_p, + long nslices, + long itime, long iwidth, long iheight, + long otime, long owidth, long oheight, + int kT, int kW, int kH, int dT, int dW, int dH) +{ + long k; +#pragma omp parallel for private(k) + for (k = 0; k < nslices; k++) + { + /* loop over output */ + long i, j, ti; + for(ti = 0; ti < otime; ti++) + { + for(i = 0; i < oheight; i++) + { + for(j = 0; j < owidth; j++) + { + /* local pointers */ + real *ip = input_p + k*itime*iwidth*iheight + ti*iwidth*iheight*dT + i*iwidth*dH + j*dW; + real *op = output_p + k*otime*owidth*oheight + ti*owidth*oheight + i*owidth + j; + real *indzp = indz_p + k*otime*owidth*oheight + ti*owidth*oheight + i*owidth + j; + real *indyp = indy_p + k*otime*owidth*oheight + ti*owidth*oheight + i*owidth + j; + real *indxp = indx_p +
k*otime*owidth*oheight + ti*owidth*oheight + i*owidth + j; + + /* compute local max: */ + real maxval = -THInf; + *indzp = -1; + *indyp = -1; + *indxp = -1; + int x,y,z; + for(z=0; z < kT; z++) + { + for(y = 0; y < kH; y++) + { + for(x = 0; x < kW; x++) + { + real val = *(ip + z*iwidth*iheight + y*iwidth + x); + if (val > maxval) + { + maxval = val; + *indzp = z+1; + *indyp = y+1; + *indxp = x+1; + } + } + } + } + /* set output to local max */ + *op = maxval; + + /* store location of max (x,y) */ + /**indyp = (int)(maxindex / kW)+1;*/ + /**indxp = (maxindex % kW) +1;*/ + } + } + } + } +} + +static int nn_(VolumetricMaxPooling_updateOutput)(lua_State *L) +{ + THTensor *input = luaT_checkudata(L, 2, torch_Tensor); + int kT = luaT_getfieldcheckint(L, 1, "kT"); + int kW = luaT_getfieldcheckint(L, 1, "kW"); + int kH = luaT_getfieldcheckint(L, 1, "kH"); + int dT = luaT_getfieldcheckint(L, 1, "dT"); + int dW = luaT_getfieldcheckint(L, 1, "dW"); + int dH = luaT_getfieldcheckint(L, 1, "dH"); + THTensor *indices = luaT_getfieldcheckudata(L, 1, "indices", torch_Tensor); + THTensor *output = luaT_getfieldcheckudata(L, 1, "output", torch_Tensor); + long nslices; + long itime; + long iheight; + long iwidth; + long otime; + long oheight; + long owidth; + real *input_data; + real *output_data; + real *indices_data; + + + luaL_argcheck(L, input->nDimension == 4 , 2, "4D tensor expected"); + luaL_argcheck(L, input->size[3] >= kW && input->size[2] >= kH && input->size[1] >= kT, 2, "input image smaller than kernel size"); + + /* sizes */ + nslices = input->size[0]; + itime = input->size[1]; + iheight = input->size[2]; + iwidth = input->size[3]; + otime = (itime - kT) / dT + 1; + oheight = (iheight - kH) / dH + 1; + owidth = (iwidth - kW) / dW + 1; + + /* get contiguous input */ + input = THTensor_(newContiguous)(input); + + /* resize output */ + THTensor_(resize4d)(output, nslices, otime, oheight, owidth); + /* indices will contain ti,i,j locations for each output point */ + 
THTensor_(resize5d)(indices, 3, nslices, otime, oheight, owidth); + + input_data = THTensor_(data)(input); + output_data = THTensor_(data)(output); + indices_data = THTensor_(data)(indices); + + nn_(VolumetricMaxPooling_updateOutput_frame)(input_data, output_data, + indices_data+nslices*otime*owidth*oheight*2, + indices_data+nslices*otime*owidth*oheight, + indices_data, + nslices, + itime, iwidth, iheight, + otime, owidth, oheight, + kT, kW, kH, dT, dW, dH); + /* cleanup */ + THTensor_(free)(input); + return 1; +} + +static void nn_(VolumetricMaxPooling_updateGradInput_frame)(real *gradInput_p, real *gradOutput_p, + real *indx_p, real *indy_p, real *indz_p, + long nslices, + long itime, long iwidth, long iheight, + long otime, long owidth, long oheight, + int dT, int dW, int dH) +{ + long k; +#pragma omp parallel for private(k) + for (k = 0; k < nslices; k++) + { + real *gradInput_p_k = gradInput_p + k*itime*iwidth*iheight; + real *gradOutput_p_k = gradOutput_p + k*otime*owidth*oheight; + real *indx_p_k = indx_p + k*otime*owidth*oheight; + real *indy_p_k = indy_p + k*otime*owidth*oheight; + real *indz_p_k = indz_p + k*otime*owidth*oheight; + + /* calculate max points */ + long ti, i, j; + for(ti = 0; ti < otime; ti++) + { + for(i = 0; i < oheight; i++) + { + for(j = 0; j < owidth; j++) + { + /* retrieve position of max */ + long maxti = indz_p_k[ti*oheight*owidth + i*owidth + j] - 1 + ti*dT; + long maxi = indy_p_k[ti*oheight*owidth + i*owidth + j] - 1 + i*dH; + long maxj = indx_p_k[ti*oheight*owidth + i*owidth + j] - 1 + j*dW; + + /* update gradient */ + gradInput_p_k[maxti*iheight*iwidth + maxi*iwidth + maxj] += gradOutput_p_k[ti*oheight*owidth + i*owidth + j]; + } + } + } + } +} + +static int nn_(VolumetricMaxPooling_updateGradInput)(lua_State *L) +{ + THTensor *input = luaT_checkudata(L, 2, torch_Tensor); + THTensor *gradOutput = luaT_checkudata(L, 3, torch_Tensor); + int dT = luaT_getfieldcheckint(L, 1, "dT"); + int dW = luaT_getfieldcheckint(L, 1, "dW"); + int 
dH = luaT_getfieldcheckint(L, 1, "dH"); + THTensor *indices = luaT_getfieldcheckudata(L, 1, "indices", torch_Tensor); + THTensor *gradInput = luaT_getfieldcheckudata(L, 1, "gradInput", torch_Tensor); + int nslices; + int itime; + int iheight; + int iwidth; + int otime; + int oheight; + int owidth; + real *gradInput_data; + real *gradOutput_data; + real *indices_data; + + /* get contiguous gradOutput */ + gradOutput = THTensor_(newContiguous)(gradOutput); + + /* resize */ + THTensor_(resizeAs)(gradInput, input); + THTensor_(zero)(gradInput); + + /* sizes */ + nslices = input->size[0]; + itime = input->size[1]; + iheight = input->size[2]; + iwidth = input->size[3]; + otime = gradOutput->size[1]; + oheight = gradOutput->size[2]; + owidth = gradOutput->size[3]; + + /* get raw pointers */ + gradInput_data = THTensor_(data)(gradInput); + gradOutput_data = THTensor_(data)(gradOutput); + indices_data = THTensor_(data)(indices); + + /* backprop */ + nn_(VolumetricMaxPooling_updateGradInput_frame)(gradInput_data, gradOutput_data, + indices_data+nslices*otime*owidth*oheight*2, + indices_data+nslices*otime*owidth*oheight, + indices_data, + nslices, + itime, iwidth, iheight, + otime, owidth, oheight, + dT, dW, dH); + + /* cleanup */ + THTensor_(free)(gradOutput); + return 1; +} + +static const struct luaL_Reg nn_(VolumetricMaxPooling__) [] = { + {"VolumetricMaxPooling_updateOutput", nn_(VolumetricMaxPooling_updateOutput)}, + {"VolumetricMaxPooling_updateGradInput", nn_(VolumetricMaxPooling_updateGradInput)}, + {NULL, NULL} +}; + +static void nn_(VolumetricMaxPooling_init)(lua_State *L) +{ + luaT_pushmetatable(L, torch_Tensor); + luaT_registeratname(L, nn_(VolumetricMaxPooling__), "nn"); + lua_pop(L,1); +} + +#endif diff --git a/init.c b/init.c index 538c572..877faa9 100644 --- a/init.c +++ b/init.c @@ -95,6 +95,9 @@ #include "generic/VolumetricConvolution.c" #include "THGenerateFloatTypes.h" +#include "generic/VolumetricMaxPooling.c" +#include "THGenerateFloatTypes.h" + 
#include "generic/MultiMarginCriterion.c" #include "THGenerateFloatTypes.h" @@ -142,6 +145,7 @@ int luaopen_libnn(lua_State *L) nn_FloatSpatialSubSampling_init(L); nn_FloatSpatialMaxPooling_init(L); nn_FloatVolumetricConvolution_init(L); + nn_FloatVolumetricMaxPooling_init(L); nn_FloatMultiMarginCriterion_init(L); nn_FloatMultiLabelMarginCriterion_init(L); nn_FloatL1Cost_init(L); @@ -176,6 +180,7 @@ int luaopen_libnn(lua_State *L) nn_DoubleSpatialSubSampling_init(L); nn_DoubleSpatialMaxPooling_init(L); nn_DoubleVolumetricConvolution_init(L); + nn_DoubleVolumetricMaxPooling_init(L); nn_DoubleMultiMarginCriterion_init(L); nn_DoubleMultiLabelMarginCriterion_init(L); nn_DoubleL1Cost_init(L); diff --git a/init.lua b/init.lua index 3f7535f..87f080e 100644 --- a/init.lua +++ b/init.lua @@ -75,6 +75,7 @@ include('SpatialContrastiveNormalization.lua') include('SpatialZeroPadding.lua') include('VolumetricConvolution.lua') +include('VolumetricMaxPooling.lua') include('ParallelTable.lua') include('ConcatTable.lua') diff --git a/test/test.lua b/test/test.lua index 1eb92ad..dd6be22 100644 --- a/test/test.lua +++ b/test/test.lua @@ -1300,10 +1300,10 @@ function nntest.TemporalSubSampling() end function nntest.TemporalMaxPooling() - local from = math.random(1,10) - local ki = math.random(1,10) - local si = math.random(1,4) - local outi = math.random(10,20) + local from = math.random(10,10) + local ki = math.random(5,10) + local si = math.random(1,2) + local outi = math.random(50,90) local ini = (outi-1)*si+ki local module = nn.TemporalMaxPooling(ki, si) local input = torch.Tensor(ini, from):zero() @@ -1364,6 +1364,32 @@ function nntest.VolumetricConvolution() mytester:asserteq(0, berr, torch.typename(module) .. 
' - i/o backward err ') end +function nntest.VolumetricMaxPooling() + local from = math.random(2,5) + local to = from + local kt = math.random(3,7) + local ki = math.random(3,7) + local kj = math.random(3,7) + local st = math.random(2,4) + local si = math.random(2,4) + local sj = math.random(2,4) + local outt = math.random(3,7) + local outi = math.random(3,7) + local outj = math.random(3,7) + local int = (outt-1)*st+kt + local ini = (outi-1)*si+ki + local inj = (outj-1)*sj+kj + local module = nn.VolumetricMaxPooling(kt, ki, kj, st, si, sj) + local input = torch.Tensor(from, int, inj, ini):zero() + + local err = jac.testJacobian(module, input) + mytester:assertlt(err, precision, 'error on state ') + + local ferr, berr = jac.testIO(module, input) + mytester:asserteq(0, ferr, torch.typename(module) .. ' - i/o forward err ') + mytester:asserteq(0, berr, torch.typename(module) .. ' - i/o backward err ') +end + function nntest.Module_getParameters_1() local n = nn.Sequential() n:add( nn.Linear(10,10) ) -- cgit v1.2.3 From c5d2bf84c33fd8fc6669842e5f19f8f47470c443 Mon Sep 17 00:00:00 2001 From: koray kavukcuoglu Date: Tue, 15 Oct 2013 01:27:17 +0100 Subject: add 3D convolution documentation --- dok/index.dok | 39 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/dok/index.dok b/dok/index.dok index 858a11a..baac983 100644 --- a/dok/index.dok +++ b/dok/index.dok @@ -1377,6 +1377,45 @@ output[i][j][k] = bias[k] * input[dW*(i-1)+s)][dH*(j-1)+t][l] +==== VolumetricConvolution ==== +{{anchor:nn.VolumetricConvolution}} + + +module = nn.VolumetricConvolution(nInputPlane, nOutputPlane, kT, kW, kH [, dT, dW, dH]) + + +Applies a 3D convolution over an input image composed of several input planes. The ''input'' tensor in +''forward(input)'' is expected to be a 4D tensor (''nInputPlane x time x height x width''). + +The parameters are the following: + * ''nInputPlane'': The number of expected input planes in the image given into ''forward()''. 
+ * ''nOutputPlane'': The number of output planes the convolution layer will produce. + * ''kT'': The kernel size of the convolution in time + * ''kW'': The kernel width of the convolution + * ''kH'': The kernel height of the convolution + * ''dT'': The step of the convolution in the time dimension. Default is ''1''. + * ''dW'': The step of the convolution in the width dimension. Default is ''1''. + * ''dH'': The step of the convolution in the height dimension. Default is ''1''. + +Note that depending on the size of your kernel, several (of the last) +columns or rows of the input image might be lost. It is up to the user to +add proper padding in images. + +If the input image is a 4D tensor ''nInputPlane x time x height x width'', the output image size +will be ''nOutputPlane x otime x oheight x owidth'' where + +otime = (time - kT) / dT + 1 +owidth = (width - kW) / dW + 1 +oheight = (height - kH) / dH + 1 . + + +The parameters of the convolution can be found in ''self.weight'' (Tensor of +size ''nOutputPlane x nInputPlane x kT x kH x kW'') and ''self.bias'' (Tensor of +size ''nOutputPlane''). The corresponding gradients can be found in +''self.gradWeight'' and ''self.gradBias''. 
+ + + ==== SpatialConvolutionMap ==== {{anchor:nn.SpatialConvolutionMap}} -- cgit v1.2.3 From 7649753354b266b31fa1bb2bf2e3422cf1ece8ef Mon Sep 17 00:00:00 2001 From: Ronan Collobert Date: Wed, 16 Oct 2013 11:19:27 +0200 Subject: added strict c89 flags --- CMakeLists.txt | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/CMakeLists.txt b/CMakeLists.txt index 93c5dc6..2bfd582 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -2,6 +2,10 @@ CMAKE_MINIMUM_REQUIRED(VERSION 2.6 FATAL_ERROR) CMAKE_POLICY(VERSION 2.6) FIND_PACKAGE(Torch REQUIRED) +IF("${CMAKE_C_COMPILER_ID}" STREQUAL "Clang" OR "${CMAKE_C_COMPILER_ID}" STREQUAL "GNU") + SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -std=c89 -pedantic") +ENDIF() + SET(src init.c) FILE(GLOB luasrc *.lua) -- cgit v1.2.3 From b994633771dd31d2597307bf85068176662f90ac Mon Sep 17 00:00:00 2001 From: Ronan Collobert Date: Wed, 16 Oct 2013 11:49:00 +0200 Subject: C89 --- generic/VolumetricMaxPooling.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/generic/VolumetricMaxPooling.c b/generic/VolumetricMaxPooling.c index c1534de..20f9701 100644 --- a/generic/VolumetricMaxPooling.c +++ b/generic/VolumetricMaxPooling.c @@ -30,10 +30,11 @@ static void nn_(VolumetricMaxPooling_updateOutput_frame)(real *input_p, real *ou /* compute local max: */ real maxval = -THInf; + int x,y,z; + *indzp = -1; *indyp = -1; *indxp = -1; - int x,y,z; for(z=0; z < kT; z++) { for(y = 0; y < kH; y++) -- cgit v1.2.3 From fd76fcc7787eac37757e9b9ccfda67da1cc9920a Mon Sep 17 00:00:00 2001 From: koray kavukcuoglu Date: Wed, 16 Oct 2013 23:13:52 +0100 Subject: correct the stochastic case check --- MarginRankingCriterion.lua | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/MarginRankingCriterion.lua b/MarginRankingCriterion.lua index 5012c2a..30c6855 100644 --- a/MarginRankingCriterion.lua +++ b/MarginRankingCriterion.lua @@ -8,8 +8,8 @@ function MarginRankingCriterion:__init(margin) end function 
MarginRankingCriterion:updateOutput(input,y) - if type(input[1]) == "number" then - self.output=math.max(0, -y*(input[1]-input[2]) + self.margin ) + if input[1]:size(1) == 1 then + self.output=math.max(0, -y*(input[1][1]-input[2][1]) + self.margin ) else if type(self.output) == "number" then self.output = input[1]:clone() @@ -34,7 +34,7 @@ function MarginRankingCriterion:updateOutput(input,y) end function MarginRankingCriterion:updateGradInput(input, y) - if type(input[1]) == "number" then + if input[1]:size(1) == 1 then local dist = -y*(input[1][1]-input[2][1]) + self.margin if dist < 0 then self.gradInput[1][1]=0; -- cgit v1.2.3 From b8f17d4136410b80bf621ff660abb8ab1ed7bf5e Mon Sep 17 00:00:00 2001 From: koray kavukcuoglu Date: Wed, 16 Oct 2013 23:49:29 +0100 Subject: init self.output with a single element tensor, not an empty tensor --- PairwiseDistance.lua | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/PairwiseDistance.lua b/PairwiseDistance.lua index f108b97..d9e6f81 100644 --- a/PairwiseDistance.lua +++ b/PairwiseDistance.lua @@ -5,7 +5,7 @@ function PairwiseDistance:__init(p) -- state self.gradInput = {torch.Tensor(), torch.Tensor()} - self.output = torch.Tensor() + self.output = torch.Tensor(1) self.norm=p end -- cgit v1.2.3