diff options
author | Michael 'myrhev' Mathieu <michael.mathieu@ens.fr> | 2012-03-10 11:12:52 +0400 |
---|---|---|
committer | Michael 'myrhev' Mathieu <michael.mathieu@ens.fr> | 2012-03-10 11:12:52 +0400 |
commit | b8d8c0b682eb146453fda721907a58dae4cb9335 (patch) | |
tree | eae756ba68644ece2e68f3de8f6c517e0231083c /generic | |
parent | 90503963e940a592205f66a704b3d1e96e894c2c (diff) |
Modified the behavior of SpatialMatching when full_output = false
Diffstat (limited to 'generic')
-rw-r--r-- | generic/SpatialMatching.c | 78 |
1 files changed, 41 insertions, 37 deletions
diff --git a/generic/SpatialMatching.c b/generic/SpatialMatching.c index ffde291..d34d61d 100644 --- a/generic/SpatialMatching.c +++ b/generic/SpatialMatching.c @@ -34,16 +34,16 @@ static int nn_(SpatialMatching_updateOutput)(lua_State *L) real *input2_p = THTensor_(data)(input2); real *output_p = THTensor_(data)(output); - // get halves of window size - int halfh1 = ceil((real)maxh/2)-1; - int halfh2 = floor((real)maxh/2)+1; - int halfw1 = ceil((real)maxw/2)-1; - int halfw2 = floor((real)maxw/2)+1; - // compute output int x1,y1,x2,y2,k; if (full_output) { -#pragma omp parallel for private(x1,y1,x2,y2,k) + // get halves of window size + int halfh1 = ceil((real)maxh/2)-1; + int halfh2 = floor((real)maxh/2)+1; + int halfw1 = ceil((real)maxw/2)-1; + int halfw2 = floor((real)maxw/2)+1; + + //#pragma omp parallel for private(x1,y1,x2,y2,k) for (y1 = 0; y1 < iheight; y1++) { for (x1 = 0; x1 < iwidth; x1++) { for (y2 = max(0,y1-halfh1); y2 < min(iheight,y1+halfh2); y2++) { @@ -51,27 +51,29 @@ static int nn_(SpatialMatching_updateOutput)(lua_State *L) real dist = 0; for (k=0; k<ichannels; k++) { dist += square(input1_p[k*i1s[0] + y1*i1s[1] + x1*i1s[2]] - input2_p[k*i2s[0] + y2*i2s[1] + x2*i2s[2]]); + //dist += square(THTensor_(get3d)(input1, k, y1, x1) - THTensor_(get3d)(input2, k, y2, x2)); } long dy = y2-y1 + halfh1; long dx = x2-x1 + halfw1; output_p[y1*os[0] + x1*os[1] + dy*os[2] + dx*os[3]] = dist; + //THTensor_(set4d)(output, y1, x1, dy, dx, dist); } } } } } else { -#pragma omp parallel for private(x1,y1,x2,y2,k) - for (y1 = halfh1; y1 < iheight-halfh2; y1++) { - for (x1 = halfw1; x1 < iwidth-halfw2; x1++) { - for (y2 = y1-halfh1; y2 < y1+halfh2; y2++) { - for (x2 = x1-halfw1; x2 < x1+halfw2; x2++) { + //#pragma omp parallel for private(x1,y1,x2,y2,k) + for (y1 = 0; y1 < iheight; y1++) { + for (x1 = 0; x1 < iwidth; x1++) { + for (y2 = y1; y2 < y1+maxh; y2++) { + for (x2 = x1; x2 < x1+maxw; x2++) { real dist = 0; for (k = 0; k < ichannels; k++) { dist += 
square(input1_p[k*i1s[0] + y1*i1s[1] + x1*i1s[2]] - input2_p[k*i2s[0] + y2*i2s[1] + x2*i2s[2]]); + //dist += square(THTensor_(get3d)(input1, k, y1, x1) - THTensor_(get3d)(input2, k, y2, x2)); } - long dy = y2-y1 + halfh1; - long dx = x2-x1 + halfw1; - output_p[(y1-halfh1)*os[0] + (x1-halfw1)*os[1] + dy*os[2] + dx*os[3]] = dist; + output_p[y1*os[0] + x1*os[1] + (y2-y1)*os[2] + (x2-x1)*os[3]] = dist; + //THTensor_(set4d)(output, y1, x1, y2-y1, x2-x1, dist); } } } @@ -100,13 +102,10 @@ static int nn_(SpatialMatching_updateGradInput)(lua_State *L) int iwidth = input1->size[2]; int iheight = input1->size[1]; int ichannels = input1->size[0]; - //int owidth = gradOutput->size[2]; - //int oheight = gradOutput->size[1]; - //int ochannels = gradOutput->size[0]; // resize gradInput - THTensor_(zero)(gradInput1); - THTensor_(zero)(gradInput2); + //THTensor_(zero)(gradInput1); + //THTensor_(zero)(gradInput2); // get strides long *i1s = input1->stride; @@ -124,15 +123,15 @@ static int nn_(SpatialMatching_updateGradInput)(lua_State *L) //real *output_p = THTensor_(data)(output); real *gradOutput_p = THTensor_(data)(gradOutput); - // get halves of window size - int halfh1 = ceil((real)maxh/2)-1; - int halfh2 = floor((real)maxh/2)+1; - int halfw1 = ceil((real)maxw/2)-1; - int halfw2 = floor((real)maxw/2)+1; - // compute gradients int x1, y1, x2, y2, k; if (full_output) { + // get halves of window size + int halfh1 = ceil((real)maxh/2)-1; + int halfh2 = floor((real)maxh/2)+1; + int halfw1 = ceil((real)maxw/2)-1; + int halfw2 = floor((real)maxw/2)+1; + #pragma omp parallel for private(x1,y1,x2,y2,k) for (y1 = 0; y1 < iheight; y1++) { for (x1 = 0; x1 < iwidth; x1++) { @@ -147,26 +146,31 @@ static int nn_(SpatialMatching_updateGradInput)(lua_State *L) partial_d *= gradOutput_p[y1*gos[0] + x1*gos[1] + dy*gos[2] + dx*gos[3]]; gradInput1_p[k*gi1s[0] + y1*gi1s[1] + x1*gi1s[2]] += partial_d; gradInput2_p[k*gi2s[0] + y2*gi2s[1] + x2*gi2s[2]] -= partial_d; + //real partial_d = 
2*(THTensor_(get3d)(input1, k, y1, x1) - THTensor_(get3d)(input2, k, y2, x2)); + //partial_d *= THTensor_(get4d)(gradOutput, y1, x1, dy, dx); + //THTensor_(set3d)(gradInput1, k, y1, x1, THTensor_(get3d)(gradInput1, k, y1, x1) + partial_d); + //THTensor_(set3d)(gradInput2, k, y2, x2, THTensor_(get3d)(gradInput2, k, y2, x2) - partial_d); } } } } } } else { -#pragma omp parallel for private(x1,y1,x2,y2,k) - for (y1 = halfh1; y1 < iheight-halfh2; y1++) { - for (x1 = halfw1; x1 < iwidth-halfw2; x1++) { - for (y2 = y1-halfh1; y2 < y1+halfh2; y2++) { - for (x2 = x1-halfw1; x2 < x1+halfw2; x2++) { - long dy = y2-y1 + halfh1; - long dx = x2-x1 + halfw1; - for (k=0; k<ichannels; k++) { + //#pragma omp parallel for private(x1,y1,x2,y2,k) + for (y1 = 0; y1 < iheight; y1++) { + for (x1 = 0; x1 < iwidth; x1++) { + for (y2 = y1; y2 < y1+maxh; y2++) { + for (x2 = x1; x2 < x1+maxw; x2++) { + for (k = 0; k < ichannels; k++) { real partial_d = 2*(input1_p[k*i1s[0] + y1*i1s[1] + x1*i1s[2]] - input2_p[k*i2s[0] + y2*i2s[1] + x2*i2s[2]]); - /*if (partial_d != 0) - partial_d /= output_p[(int)(y1-ceil(maxh/2))*os[0] + (int)(x1-ceil(maxw/2))*os[1] + dy*os[2] + dx*os[3]];*/ - partial_d *= gradOutput_p[(y1-halfh1)*gos[0] + (x1-halfw1)*gos[1] + dy*gos[2] + dx*gos[3]]; + partial_d *= gradOutput_p[y1*gos[0]+x1*gos[1]+(y2-y1)*gos[2]+(x2-x1)*gos[3]]; gradInput1_p[k*gi1s[0] + y1*gi1s[1] + x1*gi1s[2]] += partial_d; gradInput2_p[k*gi2s[0] + y2*gi2s[1] + x2*gi2s[2]] -= partial_d; + //real partial_d = 2*(THTensor_(get3d)(input1, k, y1, x1) - THTensor_(get3d)(input2, k, y2, x2)); + //partial_d *= THTensor_(get4d)(gradOutput, y1, x1, y2-y1, x2-x1); + //THTensor_(set3d)(gradInput1, k, y1, x1, THTensor_(get3d)(gradInput1, k, y1, x1) + partial_d); + //THTensor_(set3d)(gradInput2, k, y2, x2, THTensor_(get3d)(gradInput2, k, y2, x2) - partial_d); + } } } |