Modified the behavior of SpatialMatching with full_output = false

author: Michael 'myrhev' Mathieu <michael.mathieu@ens.fr> 2012-03-10 11:12:52 +0400
committer: Michael 'myrhev' Mathieu <michael.mathieu@ens.fr> 2012-03-10 11:12:52 +0400
commit: b8d8c0b682eb146453fda721907a58dae4cb9335 (patch)
tree: eae756ba68644ece2e68f3de8f6c517e0231083c /generic
parent: 90503963e940a592205f66a704b3d1e96e894c2c (diff)
1 files changed, 41 insertions, 37 deletions
diff --git a/generic/SpatialMatching.c b/generic/SpatialMatching.c
index ffde291..d34d61d 100644
--- a/generic/SpatialMatching.c
+++ b/generic/SpatialMatching.c
@@ -34,16 +34,16 @@ static int nn_(SpatialMatching_updateOutput)(lua_State *L)
   real *input2_p = THTensor_(data)(input2);
   real *output_p = THTensor_(data)(output);
 
-  // get halves of window size
-  int halfh1 = ceil((real)maxh/2)-1;
-  int halfh2 = floor((real)maxh/2)+1;
-  int halfw1 = ceil((real)maxw/2)-1;
-  int halfw2 = floor((real)maxw/2)+1;
-
   // compute output
   int x1,y1,x2,y2,k;
   if (full_output) {
-#pragma omp parallel for private(x1,y1,x2,y2,k)
+    // get halves of window size
+    int halfh1 = ceil((real)maxh/2)-1;
+    int halfh2 = floor((real)maxh/2)+1;
+    int halfw1 = ceil((real)maxw/2)-1;
+    int halfw2 = floor((real)maxw/2)+1;
+
+    //#pragma omp parallel for private(x1,y1,x2,y2,k)
     for (y1 = 0; y1 < iheight; y1++) {
       for (x1 = 0; x1 < iwidth; x1++) {
 	for (y2 = max(0,y1-halfh1); y2 < min(iheight,y1+halfh2); y2++) {
@@ -51,27 +51,29 @@ static int nn_(SpatialMatching_updateOutput)(lua_State *L)
 	    real dist = 0;
 	    for (k=0; k<ichannels; k++) {
 	      dist += square(input1_p[k*i1s[0] + y1*i1s[1] + x1*i1s[2]] - input2_p[k*i2s[0] + y2*i2s[1] + x2*i2s[2]]);
+	      //dist += square(THTensor_(get3d)(input1, k, y1, x1) - THTensor_(get3d)(input2, k, y2, x2));
 	    }
 	    long dy = y2-y1 + halfh1;
 	    long dx = x2-x1 + halfw1;
 	    output_p[y1*os[0] + x1*os[1] + dy*os[2] + dx*os[3]] = dist;
+	    //THTensor_(set4d)(output, y1, x1, dy, dx, dist);
 	  }
 	}
       }
     }
   } else {
-#pragma omp parallel for private(x1,y1,x2,y2,k)
-    for (y1 = halfh1; y1 < iheight-halfh2; y1++) {
-      for (x1 = halfw1; x1 < iwidth-halfw2; x1++) {
-	for (y2 = y1-halfh1; y2 < y1+halfh2; y2++) {
-	  for (x2 = x1-halfw1; x2 < x1+halfw2; x2++) {
+    //#pragma omp parallel for private(x1,y1,x2,y2,k)
+    for (y1 = 0; y1 < iheight; y1++) {
+      for (x1 = 0; x1 < iwidth; x1++) {
+	for (y2 = y1; y2 < y1+maxh; y2++) {
+	  for (x2 = x1; x2 < x1+maxw; x2++) {
 	    real dist = 0;
 	    for (k = 0; k < ichannels; k++) {
 	      dist += square(input1_p[k*i1s[0] + y1*i1s[1] + x1*i1s[2]] - input2_p[k*i2s[0] + y2*i2s[1] + x2*i2s[2]]);
+	      //dist += square(THTensor_(get3d)(input1, k, y1, x1) - THTensor_(get3d)(input2, k, y2, x2));
 	    }
-	    long dy = y2-y1 + halfh1;
-	    long dx = x2-x1 + halfw1;
-	    output_p[(y1-halfh1)*os[0] + (x1-halfw1)*os[1] + dy*os[2] + dx*os[3]] = dist;
+	    output_p[y1*os[0] + x1*os[1] + (y2-y1)*os[2] + (x2-x1)*os[3]] = dist;
+	    //THTensor_(set4d)(output, y1, x1, y2-y1, x2-x1, dist);
 	  }
 	}
       }
@@ -100,13 +102,10 @@ static int nn_(SpatialMatching_updateGradInput)(lua_State *L)
   int iwidth = input1->size[2];
   int iheight = input1->size[1];
   int ichannels = input1->size[0];
-  //int owidth = gradOutput->size[2];
-  //int oheight = gradOutput->size[1];
-  //int ochannels = gradOutput->size[0];
 
   // resize gradInput
-  THTensor_(zero)(gradInput1);
-  THTensor_(zero)(gradInput2);
+  //THTensor_(zero)(gradInput1);
+  //THTensor_(zero)(gradInput2);
 
   // get strides
   long *i1s = input1->stride;
@@ -124,15 +123,15 @@ static int nn_(SpatialMatching_updateGradInput)(lua_State *L)
   //real *output_p = THTensor_(data)(output);
   real *gradOutput_p = THTensor_(data)(gradOutput);
   
-  // get halves of window size
-  int halfh1 = ceil((real)maxh/2)-1;
-  int halfh2 = floor((real)maxh/2)+1;
-  int halfw1 = ceil((real)maxw/2)-1;
-  int halfw2 = floor((real)maxw/2)+1;
-
   // compute gradients
   int x1, y1, x2, y2, k;
   if (full_output) {
+    // get halves of window size
+    int halfh1 = ceil((real)maxh/2)-1;
+    int halfh2 = floor((real)maxh/2)+1;
+    int halfw1 = ceil((real)maxw/2)-1;
+    int halfw2 = floor((real)maxw/2)+1;
+
 #pragma omp parallel for private(x1,y1,x2,y2,k)
     for (y1 = 0; y1 < iheight; y1++) {
       for (x1 = 0; x1 < iwidth; x1++) {
@@ -147,26 +146,31 @@ static int nn_(SpatialMatching_updateGradInput)(lua_State *L)
 	      partial_d *= gradOutput_p[y1*gos[0] + x1*gos[1] + dy*gos[2] + dx*gos[3]];
 	      gradInput1_p[k*gi1s[0] + y1*gi1s[1] + x1*gi1s[2]] += partial_d;
 	      gradInput2_p[k*gi2s[0] + y2*gi2s[1] + x2*gi2s[2]] -= partial_d;
+	      //real partial_d = 2*(THTensor_(get3d)(input1, k, y1, x1) - THTensor_(get3d)(input2, k, y2, x2));
+	      //partial_d *= THTensor_(get4d)(gradOutput, y1, x1, dy, dx);
+	      //THTensor_(set3d)(gradInput1, k, y1, x1, THTensor_(get3d)(gradInput1, k, y1, x1) + partial_d);
+	      //THTensor_(set3d)(gradInput2, k, y2, x2, THTensor_(get3d)(gradInput2, k, y2, x2) - partial_d);
 	    }
 	  }
 	}
       }
     }
   } else {
-#pragma omp parallel for private(x1,y1,x2,y2,k)
-    for (y1 = halfh1; y1 < iheight-halfh2; y1++) {
-      for (x1 = halfw1; x1 < iwidth-halfw2; x1++) {
-	for (y2 = y1-halfh1; y2 < y1+halfh2; y2++) {
-	  for (x2 = x1-halfw1; x2 < x1+halfw2; x2++) {
-	    long dy = y2-y1 + halfh1;
-	    long dx = x2-x1 + halfw1;
-	    for (k=0; k<ichannels; k++) {
+    //#pragma omp parallel for private(x1,y1,x2,y2,k)
+    for (y1 = 0; y1 < iheight; y1++) {
+      for (x1 = 0; x1 < iwidth; x1++) {
+	for (y2 = y1; y2 < y1+maxh; y2++) {
+	  for (x2 = x1; x2 < x1+maxw; x2++) {
+	    for (k = 0; k < ichannels; k++) {
 	      real partial_d = 2*(input1_p[k*i1s[0] + y1*i1s[1] + x1*i1s[2]] - input2_p[k*i2s[0] + y2*i2s[1] + x2*i2s[2]]);
-	      /*if (partial_d != 0)
-		partial_d /= output_p[(int)(y1-ceil(maxh/2))*os[0] + (int)(x1-ceil(maxw/2))*os[1] + dy*os[2] + dx*os[3]];*/
-	      partial_d *= gradOutput_p[(y1-halfh1)*gos[0] + (x1-halfw1)*gos[1] + dy*gos[2] + dx*gos[3]];
+	      partial_d *= gradOutput_p[y1*gos[0]+x1*gos[1]+(y2-y1)*gos[2]+(x2-x1)*gos[3]];
 	      gradInput1_p[k*gi1s[0] + y1*gi1s[1] + x1*gi1s[2]] += partial_d;
 	      gradInput2_p[k*gi2s[0] + y2*gi2s[1] + x2*gi2s[2]] -= partial_d;
+	      //real partial_d = 2*(THTensor_(get3d)(input1, k, y1, x1) - THTensor_(get3d)(input2, k, y2, x2));
+	      //partial_d *= THTensor_(get4d)(gradOutput, y1, x1, y2-y1, x2-x1);
+	      //THTensor_(set3d)(gradInput1, k, y1, x1, THTensor_(get3d)(gradInput1, k, y1, x1) + partial_d);
+	      //THTensor_(set3d)(gradInput2, k, y2, x2, THTensor_(get3d)(gradInput2, k, y2, x2) - partial_d);
+	      
 	    }
 	  }
 	}
author	Michael 'myrhev' Mathieu <michael.mathieu@ens.fr>	2012-03-10 11:12:52 +0400
committer	Michael 'myrhev' Mathieu <michael.mathieu@ens.fr>	2012-03-10 11:12:52 +0400
commit	b8d8c0b682eb146453fda721907a58dae4cb9335 (patch)
tree	eae756ba68644ece2e68f3de8f6c517e0231083c /generic
parent	90503963e940a592205f66a704b3d1e96e894c2c (diff)