Merge pull request #104 from colesbury/warp

Fix bicubic interpolation in image.warp
author: Soumith Chintala <soumith@gmail.com> 2015-09-12 00:30:19 +0300
committer: Soumith Chintala <soumith@gmail.com> 2015-09-12 00:30:19 +0300
commit: 1c013e5cc89e6831b65b2fef3c9c5a8a6a9e57d0 (patch)
tree: d15ad955f2f8ea64a3110782d4464ffff67210fc
parent: b17ed1930195d90b51a4343ef51f674494f1e803 (diff)
parent: b5fc8d082081b6533b427e101ac617171e792754 (diff)
1 files changed, 51 insertions, 55 deletions
diff --git a/generic/image.c b/generic/image.c
index 2e8223c..5ac3919 100755
--- a/generic/image.c
+++ b/generic/image.c
@@ -1642,6 +1642,48 @@ int image_(Main_flip)(lua_State *L) {
   return 0;
 }
 
+static inline real image_(Main_cubicInterpolate)(real p0, real p1, real p2, real p3, real x)
+{
+  return p1 + 0.5 * x * (p2 - p0 + x * (2 * p0 - 5 * p1 + 4 * p2 - p3 + x * (3 * (p1 - p2) + p3 - p0)));
+}
+
+static inline image_(Main_bicubicInterpolate)(
+  real* src, long* is, long* size, real ix, real iy,
+  real* dst, long *os,
+  real pad_value, int bounds_check)
+{
+  int i, j, k;
+  real arr[4], p[4];
+
+  // Calculate fractional and integer components
+  long x_pix = floor(ix);
+  long y_pix = floor(iy);
+  real dx = ix - (real)x_pix;
+  real dy = iy - (real)y_pix;
+
+  for (k=0; k<size[0]; k++) {
+    #pragma unroll
+    for (i = 0; i < 4; i++) {
+      long v = y_pix + i - 1;
+      real* data = &src[k * is[0] + v * is[1]];
+
+      #pragma unroll
+      for (j = 0; j < 4; j++) {
+        long u = x_pix + j - 1;
+        if (bounds_check && (v < 0 || v >= size[2] || u < 0 || u >= size[3])) {
+          p[j] = pad_value;
+        } else {
+          p[j] = data[u * is[2]];
+        }
+      }
+
+      arr[i] = image_(Main_cubicInterpolate)(p[0], p[1], p[2], p[3], dx);
+    }
+
+    dst[k * os[0]] = image_(Main_cubicInterpolate)(arr[0], arr[1], arr[2], arr[3], dy);
+  }
+}
+
 /*
  * Warps an image, according to an (x,y) flow field. The flow
  * field is in the space of the destination image, each vector
@@ -1741,61 +1783,15 @@ int image_(Main_warp)(lua_State *L) {
           break;
         case 2:  // Bicubic
           {
-	        // Calculate fractional and integer components
-            long x_pix = floor(ix);
-            long y_pix = floor(iy);
-            real dx = ix - (real)x_pix;
-            real dy = iy - (real)y_pix;
-
-            real C[4];
-            for (k=0; k<channels; k++) {
-              // Sweep by rows through the samples (to calculate final cubic coefs)
-              for (jj = 0; jj <= 3; jj++) {
-                v = y_pix - 1 + jj;
-                // We need to clamp all uv values to image border: hopefully
-                // branch prediction and compiler reordering takes care of all
-                // the conditionals (since the branch probabilities are heavily
-                // skewed).  Alternatively an inline "getPixelSafe" function would
-                // would be clearer here, but cannot be done with lua?
-                v = MAX(MIN((long)(src_height-1), v), 0);
-                long ofst = k * is[0] + v * is[1];
-                u = x_pix;
-                u = MAX(MIN((long)(src_width-1), u), 0);
-                real a0 = src_data[ofst + u * is[2]];
-                u = x_pix - 1;
-                u = MAX(MIN((long)(src_width-1), u), 0);
-                real d0 = src_data[ofst + u * is[2]] - a0;
-                u = x_pix + 1;
-                u = MAX(MIN((long)(src_width-1), u), 0);
-                real d2 = src_data[ofst + u * is[2]] - a0;
-                u = x_pix + 2;
-                u = MAX(MIN((long)(src_width-1), u), 0);
-                real d3 = src_data[ofst + u * is[2]] - a0;
-
-                // Note: there are mostly static casts, optimizer will take care of
-                // of it for us (prevents compiler warnings in new gcc)
-                real a1 =  -(real)1/(real)3*d0 + d2 -(real)1/(real)6*d3;
-                real a2 = (real)1/(real)2*d0 + (real)1/(real)2*d2;
-                real a3 = -(real)1/(real)6*d0 - (real)1/(real)2*d2 +
-                  (real)1/(real)6*d3;
-                C[jj] = a0 + dx * (a1 + dx * (a2 + a3 * dx));
-              }
-
-              real d0 = C[0]-C[1];
-              real d2 = C[2]-C[1];
-              real d3 = C[3]-C[1];
-              real a0 = C[1];
-              real a1 = -(real)1/(real)3*d0 + d2 - (real)1/(real)6*d3;
-              real a2 = (real)1/(real)2*d0 + (real)1/(real)2*d2;
-              real a3 = -(real)1/(real)6*d0 - (real)1/(real)2*d2 +
-                (real)1/(real)6*d3;
-              real Cc = a0 + dy * (a1 + dy * (a2 + a3 * dy));
-
-              // I assume that since the image is stored as reals we don't have
-              // to worry about clamping to min and max int (to prevent over or
-              // underflow)
-              dst_data[ k*os[0] + y*os[1] + x*os[2] ] = Cc;
-	        }
+            // We only need to do bounds checking if ix or iy are near the edge
+            int edge = !(iy >= 1 && iy < height - 2 && ix >= 1 && ix < width - 2);
+
+            real* dst = dst_data + y*os[1] + x*os[2];
+            if (edge) {
+              image_(Main_bicubicInterpolate)(src_data, is, src->size, ix, iy, dst, os, pad_value, 1);
+            } else {
+              image_(Main_bicubicInterpolate)(src_data, is, src->size, ix, iy, dst, os, pad_value, 0);
+            }
           }
           break;
         case 3:  // Lanczos
author	Soumith Chintala <soumith@gmail.com>	2015-09-12 00:30:19 +0300
committer	Soumith Chintala <soumith@gmail.com>	2015-09-12 00:30:19 +0300
commit	1c013e5cc89e6831b65b2fef3c9c5a8a6a9e57d0 (patch)
tree	d15ad955f2f8ea64a3110782d4464ffff67210fc
parent	b17ed1930195d90b51a4343ef51f674494f1e803 (diff)
parent	b5fc8d082081b6533b427e101ac617171e792754 (diff)