Diffstat (limited to 'extern')
-rw-r--r-- | extern/libmv/CMakeLists.txt | 2
-rw-r--r-- | extern/libmv/libmv-capi.cpp | 149
-rw-r--r-- | extern/libmv/libmv-capi.h | 26
-rw-r--r-- | extern/libmv/libmv/image/sample.h | 64
-rw-r--r-- | extern/libmv/libmv/multiview/homography.cc | 267
-rw-r--r-- | extern/libmv/libmv/multiview/homography.h | 84
-rw-r--r-- | extern/libmv/libmv/multiview/homography_parameterization.h | 91
-rw-r--r-- | extern/libmv/libmv/tracking/esm_region_tracker.cc | 39
-rw-r--r-- | extern/libmv/libmv/tracking/track_region.cc | 1391
-rw-r--r-- | extern/libmv/libmv/tracking/track_region.h | 146 |
10 files changed, 2226 insertions, 33 deletions
diff --git a/extern/libmv/CMakeLists.txt b/extern/libmv/CMakeLists.txt index cf0ad1102e0..60cd84d89d4 100644 --- a/extern/libmv/CMakeLists.txt +++ b/extern/libmv/CMakeLists.txt @@ -62,6 +62,7 @@ set(SRC libmv/multiview/fundamental.cc libmv/multiview/projection.cc libmv/multiview/triangulation.cc + libmv/multiview/homography.cc libmv/numeric/numeric.cc libmv/numeric/poly.cc libmv/simple_pipeline/bundle.cc @@ -84,6 +85,7 @@ set(SRC libmv/tracking/pyramid_region_tracker.cc libmv/tracking/retrack_region_tracker.cc libmv/tracking/trklt_region_tracker.cc + libmv/tracking/track_region.cc third_party/fast/fast_10.c third_party/fast/fast_11.c diff --git a/extern/libmv/libmv-capi.cpp b/extern/libmv/libmv-capi.cpp index 6c20d76eeac..ffa065f08fe 100644 --- a/extern/libmv/libmv-capi.cpp +++ b/extern/libmv/libmv-capi.cpp @@ -28,6 +28,10 @@ tracking between which failed */ #undef DUMP_FAILURE +/* define this to generate PNG images with content of search areas + on every itteration of tracking */ +#undef DUMP_ALWAYS + #include "libmv-capi.h" #include "third_party/gflags/gflags/gflags.h" @@ -45,6 +49,7 @@ #include "libmv/tracking/trklt_region_tracker.h" #include "libmv/tracking/lmicklt_region_tracker.h" #include "libmv/tracking/pyramid_region_tracker.h" +#include "libmv/tracking/track_region.h" #include "libmv/simple_pipeline/callbacks.h" #include "libmv/simple_pipeline/tracks.h" @@ -59,7 +64,7 @@ #include <stdlib.h> #include <assert.h> -#ifdef DUMP_FAILURE +#if defined(DUMP_FAILURE) || defined (DUMP_ALWAYS) # include <png.h> #endif @@ -97,7 +102,7 @@ void libmv_initLogging(const char *argv0) void libmv_startDebugLogging(void) { google::SetCommandLineOption("logtostderr", "1"); - google::SetCommandLineOption("v", "0"); + google::SetCommandLineOption("v", "2"); google::SetCommandLineOption("stderrthreshold", "1"); google::SetCommandLineOption("minloglevel", "0"); V3D::optimizerVerbosenessLevel = 1; @@ -158,20 +163,35 @@ libmv_RegionTracker *libmv_bruteRegionTrackerNew(int half_window_size, double mi return (libmv_RegionTracker *)brute_region_tracker; } -static void floatBufToImage(const float *buf, int width, int height, libmv::FloatImage *image) +static void floatBufToImage(const float *buf, int width, int height, int channels, libmv::FloatImage *image) { - int x, y, a = 0; + int x, y, k, a = 0; - image->resize(height, width); + image->Resize(height, width, channels); for (y = 0; y < height; y++) { for (x = 0; x < width; x++) { - (*image)(y, x, 0) = buf[a++]; + for (k = 0; k < channels; k++) { + (*image)(y, x, k) = buf[a++]; + } + } + } +} + +static void imageToFloatBuf(const libmv::FloatImage *image, int channels, float *buf) +{ + int x, y, k, a = 0; + + for (y = 0; y < image->Height(); y++) { + for (x = 0; x < image->Width(); x++) { + for (k = 0; k < channels; k++) { + buf[a++] = (*image)(y, x, k); + } } } } -#ifdef DUMP_FAILURE +#if defined(DUMP_FAILURE) || defined (DUMP_ALWAYS) void savePNGImage(png_bytep *row_pointers, int width, int height, int depth, int color_type, char *file_name) { png_infop info_ptr; @@ -234,14 +254,14 @@ static void saveImage(char *prefix, libmv::FloatImage image, int x0, int y0) row_pointers[y]= (png_bytep)malloc(sizeof(png_byte)*4*image.Width()); for (x = 0; x < image.Width(); x++) { - if (x0 == x && y0 == y) { + if (x0 == x && image.Height() - y0 - 1 == y) { row_pointers[y][x*4+0]= 255; row_pointers[y][x*4+1]= 0; row_pointers[y][x*4+2]= 0; row_pointers[y][x*4+3]= 255; } else { - float pixel = image(y, x, 0); + float pixel = image(image.Height() - y - 1, x, 0); 
row_pointers[y][x*4+0]= pixel*255; row_pointers[y][x*4+1]= pixel*255; row_pointers[y][x*4+2]= pixel*255; @@ -302,19 +322,23 @@ int libmv_regionTrackerTrack(libmv_RegionTracker *libmv_tracker, const float *im libmv::RegionTracker *region_tracker = (libmv::RegionTracker *)libmv_tracker; libmv::FloatImage old_patch, new_patch; - floatBufToImage(ima1, width, height, &old_patch); - floatBufToImage(ima2, width, height, &new_patch); + floatBufToImage(ima1, width, height, 1, &old_patch); + floatBufToImage(ima2, width, height, 1, &new_patch); -#ifndef DUMP_FAILURE +#if !defined(DUMP_FAILURE) && !defined(DUMP_ALWAYS) return region_tracker->Track(old_patch, new_patch, x1, y1, x2, y2); #else { - double sx2 = *x2, sy2 = *y2; + /* double sx2 = *x2, sy2 = *y2; */ int result = region_tracker->Track(old_patch, new_patch, x1, y1, x2, y2); +#if defined(DUMP_ALWAYS) + { +#else if (!result) { +#endif saveImage("old_patch", old_patch, x1, y1); - saveImage("new_patch", new_patch, sx2, sy2); + saveImage("new_patch", new_patch, *x2, *y2); } return result; @@ -329,6 +353,103 @@ void libmv_regionTrackerDestroy(libmv_RegionTracker *libmv_tracker) delete region_tracker; } +/* ************ Planar tracker ************ */ + +/* TrackRegion (new planar tracker) */ +int libmv_trackRegion(const struct libmv_trackRegionOptions *options, + const float *image1, const float *image2, + int width, int height, + const double *x1, const double *y1, + struct libmv_trackRegionResult *result, + double *x2, double *y2) +{ + double xx1[5], yy1[5]; + double xx2[5], yy2[5]; + bool tracking_result = false; + + /* Convert to doubles for the libmv api. The four corners and the center. */ + for (int i = 0; i < 5; ++i) { + xx1[i] = x1[i]; + yy1[i] = y1[i]; + xx2[i] = x2[i]; + yy2[i] = y2[i]; + } + + libmv::TrackRegionOptions track_region_options; + switch (options->motion_model) { +#define LIBMV_CONVERT(the_model) \ + case libmv::TrackRegionOptions::the_model: \ + track_region_options.mode = libmv::TrackRegionOptions::the_model; \ + break; + LIBMV_CONVERT(TRANSLATION) + LIBMV_CONVERT(TRANSLATION_ROTATION) + LIBMV_CONVERT(TRANSLATION_SCALE) + LIBMV_CONVERT(TRANSLATION_ROTATION_SCALE) + LIBMV_CONVERT(AFFINE) + LIBMV_CONVERT(HOMOGRAPHY) +#undef LIBMV_CONVERT + } + + track_region_options.minimum_correlation = options->minimum_correlation; + track_region_options.max_iterations = options->num_iterations; + track_region_options.sigma = options->sigma; + track_region_options.num_extra_points = 1; + track_region_options.image1_mask = NULL; + track_region_options.use_brute_initialization = options->use_brute; + track_region_options.use_normalized_intensities = options->use_normalization; + + /* Convert from raw float buffers to libmv's FloatImage. */ + libmv::FloatImage old_patch, new_patch; + floatBufToImage(image1, width, height, 1, &old_patch); + floatBufToImage(image2, width, height, 1, &new_patch); + + libmv::TrackRegionResult track_region_result; + libmv::TrackRegion(old_patch, new_patch, xx1, yy1, track_region_options, xx2, yy2, &track_region_result); + + /* Convert to floats for the blender api. */ + for (int i = 0; i < 5; ++i) { + x2[i] = xx2[i]; + y2[i] = yy2[i]; + } + + /* TODO(keir): Update the termination string with failure details. 
*/ + if (track_region_result.termination == libmv::TrackRegionResult::PARAMETER_TOLERANCE || + track_region_result.termination == libmv::TrackRegionResult::FUNCTION_TOLERANCE || + track_region_result.termination == libmv::TrackRegionResult::GRADIENT_TOLERANCE || + track_region_result.termination == libmv::TrackRegionResult::NO_CONVERGENCE) + { + tracking_result = true; + } + +#if defined(DUMP_FAILURE) || defined(DUMP_ALWAYS) +#if defined(DUMP_ALWAYS) + { +#else + if (!tracking_result) { +#endif + saveImage("old_patch", old_patch, x1[4], y1[4]); + saveImage("new_patch", new_patch, x2[4], y2[4]); + } +#endif + + return tracking_result; +} + +void libmv_samplePlanarPatch(const float *image, int width, int height, + int channels, const double *xs, const double *ys, + int num_samples_x, int num_samples_y, float *patch, + double *warped_position_x, double *warped_position_y) +{ + libmv::FloatImage libmv_image, libmv_patch; + + floatBufToImage(image, width, height, channels, &libmv_image); + + libmv::SamplePlanarPatch(libmv_image, xs, ys, num_samples_x, num_samples_y, + &libmv_patch, warped_position_x, warped_position_y); + + imageToFloatBuf(&libmv_patch, channels, patch); +} + /* ************ Tracks ************ */ libmv_Tracks *libmv_tracksNew(void) diff --git a/extern/libmv/libmv-capi.h b/extern/libmv/libmv-capi.h index bccc4706832..6f4b5dea384 100644 --- a/extern/libmv/libmv-capi.h +++ b/extern/libmv/libmv-capi.h @@ -50,6 +50,32 @@ int libmv_regionTrackerTrack(struct libmv_RegionTracker *libmv_tracker, const fl int width, int height, double x1, double y1, double *x2, double *y2); void libmv_regionTrackerDestroy(struct libmv_RegionTracker *libmv_tracker); +/* TrackRegion (new planar tracker) */ +struct libmv_trackRegionOptions { + int motion_model; + int num_iterations; + int use_brute; + int use_normalization; + double minimum_correlation; + double sigma; +}; +struct libmv_trackRegionResult { + int termination; + const char *termination_reason; + double correlation; +}; +int libmv_trackRegion(const struct libmv_trackRegionOptions *options, + const float *image1, const float *image2, + int width, int height, + const double *x1, const double *y1, + struct libmv_trackRegionResult *result, + double *x2, double *y2); + +void libmv_samplePlanarPatch(const float *image, int width, int height, + int channels, const double *xs, const double *ys, + int num_samples_x, int num_samples_y, float *patch, + double *warped_position_x, double *warped_position_y); + /* Tracks */ struct libmv_Tracks *libmv_tracksNew(void); void libmv_tracksInsert(struct libmv_Tracks *libmv_tracks, int image, int track, double x, double y); diff --git a/extern/libmv/libmv/image/sample.h b/extern/libmv/libmv/image/sample.h index e842747e6d4..a8850effeab 100644 --- a/extern/libmv/libmv/image/sample.h +++ b/extern/libmv/libmv/image/sample.h @@ -34,25 +34,22 @@ inline T SampleNearest(const Array3D<T> &image, return image(i, j, v); } -static inline void LinearInitAxis(float fx, int width, - int *x1, int *x2, - float *dx1, float *dx2) { - const int ix = int(fx); +inline void LinearInitAxis(float x, int size, + int *x1, int *x2, + float *dx) { + const int ix = static_cast<int>(x); if (ix < 0) { *x1 = 0; *x2 = 0; - *dx1 = 1; - *dx2 = 0; - } else if (ix > width-2) { - *x1 = width-1; - *x2 = width-1; - *dx1 = 1; - *dx2 = 0; + *dx = 1.0; + } else if (ix > size - 2) { + *x1 = size - 1; + *x2 = size - 1; + *dx = 1.0; } else { *x1 = ix; - *x2 = *x1 + 1; - *dx1 = *x2 - fx; - *dx2 = 1 - *dx1; + *x2 = ix + 1; + *dx = *x2 - x; } } @@ -60,18 +57,47 
@@ static inline void LinearInitAxis(float fx, int width, template<typename T> inline T SampleLinear(const Array3D<T> &image, float y, float x, int v = 0) { int x1, y1, x2, y2; - float dx1, dy1, dx2, dy2; + float dx, dy; - LinearInitAxis(y, image.Height(), &y1, &y2, &dy1, &dy2); - LinearInitAxis(x, image.Width(), &x1, &x2, &dx1, &dx2); + // Take the upper left corner as integer pixel positions. + x -= 0.5; + y -= 0.5; + + LinearInitAxis(y, image.Height(), &y1, &y2, &dy); + LinearInitAxis(x, image.Width(), &x1, &x2, &dx); const T im11 = image(y1, x1, v); const T im12 = image(y1, x2, v); const T im21 = image(y2, x1, v); const T im22 = image(y2, x2, v); - return T(dy1 * ( dx1 * im11 + dx2 * im12 ) + - dy2 * ( dx1 * im21 + dx2 * im22 )); + return T( dy * ( dx * im11 + (1.0 - dx) * im12 ) + + (1 - dy) * ( dx * im21 + (1.0 - dx) * im22 )); +} + +/// Linear interpolation, of all channels. The sample is assumed to have the +/// same size as the number of channels in image. +template<typename T> +inline void SampleLinear(const Array3D<T> &image, float y, float x, T *sample) { + int x1, y1, x2, y2; + float dx, dy; + + // Take the upper left corner as integer pixel positions. + x -= 0.5; + y -= 0.5; + + LinearInitAxis(y, image.Height(), &y1, &y2, &dy); + LinearInitAxis(x, image.Width(), &x1, &x2, &dx); + + for (int i = 0; i < image.Depth(); ++i) { + const T im11 = image(y1, x1, i); + const T im12 = image(y1, x2, i); + const T im21 = image(y2, x1, i); + const T im22 = image(y2, x2, i); + + sample[i] = T( dy * ( dx * im11 + (1.0 - dx) * im12 ) + + (1 - dy) * ( dx * im21 + (1.0 - dx) * im22 )); + } } // Downsample all channels by 2. If the image has odd width or height, the last diff --git a/extern/libmv/libmv/multiview/homography.cc b/extern/libmv/libmv/multiview/homography.cc new file mode 100644 index 00000000000..366392f3923 --- /dev/null +++ b/extern/libmv/libmv/multiview/homography.cc @@ -0,0 +1,267 @@ +// Copyright (c) 2008, 2009 libmv authors. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to +// deal in the Software without restriction, including without limitation the +// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or +// sell copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +// IN THE SOFTWARE. + +#include "libmv/logging/logging.h" +#include "libmv/multiview/homography.h" +#include "libmv/multiview/homography_parameterization.h" + +namespace libmv { +/** 2D Homography transformation estimation in the case that points are in + * euclidean coordinates. 
+ * + * x = H y + * x and y vector must have the same direction, we could write + * crossproduct(|x|, * H * |y| ) = |0| + * + * | 0 -1 x2| |a b c| |y1| |0| + * | 1 0 -x1| * |d e f| * |y2| = |0| + * |-x2 x1 0| |g h 1| |1 | |0| + * + * That gives : + * + * (-d+x2*g)*y1 + (-e+x2*h)*y2 + -f+x2 |0| + * (a-x1*g)*y1 + (b-x1*h)*y2 + c-x1 = |0| + * (-x2*a+x1*d)*y1 + (-x2*b+x1*e)*y2 + -x2*c+x1*f |0| + */ +bool Homography2DFromCorrespondencesLinearEuc( + const Mat &x1, + const Mat &x2, + Mat3 *H, + double expected_precision) { + assert(2 == x1.rows()); + assert(4 <= x1.cols()); + assert(x1.rows() == x2.rows()); + assert(x1.cols() == x2.cols()); + + int n = x1.cols(); + MatX8 L = Mat::Zero(n * 3, 8); + Mat b = Mat::Zero(n * 3, 1); + for (int i = 0; i < n; ++i) { + int j = 3 * i; + L(j, 0) = x1(0, i); // a + L(j, 1) = x1(1, i); // b + L(j, 2) = 1.0; // c + L(j, 6) = -x2(0, i) * x1(0, i); // g + L(j, 7) = -x2(0, i) * x1(1, i); // h + b(j, 0) = x2(0, i); // i + + ++j; + L(j, 3) = x1(0, i); // d + L(j, 4) = x1(1, i); // e + L(j, 5) = 1.0; // f + L(j, 6) = -x2(1, i) * x1(0, i); // g + L(j, 7) = -x2(1, i) * x1(1, i); // h + b(j, 0) = x2(1, i); // i + + // This ensures better stability + // TODO(julien) make a lite version without this 3rd set + ++j; + L(j, 0) = x2(1, i) * x1(0, i); // a + L(j, 1) = x2(1, i) * x1(1, i); // b + L(j, 2) = x2(1, i); // c + L(j, 3) = -x2(0, i) * x1(0, i); // d + L(j, 4) = -x2(0, i) * x1(1, i); // e + L(j, 5) = -x2(0, i) ; // f + } + // Solve Lx=B + Vec h = L.fullPivLu().solve(b); + Homography2DNormalizedParameterization<double>::To(h, H); + if ((L * h).isApprox(b, expected_precision)) { + return true; + } else { + return false; + } +} + +/** 2D Homography transformation estimation in the case that points are in + * homogeneous coordinates. 
+ * + * | 0 -x3 x2| |a b c| |y1| -x3*d+x2*g -x3*e+x2*h -x3*f+x2*1 |y1| (-x3*d+x2*g)*y1 (-x3*e+x2*h)*y2 (-x3*f+x2*1)*y3 |0| + * | x3 0 -x1| * |d e f| * |y2| = x3*a-x1*g x3*b-x1*h x3*c-x1*1 * |y2| = (x3*a-x1*g)*y1 (x3*b-x1*h)*y2 (x3*c-x1*1)*y3 = |0| + * |-x2 x1 0| |g h 1| |y3| -x2*a+x1*d -x2*b+x1*e -x2*c+x1*f |y3| (-x2*a+x1*d)*y1 (-x2*b+x1*e)*y2 (-x2*c+x1*f)*y3 |0| + * X = |a b c d e f g h|^t + */ +bool Homography2DFromCorrespondencesLinear(const Mat &x1, + const Mat &x2, + Mat3 *H, + double expected_precision) { + if (x1.rows() == 2) { + return Homography2DFromCorrespondencesLinearEuc(x1, x2, H, + expected_precision); + } + assert(3 == x1.rows()); + assert(4 <= x1.cols()); + assert(x1.rows() == x2.rows()); + assert(x1.cols() == x2.cols()); + + const int x = 0; + const int y = 1; + const int w = 2; + int n = x1.cols(); + MatX8 L = Mat::Zero(n * 3, 8); + Mat b = Mat::Zero(n * 3, 1); + for (int i = 0; i < n; ++i) { + int j = 3 * i; + L(j, 0) = x2(w, i) * x1(x, i);//a + L(j, 1) = x2(w, i) * x1(y, i);//b + L(j, 2) = x2(w, i) * x1(w, i);//c + L(j, 6) = -x2(x, i) * x1(x, i);//g + L(j, 7) = -x2(x, i) * x1(y, i);//h + b(j, 0) = x2(x, i) * x1(w, i); + + ++j; + L(j, 3) = x2(w, i) * x1(x, i);//d + L(j, 4) = x2(w, i) * x1(y, i);//e + L(j, 5) = x2(w, i) * x1(w, i);//f + L(j, 6) = -x2(y, i) * x1(x, i);//g + L(j, 7) = -x2(y, i) * x1(y, i);//h + b(j, 0) = x2(y, i) * x1(w, i); + + // This ensures better stability + ++j; + L(j, 0) = x2(y, i) * x1(x, i);//a + L(j, 1) = x2(y, i) * x1(y, i);//b + L(j, 2) = x2(y, i) * x1(w, i);//c + L(j, 3) = -x2(x, i) * x1(x, i);//d + L(j, 4) = -x2(x, i) * x1(y, i);//e + L(j, 5) = -x2(x, i) * x1(w, i);//f + } + // Solve Lx=B + Vec h = L.fullPivLu().solve(b); + if ((L * h).isApprox(b, expected_precision)) { + Homography2DNormalizedParameterization<double>::To(h, H); + return true; + } else { + return false; + } +} +/** + * x2 ~ A * x1 + * x2^t * Hi * A *x1 = 0 + * H1 = H2 = H3 = + * | 0 0 0 1| |-x2w| |0 0 0 0| | 0 | | 0 0 1 0| |-x2z| + * | 0 0 0 0| -> | 0 | |0 0 1 0| -> |-x2z| | 0 0 0 0| -> | 0 | + * | 0 0 0 0| | 0 | |0-1 0 0| | x2y| |-1 0 0 0| | x2x| + * |-1 0 0 0| | x2x| |0 0 0 0| | 0 | | 0 0 0 0| | 0 | + * H4 = H5 = H6 = + * |0 0 0 0| | 0 | | 0 1 0 0| |-x2y| |0 0 0 0| | 0 | + * |0 0 0 1| -> |-x2w| |-1 0 0 0| -> | x2x| |0 0 0 0| -> | 0 | + * |0 0 0 0| | 0 | | 0 0 0 0| | 0 | |0 0 0 1| |-x2w| + * |0-1 0 0| | x2y| | 0 0 0 0| | 0 | |0 0-1 0| | x2z| + * |a b c d| + * A = |e f g h| + * |i j k l| + * |m n o 1| + * + * x2^t * H1 * A *x1 = (-x2w*a +x2x*m )*x1x + (-x2w*b +x2x*n )*x1y + (-x2w*c +x2x*o )*x1z + (-x2w*d +x2x*1 )*x1w = 0 + * x2^t * H2 * A *x1 = (-x2z*e +x2y*i )*x1x + (-x2z*f +x2y*j )*x1y + (-x2z*g +x2y*k )*x1z + (-x2z*h +x2y*l )*x1w = 0 + * x2^t * H3 * A *x1 = (-x2z*a +x2x*i )*x1x + (-x2z*b +x2x*j )*x1y + (-x2z*c +x2x*k )*x1z + (-x2z*d +x2x*l )*x1w = 0 + * x2^t * H4 * A *x1 = (-x2w*e +x2y*m )*x1x + (-x2w*f +x2y*n )*x1y + (-x2w*g +x2y*o )*x1z + (-x2w*h +x2y*1 )*x1w = 0 + * x2^t * H5 * A *x1 = (-x2y*a +x2x*e )*x1x + (-x2y*b +x2x*f )*x1y + (-x2y*c +x2x*g )*x1z + (-x2y*d +x2x*h )*x1w = 0 + * x2^t * H6 * A *x1 = (-x2w*i +x2z*m )*x1x + (-x2w*j +x2z*n )*x1y + (-x2w*k +x2z*o )*x1z + (-x2w*l +x2z*1 )*x1w = 0 + * + * X = |a b c d e f g h i j k l m n o|^t +*/ +bool Homography3DFromCorrespondencesLinear(const Mat &x1, + const Mat &x2, + Mat4 *H, + double expected_precision) { + assert(4 == x1.rows()); + assert(5 <= x1.cols()); + assert(x1.rows() == x2.rows()); + assert(x1.cols() == x2.cols()); + const int x = 0; + const int y = 1; + const int z = 2; + const int w = 3; + int n = 
x1.cols(); + MatX15 L = Mat::Zero(n * 6, 15); + Mat b = Mat::Zero(n * 6, 1); + for (int i = 0; i < n; ++i) { + int j = 6 * i; + L(j, 0) = -x2(w, i) * x1(x, i);//a + L(j, 1) = -x2(w, i) * x1(y, i);//b + L(j, 2) = -x2(w, i) * x1(z, i);//c + L(j, 3) = -x2(w, i) * x1(w, i);//d + L(j,12) = x2(x, i) * x1(x, i);//m + L(j,13) = x2(x, i) * x1(y, i);//n + L(j,14) = x2(x, i) * x1(z, i);//o + b(j, 0) = -x2(x, i) * x1(w, i); + + ++j; + L(j, 4) = -x2(z, i) * x1(x, i);//e + L(j, 5) = -x2(z, i) * x1(y, i);//f + L(j, 6) = -x2(z, i) * x1(z, i);//g + L(j, 7) = -x2(z, i) * x1(w, i);//h + L(j, 8) = x2(y, i) * x1(x, i);//i + L(j, 9) = x2(y, i) * x1(y, i);//j + L(j,10) = x2(y, i) * x1(z, i);//k + L(j,11) = x2(y, i) * x1(w, i);//l + + ++j; + L(j, 0) = -x2(z, i) * x1(x, i);//a + L(j, 1) = -x2(z, i) * x1(y, i);//b + L(j, 2) = -x2(z, i) * x1(z, i);//c + L(j, 3) = -x2(z, i) * x1(w, i);//d + L(j, 8) = x2(x, i) * x1(x, i);//i + L(j, 9) = x2(x, i) * x1(y, i);//j + L(j,10) = x2(x, i) * x1(z, i);//k + L(j,11) = x2(x, i) * x1(w, i);//l + + ++j; + L(j, 4) = -x2(w, i) * x1(x, i);//e + L(j, 5) = -x2(w, i) * x1(y, i);//f + L(j, 6) = -x2(w, i) * x1(z, i);//g + L(j, 7) = -x2(w, i) * x1(w, i);//h + L(j,12) = x2(y, i) * x1(x, i);//m + L(j,13) = x2(y, i) * x1(y, i);//n + L(j,14) = x2(y, i) * x1(z, i);//o + b(j, 0) = -x2(y, i) * x1(w, i); + + ++j; + L(j, 0) = -x2(y, i) * x1(x, i);//a + L(j, 1) = -x2(y, i) * x1(y, i);//b + L(j, 2) = -x2(y, i) * x1(z, i);//c + L(j, 3) = -x2(y, i) * x1(w, i);//d + L(j, 4) = x2(x, i) * x1(x, i);//e + L(j, 5) = x2(x, i) * x1(y, i);//f + L(j, 6) = x2(x, i) * x1(z, i);//g + L(j, 7) = x2(x, i) * x1(w, i);//h + + ++j; + L(j, 8) = -x2(w, i) * x1(x, i);//i + L(j, 9) = -x2(w, i) * x1(y, i);//j + L(j,10) = -x2(w, i) * x1(z, i);//k + L(j,11) = -x2(w, i) * x1(w, i);//l + L(j,12) = x2(z, i) * x1(x, i);//m + L(j,13) = x2(z, i) * x1(y, i);//n + L(j,14) = x2(z, i) * x1(z, i);//o + b(j, 0) = -x2(z, i) * x1(w, i); + } + // Solve Lx=B + Vec h = L.fullPivLu().solve(b); + if ((L * h).isApprox(b, expected_precision)) { + Homography3DNormalizedParameterization<double>::To(h, H); + return true; + } else { + return false; + } +} +} // namespace libmv diff --git a/extern/libmv/libmv/multiview/homography.h b/extern/libmv/libmv/multiview/homography.h new file mode 100644 index 00000000000..786fd3df8ca --- /dev/null +++ b/extern/libmv/libmv/multiview/homography.h @@ -0,0 +1,84 @@ +// Copyright (c) 2011 libmv authors. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to +// deal in the Software without restriction, including without limitation the +// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or +// sell copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +// IN THE SOFTWARE. 
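As a usage sketch (not part of this commit), the 2D estimator above can be driven directly from two quads of corner points, which is the same call pattern the HomographyWarp initializer further below uses; the helper name is hypothetical:

#include "libmv/multiview/homography.h"
#include "libmv/numeric/numeric.h"

// Hypothetical helper, for illustration only: estimate the 3x3 homography
// mapping the quad (x1, y1) onto the quad (x2, y2), each given as four
// corner points.
bool EstimateQuadHomography(const double *x1, const double *y1,
                            const double *x2, const double *y2,
                            libmv::Mat3 *H) {
  libmv::Mat quad1(2, 4), quad2(2, 4);
  quad1 << x1[0], x1[1], x1[2], x1[3],
           y1[0], y1[1], y1[2], y1[3];
  quad2 << x2[0], x2[1], x2[2], x2[3],
           y2[0], y2[1], y2[2], y2[3];
  // 2xN (euclidean) input dispatches to the Euc variant, which solves the
  // stacked linear system L h = b described in the comments above.
  return libmv::Homography2DFromCorrespondencesLinear(quad1, quad2, H, 1e-12);
}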
+ +#ifndef LIBMV_MULTIVIEW_HOMOGRAPHY_H_ +#define LIBMV_MULTIVIEW_HOMOGRAPHY_H_ + +#include "libmv/numeric/numeric.h" + +namespace libmv { + +/** + * 2D homography transformation estimation. + * + * This function estimates the homography transformation from a list of 2D + * correspondences which represents either: + * + * - 3D points on a plane, with a general moving camera. + * - 3D points with a rotating camera (pure rotation). + * - 3D points + different planar projections + * + * \param x1 The first 2xN or 3xN matrix of euclidean or homogeneous points. + * \param x2 The second 2xN or 3xN matrix of euclidean or homogeneous points. + * \param H The 3x3 homography transformation matrix (8 dof) such that + * x2 = H * x1 with |a b c| + * H = |d e f| + * |g h 1| + * \param expected_precision The expected precision in order for instance + * to accept almost homography matrices. + * + * \return True if the transformation estimation has succeeded. + * \note There must be at least 4 non-colinear points. + */ +bool Homography2DFromCorrespondencesLinear(const Mat &x1, + const Mat &x2, + Mat3 *H, + double expected_precision = + EigenDouble::dummy_precision()); + +/** + * 3D Homography transformation estimation. + * + * This function can be used in order to estimate the homography transformation + * from a list of 3D correspondences. + * + * \param[in] x1 The first 4xN matrix of homogeneous points + * \param[in] x2 The second 4xN matrix of homogeneous points + * \param[out] H The 4x4 homography transformation matrix (15 dof) such that + * x2 = H * x1 with |a b c d| + * H = |e f g h| + * |i j k l| + * |m n o 1| + * \param[in] expected_precision The expected precision in order for instance + * to accept almost homography matrices. + * + * \return true if the transformation estimation has succeeded + * + * \note Need at least 5 non coplanar points + * \note Points coordinates must be in homogeneous coordinates + */ +bool Homography3DFromCorrespondencesLinear(const Mat &x1, + const Mat &x2, + Mat4 *H, + double expected_precision = + EigenDouble::dummy_precision()); +} // namespace libmv + +#endif // LIBMV_MULTIVIEW_HOMOGRAPHY_H_ diff --git a/extern/libmv/libmv/multiview/homography_parameterization.h b/extern/libmv/libmv/multiview/homography_parameterization.h new file mode 100644 index 00000000000..b31642eea15 --- /dev/null +++ b/extern/libmv/libmv/multiview/homography_parameterization.h @@ -0,0 +1,91 @@ +// Copyright (c) 2011 libmv authors. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to +// deal in the Software without restriction, including without limitation the +// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or +// sell copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +// IN THE SOFTWARE. + +#ifndef LIBMV_MULTIVIEW_HOMOGRAPHY_PARAMETERIZATION_H_ +#define LIBMV_MULTIVIEW_HOMOGRAPHY_PARAMETERIZATION_H_ + +#include "libmv/numeric/numeric.h" + +namespace libmv { + +/** A parameterization of the 2D homography matrix that uses 8 parameters so + * that the matrix is normalized (H(2,2) == 1). + * The homography matrix H is built from a list of 8 parameters (a, b,...g, h) + * as follows + * |a b c| + * H = |d e f| + * |g h 1| + */ +template<typename T = double> +class Homography2DNormalizedParameterization { + public: + typedef Eigen::Matrix<T, 8, 1> Parameters; // a, b, ... g, h + typedef Eigen::Matrix<T, 3, 3> Parameterized; // H + + /// Convert from the 8 parameters to a H matrix. + static void To(const Parameters &p, Parameterized *h) { + *h << p(0), p(1), p(2), + p(3), p(4), p(5), + p(6), p(7), 1.0; + } + + /// Convert from a H matrix to the 8 parameters. + static void From(const Parameterized &h, Parameters *p) { + *p << h(0, 0), h(0, 1), h(0, 2), + h(1, 0), h(1, 1), h(1, 2), + h(2, 0), h(2, 1); + } +}; + +/** A parameterization of the 2D homography matrix that uses 15 parameters so + * that the matrix is normalized (H(3,3) == 1). + * The homography matrix H is built from a list of 15 parameters (a, b,...n, o) + * as follows + * |a b c d| + * H = |e f g h| + * |i j k l| + * |m n o 1| + */ +template<typename T = double> +class Homography3DNormalizedParameterization { + public: + typedef Eigen::Matrix<T, 15, 1> Parameters; // a, b, ... n, o + typedef Eigen::Matrix<T, 4, 4> Parameterized; // H + + /// Convert from the 15 parameters to a H matrix. + static void To(const Parameters &p, Parameterized *h) { + *h << p(0), p(1), p(2), p(3), + p(4), p(5), p(6), p(7), + p(8), p(9), p(10), p(11), + p(12), p(13), p(14), 1.0; + } + + /// Convert from a H matrix to the 15 parameters. + static void From(const Parameterized &h, Parameters *p) { + *p << h(0, 0), h(0, 1), h(0, 2), h(0, 3), + h(1, 0), h(1, 1), h(1, 2), h(1, 3), + h(2, 0), h(2, 1), h(2, 2), h(2, 3), + h(3, 0), h(3, 1), h(3, 2); + } +}; + +} // namespace libmv + +#endif // LIBMV_MULTIVIEW_HOMOGRAPHY_PARAMETERIZATION_H_ diff --git a/extern/libmv/libmv/tracking/esm_region_tracker.cc b/extern/libmv/libmv/tracking/esm_region_tracker.cc index 221fa4d081b..a8dc46d439b 100644 --- a/extern/libmv/libmv/tracking/esm_region_tracker.cc +++ b/extern/libmv/libmv/tracking/esm_region_tracker.cc @@ -29,6 +29,7 @@ #include "libmv/image/correlation.h" #include "libmv/image/sample.h" #include "libmv/numeric/numeric.h" +#include "libmv/tracking/track_region.h" namespace libmv { @@ -72,6 +73,44 @@ bool EsmRegionTracker::Track(const FloatImage &image1, return false; } + // XXX + // TODO(keir): Delete the block between the XXX's once the planar tracker is + // integrated into blender. + // + // For now, to test, replace the ESM tracker with the Ceres tracker in + // translation mode. In the future, this should get removed and alloed to + // co-exist, since Ceres is not as fast as the ESM implementation since it + // specializes for translation. + double xx1[4], yy1[4]; + double xx2[4], yy2[4]; + // Clockwise winding, starting from the "origin" (top-left). 
+ xx1[0] = xx2[0] = x1 - half_window_size; + yy1[0] = yy2[0] = y1 - half_window_size; + + xx1[1] = xx2[1] = x1 + half_window_size; + yy1[1] = yy2[1] = y1 - half_window_size; + + xx1[2] = xx2[2] = x1 + half_window_size; + yy1[2] = yy2[2] = y1 + half_window_size; + + xx1[3] = xx2[3] = x1 - half_window_size; + yy1[3] = yy2[3] = y1 + half_window_size; + + TrackRegionOptions options; + options.mode = TrackRegionOptions::TRANSLATION; + options.max_iterations = 20; + options.sigma = sigma; + options.use_esm = true; + + TrackRegionResult result; + TrackRegion(image1, image2, xx1, yy1, options, xx2, yy2, &result); + + *x2 = xx2[0] + half_window_size; + *y2 = yy2[0] + half_window_size; + + return true; + + // XXX int width = 2 * half_window_size + 1; // TODO(keir): Avoid recomputing gradients for e.g. the pyramid tracker. diff --git a/extern/libmv/libmv/tracking/track_region.cc b/extern/libmv/libmv/tracking/track_region.cc new file mode 100644 index 00000000000..e65ead50c80 --- /dev/null +++ b/extern/libmv/libmv/tracking/track_region.cc @@ -0,0 +1,1391 @@ +// Copyright (c) 2012 libmv authors. +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to +// deal in the Software without restriction, including without limitation the +// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or +// sell copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +// IN THE SOFTWARE. +// +// Author: mierle@google.com (Keir Mierle) +// +// TODO(keir): While this tracking code works rather well, it has some +// outragous inefficiencies. There is probably a 5-10x speedup to be had if a +// smart coder went through the TODO's and made the suggested performance +// enhancements. + +// Necessary for M_E when building with MSVC. +#define _USE_MATH_DEFINES + +#include "libmv/tracking/track_region.h" + +#include <Eigen/SVD> +#include <Eigen/QR> +#include <iostream> +#include "ceres/ceres.h" +#include "libmv/logging/logging.h" +#include "libmv/image/image.h" +#include "libmv/image/sample.h" +#include "libmv/image/convolve.h" +#include "libmv/multiview/homography.h" +#include "libmv/numeric/numeric.h" + +namespace libmv { + +using ceres::Jet; +using ceres::JetOps; +using ceres::Chain; + +TrackRegionOptions::TrackRegionOptions() + : mode(TRANSLATION), + minimum_correlation(0), + max_iterations(20), + use_esm(true), + use_brute_initialization(true), + use_normalized_intensities(false), + sigma(0.9), + num_extra_points(0), + regularization_coefficient(0.0), + image1_mask(NULL) { +} + +namespace { + +// TODO(keir): Consider adding padding. 
+template<typename T> +bool InBounds(const FloatImage &image, + const T &x, + const T &y) { + return 0.0 <= x && x < image.Width() && + 0.0 <= y && y < image.Height(); +} + +bool AllInBounds(const FloatImage &image, + const double *x, + const double *y) { + for (int i = 0; i < 4; ++i) { + if (!InBounds(image, x[i], y[i])) { + return false; + } + } + return true; +} + +// Sample the image at position (x, y) but use the gradient, if present, to +// propagate derivatives from x and y. This is needed to integrate the numeric +// image gradients with Ceres's autodiff framework. +template<typename T> +static T SampleWithDerivative(const FloatImage &image_and_gradient, + const T &x, + const T &y) { + float scalar_x = JetOps<T>::GetScalar(x); + float scalar_y = JetOps<T>::GetScalar(y); + + // Note that sample[1] and sample[2] will be uninitialized in the scalar + // case, but that is not an issue because the Chain::Rule below will not read + // the uninitialized values. + float sample[3]; + if (JetOps<T>::IsScalar()) { + // For the scalar case, only sample the image. + sample[0] = SampleLinear(image_and_gradient, scalar_y, scalar_x, 0); + } else { + // For the derivative case, sample the gradient as well. + SampleLinear(image_and_gradient, scalar_y, scalar_x, sample); + } + T xy[2] = { x, y }; + return Chain<float, 2, T>::Rule(sample[0], sample + 1, xy); +} + +template<typename Warp> +class BoundaryCheckingCallback : public ceres::IterationCallback { + public: + BoundaryCheckingCallback(const FloatImage& image2, + const Warp &warp, + const double *x1, const double *y1) + : image2_(image2), warp_(warp), x1_(x1), y1_(y1) {} + + virtual ceres::CallbackReturnType operator()( + const ceres::IterationSummary& summary) { + // Warp the original 4 points with the current warp into image2. + double x2[4]; + double y2[4]; + for (int i = 0; i < 4; ++i) { + warp_.Forward(warp_.parameters, x1_[i], y1_[i], x2 + i, y2 + i); + } + // Enusre they are all in bounds. + if (!AllInBounds(image2_, x2, y2)) { + return ceres::SOLVER_ABORT; + } + return ceres::SOLVER_CONTINUE; + } + + private: + const FloatImage &image2_; + const Warp &warp_; + const double *x1_; + const double *y1_; +}; + +template<typename Warp> +class PixelDifferenceCostFunctor { + public: + PixelDifferenceCostFunctor(const TrackRegionOptions &options, + const FloatImage &image_and_gradient1, + const FloatImage &image_and_gradient2, + const Mat3 &canonical_to_image1, + int num_samples_x, + int num_samples_y, + const Warp &warp) + : options_(options), + image_and_gradient1_(image_and_gradient1), + image_and_gradient2_(image_and_gradient2), + canonical_to_image1_(canonical_to_image1), + num_samples_x_(num_samples_x), + num_samples_y_(num_samples_y), + warp_(warp), + pattern_and_gradient_(num_samples_y_, num_samples_x_, 3), + pattern_positions_(num_samples_y_, num_samples_x_, 2), + pattern_mask_(num_samples_y_, num_samples_x_, 1) { + ComputeCanonicalPatchAndNormalizer(); + } + + void ComputeCanonicalPatchAndNormalizer() { + src_mean_ = 0.0; + double num_samples = 0.0; + for (int r = 0; r < num_samples_y_; ++r) { + for (int c = 0; c < num_samples_x_; ++c) { + // Compute the position; cache it. + Vec3 image_position = canonical_to_image1_ * Vec3(c, r, 1); + image_position /= image_position(2); + pattern_positions_(r, c, 0) = image_position(0); + pattern_positions_(r, c, 1) = image_position(1); + + // Sample the pattern and gradients. + SampleLinear(image_and_gradient1_, + image_position(1), // SampleLinear is r, c. 
+ image_position(0), + &pattern_and_gradient_(r, c, 0)); + + // Sample sample the mask. + double mask_value = 1.0; + if (options_.image1_mask != NULL) { + SampleLinear(*options_.image1_mask, + image_position(1), // SampleLinear is r, c. + image_position(0), + &pattern_mask_(r, c, 0)); + mask_value = pattern_mask_(r, c); + } + src_mean_ += pattern_and_gradient_(r, c, 0) * mask_value; + num_samples += mask_value; + } + } + src_mean_ /= num_samples; + } + + template<typename T> + bool operator()(const T *warp_parameters, T *residuals) const { + if (options_.image1_mask != NULL) { + VLOG(2) << "Using a mask."; + } + for (int i = 0; i < Warp::NUM_PARAMETERS; ++i) { + VLOG(2) << "warp_parameters[" << i << "]: " << warp_parameters[i]; + } + + T dst_mean = T(1.0); + if (options_.use_normalized_intensities) { + ComputeNormalizingCoefficient(warp_parameters, + &dst_mean); + } + + int cursor = 0; + for (int r = 0; r < num_samples_y_; ++r) { + for (int c = 0; c < num_samples_x_; ++c) { + // Use the pre-computed image1 position. + Vec2 image1_position(pattern_positions_(r, c, 0), + pattern_positions_(r, c, 1)); + + // Sample the mask early; if it's zero, this pixel has no effect. This + // allows early bailout from the expensive sampling that happens below. + // + // Note that partial masks are not short circuited. To see why short + // circuiting produces bitwise-exact same results, consider that the + // residual for each pixel is + // + // residual = mask * (src - dst) , + // + // and for jets, multiplying by a scalar multiplies the derivative + // components by the scalar as well. Therefore, if the mask is exactly + // zero, then so too will the final residual and derivatives. + double mask_value = 1.0; + if (options_.image1_mask != NULL) { + mask_value = pattern_mask_(r, c); + if (mask_value == 0.0) { + residuals[cursor++] = T(0.0); + continue; + } + } + + // Compute the location of the destination pixel. + T image2_position[2]; + warp_.Forward(warp_parameters, + T(image1_position[0]), + T(image1_position[1]), + &image2_position[0], + &image2_position[1]); + + // Sample the destination, propagating derivatives. + T dst_sample = SampleWithDerivative(image_and_gradient2_, + image2_position[0], + image2_position[1]); + + // Sample the source. This is made complicated by ESM mode. + T src_sample; + if (options_.use_esm && !JetOps<T>::IsScalar()) { + // In ESM mode, the derivative of the source is also taken into + // account. This changes the linearization in a way that causes + // better convergence. Copy the derivative of the warp parameters + // onto the jets for the image1 position. This is the ESM hack. + T image1_position_jet[2] = { + image2_position[0], // Order is x, y. This matches the + image2_position[1] // derivative order in the patch. + }; + JetOps<T>::SetScalar(image1_position[0], image1_position_jet + 0); + JetOps<T>::SetScalar(image1_position[1], image1_position_jet + 1); + + // Now that the image1 positions have the jets applied from the + // image2 position (the ESM hack), chain the image gradients to + // obtain a sample with the derivative with respect to the warp + // parameters attached. + src_sample = Chain<float, 2, T>::Rule(pattern_and_gradient_(r, c), + &pattern_and_gradient_(r, c, 1), + image1_position_jet); + + // The jacobians for these should be averaged. Due to the subtraction + // below, flip the sign of the src derivative so that the effect + // after subtraction of the jets is that they are averaged. 
+ JetOps<T>::ScaleDerivative(-0.5, &src_sample); + JetOps<T>::ScaleDerivative(0.5, &dst_sample); + } else { + // This is the traditional, forward-mode KLT solution. + src_sample = T(pattern_and_gradient_(r, c)); + } + + // Normalize the samples by the mean values of each signal. The typical + // light model assumes multiplicative intensity changes with changing + // light, so this is a reasonable choice. Note that dst_mean has + // derivative information attached thanks to autodiff. + if (options_.use_normalized_intensities) { + src_sample /= T(src_mean_); + dst_sample /= dst_mean; + } + + // The difference is the error. + T error = src_sample - dst_sample; + + // Weight the error by the mask, if one is present. + if (options_.image1_mask != NULL) { + error *= T(mask_value); + } + residuals[cursor++] = error; + } + } + return true; + } + + // For normalized matching, the average and + template<typename T> + void ComputeNormalizingCoefficient(const T *warp_parameters, + T *dst_mean) const { + + *dst_mean = T(0.0); + double num_samples = 0.0; + for (int r = 0; r < num_samples_y_; ++r) { + for (int c = 0; c < num_samples_x_; ++c) { + // Use the pre-computed image1 position. + Vec2 image1_position(pattern_positions_(r, c, 0), + pattern_positions_(r, c, 1)); + + // Sample the mask early; if it's zero, this pixel has no effect. This + // allows early bailout from the expensive sampling that happens below. + double mask_value = 1.0; + if (options_.image1_mask != NULL) { + mask_value = pattern_mask_(r, c); + if (mask_value == 0.0) { + continue; + } + } + + // Compute the location of the destination pixel. + T image2_position[2]; + warp_.Forward(warp_parameters, + T(image1_position[0]), + T(image1_position[1]), + &image2_position[0], + &image2_position[1]); + + + // Sample the destination, propagating derivatives. + // TODO(keir): This accumulation can, surprisingly, be done as a + // pre-pass by using integral images. This is complicated by the need + // to store the jets in the integral image, but it is possible. + T dst_sample = SampleWithDerivative(image_and_gradient2_, + image2_position[0], + image2_position[1]); + + // Weight the sample by the mask, if one is present. + if (options_.image1_mask != NULL) { + dst_sample *= T(mask_value); + } + + *dst_mean += dst_sample; + num_samples += mask_value; + } + } + *dst_mean /= T(num_samples); + LG << "Normalization for dst:" << *dst_mean; + } + + // TODO(keir): Consider also computing the cost here. + double PearsonProductMomentCorrelationCoefficient( + const double *warp_parameters) const { + for (int i = 0; i < Warp::NUM_PARAMETERS; ++i) { + VLOG(2) << "Correlation warp_parameters[" << i << "]: " + << warp_parameters[i]; + } + + // The single-pass PMCC computation is somewhat numerically unstable, but + // it's sufficient for the tracker. + double sX = 0, sY = 0, sXX = 0, sYY = 0, sXY = 0; + + // Due to masking, it's important to account for fractional samples. + // For example, samples with a 50% mask are counted as a half sample. + double num_samples = 0; + + for (int r = 0; r < num_samples_y_; ++r) { + for (int c = 0; c < num_samples_x_; ++c) { + // Use the pre-computed image1 position. + Vec2 image1_position(pattern_positions_(r, c, 0), + pattern_positions_(r, c, 1)); + + double mask_value = 1.0; + if (options_.image1_mask != NULL) { + mask_value = pattern_mask_(r, c); + if (mask_value == 0.0) { + continue; + } + } + + // Compute the location of the destination pixel. 
+ double image2_position[2]; + warp_.Forward(warp_parameters, + image1_position[0], + image1_position[1], + &image2_position[0], + &image2_position[1]); + + double x = pattern_and_gradient_(r, c); + double y = SampleLinear(image_and_gradient2_, + image2_position[1], // SampleLinear is r, c. + image2_position[0]); + + // Weight the signals by the mask, if one is present. + if (options_.image1_mask != NULL) { + x *= mask_value; + y *= mask_value; + num_samples += mask_value; + } else { + num_samples++; + } + sX += x; + sY += y; + sXX += x*x; + sYY += y*y; + sXY += x*y; + } + } + // Normalize. + sX /= num_samples; + sY /= num_samples; + sXX /= num_samples; + sYY /= num_samples; + sXY /= num_samples; + + double var_x = sXX - sX*sX; + double var_y = sYY - sY*sY; + double covariance_xy = sXY - sX*sY; + + double correlation = covariance_xy / sqrt(var_x * var_y); + LG << "Covariance xy: " << covariance_xy + << ", var 1: " << var_x << ", var 2: " << var_y + << ", correlation: " << correlation; + return correlation; + } + + private: + const TrackRegionOptions &options_; + const FloatImage &image_and_gradient1_; + const FloatImage &image_and_gradient2_; + const Mat3 &canonical_to_image1_; + int num_samples_x_; + int num_samples_y_; + const Warp &warp_; + double src_mean_; + FloatImage pattern_and_gradient_; + + // This contains the position from where the cached pattern samples were + // taken from. This is also used to warp from src to dest without going from + // canonical pixels to src first. + FloatImage pattern_positions_; + + FloatImage pattern_mask_; +}; + +template<typename Warp> +class WarpRegularizingCostFunctor { + public: + WarpRegularizingCostFunctor(const TrackRegionOptions &options, + const double *x1, + const double *y1, + const double *x2_original, + const double *y2_original, + const Warp &warp) + : options_(options), + x1_(x1), + y1_(y1), + x2_original_(x2_original), + y2_original_(y2_original), + warp_(warp) { + // Compute the centroid of the first guess quad. + // TODO(keir): Use Quad class here. + original_centroid_[0] = 0.0; + original_centroid_[1] = 0.0; + for (int i = 0; i < 4; ++i) { + original_centroid_[0] += x2_original[i]; + original_centroid_[1] += y2_original[i]; + } + original_centroid_[0] /= 4; + original_centroid_[1] /= 4; + } + + template<typename T> + bool operator()(const T *warp_parameters, T *residuals) const { + T dst_centroid[2] = { T(0.0), T(0.0) }; + for (int i = 0; i < 4; ++i) { + T image1_position[2] = { T(x1_[i]), T(y1_[i]) }; + T image2_position[2]; + warp_.Forward(warp_parameters, + T(x1_[i]), + T(y1_[i]), + &image2_position[0], + &image2_position[1]); + + // Subtract the positions. Note that this ignores the centroids. + residuals[2 * i + 0] = image2_position[0] - image1_position[0]; + residuals[2 * i + 1] = image2_position[1] - image1_position[1]; + + // Accumulate the dst centroid. + dst_centroid[0] += image2_position[0]; + dst_centroid[1] += image2_position[1]; + } + dst_centroid[0] /= T(4.0); + dst_centroid[1] /= T(4.0); + + // Adjust for the centroids. + for (int i = 0; i < 4; ++i) { + residuals[2 * i + 0] += T(original_centroid_[0]) - dst_centroid[0]; + residuals[2 * i + 1] += T(original_centroid_[1]) - dst_centroid[1]; + } + + // Reweight the residuals. 
+ for (int i = 0; i < 8; ++i) { + residuals[i] *= T(options_.regularization_coefficient); + } + + return true; + } + + const TrackRegionOptions &options_; + const double *x1_; + const double *y1_; + const double *x2_original_; + const double *y2_original_; + double original_centroid_[2]; + const Warp &warp_; +}; + +// Compute the warp from rectangular coordinates, where one corner is the +// origin, and the opposite corner is at (num_samples_x, num_samples_y). +Mat3 ComputeCanonicalHomography(const double *x1, + const double *y1, + int num_samples_x, + int num_samples_y) { + Mat canonical(2, 4); + canonical << 0, num_samples_x, num_samples_x, 0, + 0, 0, num_samples_y, num_samples_y; + + Mat xy1(2, 4); + xy1 << x1[0], x1[1], x1[2], x1[3], + y1[0], y1[1], y1[2], y1[3]; + + Mat3 H; + if (!Homography2DFromCorrespondencesLinear(canonical, xy1, &H, 1e-12)) { + LG << "Couldn't construct homography."; + } + return H; +} + +class Quad { + public: + Quad(const double *x, const double *y) : x_(x), y_(y) { + // Compute the centroid and store it. + centroid_ = Vec2(0.0, 0.0); + for (int i = 0; i < 4; ++i) { + centroid_ += Vec2(x_[i], y_[i]); + } + centroid_ /= 4.0; + } + + // The centroid of the four points representing the quad. + const Vec2& Centroid() const { + return centroid_; + } + + // The average magnitude of the four points relative to the centroid. + double Scale() const { + double scale = 0.0; + for (int i = 0; i < 4; ++i) { + scale += (Vec2(x_[i], y_[i]) - Centroid()).norm(); + } + return scale / 4.0; + } + + Vec2 CornerRelativeToCentroid(int i) const { + return Vec2(x_[i], y_[i]) - centroid_; + } + + private: + const double *x_; + const double *y_; + Vec2 centroid_; +}; + +struct TranslationWarp { + TranslationWarp(const double *x1, const double *y1, + const double *x2, const double *y2) { + Vec2 t = Quad(x2, y2).Centroid() - Quad(x1, y1).Centroid(); + parameters[0] = t[0]; + parameters[1] = t[1]; + } + + template<typename T> + void Forward(const T *warp_parameters, + const T &x1, const T& y1, T *x2, T* y2) const { + *x2 = x1 + warp_parameters[0]; + *y2 = y1 + warp_parameters[1]; + } + + // Translation x, translation y. + enum { NUM_PARAMETERS = 2 }; + double parameters[NUM_PARAMETERS]; +}; + +struct TranslationScaleWarp { + TranslationScaleWarp(const double *x1, const double *y1, + const double *x2, const double *y2) + : q1(x1, y1) { + Quad q2(x2, y2); + + // The difference in centroids is the best guess for translation. + Vec2 t = q2.Centroid() - q1.Centroid(); + parameters[0] = t[0]; + parameters[1] = t[1]; + + // The difference in scales is the estimate for the scale. + parameters[2] = 1.0 - q2.Scale() / q1.Scale(); + } + + // The strange way of parameterizing the translation and scaling is to make + // the knobs that the optimizer sees easy to adjust. This is less important + // for the scaling case than the rotation case. + template<typename T> + void Forward(const T *warp_parameters, + const T &x1, const T& y1, T *x2, T* y2) const { + // Make the centroid of Q1 the origin. + const T x1_origin = x1 - q1.Centroid()(0); + const T y1_origin = y1 - q1.Centroid()(1); + + // Scale uniformly about the origin. + const T scale = 1.0 + warp_parameters[2]; + const T x1_origin_scaled = scale * x1_origin; + const T y1_origin_scaled = scale * y1_origin; + + // Translate back into the space of Q1 (but scaled). + const T x1_scaled = x1_origin_scaled + q1.Centroid()(0); + const T y1_scaled = y1_origin_scaled + q1.Centroid()(1); + + // Translate into the space of Q2. 
+ *x2 = x1_scaled + warp_parameters[0]; + *y2 = y1_scaled + warp_parameters[1]; + } + + // Translation x, translation y, scale. + enum { NUM_PARAMETERS = 3 }; + double parameters[NUM_PARAMETERS]; + + Quad q1; +}; + +// Assumes the given points are already zero-centroid and the same size. +Mat2 OrthogonalProcrustes(const Mat2 &correlation_matrix) { + Eigen::JacobiSVD<Mat2> svd(correlation_matrix, + Eigen::ComputeFullU | Eigen::ComputeFullV); + return svd.matrixV() * svd.matrixU().transpose(); +} + +struct TranslationRotationWarp { + TranslationRotationWarp(const double *x1, const double *y1, + const double *x2, const double *y2) + : q1(x1, y1) { + Quad q2(x2, y2); + + // The difference in centroids is the best guess for translation. + Vec2 t = q2.Centroid() - q1.Centroid(); + parameters[0] = t[0]; + parameters[1] = t[1]; + + // Obtain the rotation via orthorgonal procrustes. + Mat2 correlation_matrix; + for (int i = 0; i < 4; ++i) { + correlation_matrix += q1.CornerRelativeToCentroid(i) * + q2.CornerRelativeToCentroid(i).transpose(); + } + Mat2 R = OrthogonalProcrustes(correlation_matrix); + parameters[2] = atan2(R(1, 0), R(0, 0)); + + LG << "Correlation_matrix:\n" << correlation_matrix; + LG << "R:\n" << R; + LG << "Theta:" << parameters[2]; + } + + // The strange way of parameterizing the translation and rotation is to make + // the knobs that the optimizer sees easy to adjust. The reason is that while + // it is always the case that it is possible to express composed rotations + // and translations as a single translation and rotation, the numerical + // values needed for the composition are often large in magnitude. This is + // enough to throw off any minimizer, since it must do the equivalent of + // compose rotations and translations. + // + // Instead, use the parameterization below that offers a parameterization + // that exposes the degrees of freedom in a way amenable to optimization. + template<typename T> + void Forward(const T *warp_parameters, + const T &x1, const T& y1, T *x2, T* y2) const { + // Make the centroid of Q1 the origin. + const T x1_origin = x1 - q1.Centroid()(0); + const T y1_origin = y1 - q1.Centroid()(1); + + // Rotate about the origin (i.e. centroid of Q1). + const T theta = warp_parameters[2]; + const T costheta = cos(theta); + const T sintheta = sin(theta); + const T x1_origin_rotated = costheta * x1_origin - sintheta * y1_origin; + const T y1_origin_rotated = sintheta * x1_origin + costheta * y1_origin; + + // Translate back into the space of Q1 (but scaled). + const T x1_rotated = x1_origin_rotated + q1.Centroid()(0); + const T y1_rotated = y1_origin_rotated + q1.Centroid()(1); + + // Translate into the space of Q2. + *x2 = x1_rotated + warp_parameters[0]; + *y2 = y1_rotated + warp_parameters[1]; + } + + // Translation x, translation y, rotation about the center of Q1 degrees. + enum { NUM_PARAMETERS = 3 }; + double parameters[NUM_PARAMETERS]; + + Quad q1; +}; + +struct TranslationRotationScaleWarp { + TranslationRotationScaleWarp(const double *x1, const double *y1, + const double *x2, const double *y2) + : q1(x1, y1) { + Quad q2(x2, y2); + + // The difference in centroids is the best guess for translation. + Vec2 t = q2.Centroid() - q1.Centroid(); + parameters[0] = t[0]; + parameters[1] = t[1]; + + // The difference in scales is the estimate for the scale. + parameters[2] = 1.0 - q2.Scale() / q1.Scale(); + + // Obtain the rotation via orthorgonal procrustes. 
+ Mat2 correlation_matrix; + for (int i = 0; i < 4; ++i) { + correlation_matrix += q1.CornerRelativeToCentroid(i) * + q2.CornerRelativeToCentroid(i).transpose(); + } + Mat2 R = OrthogonalProcrustes(correlation_matrix); + parameters[3] = atan2(R(1, 0), R(0, 0)); + + LG << "Correlation_matrix:\n" << correlation_matrix; + LG << "R:\n" << R; + LG << "Theta:" << parameters[3]; + } + + // The strange way of parameterizing the translation and rotation is to make + // the knobs that the optimizer sees easy to adjust. The reason is that while + // it is always the case that it is possible to express composed rotations + // and translations as a single translation and rotation, the numerical + // values needed for the composition are often large in magnitude. This is + // enough to throw off any minimizer, since it must do the equivalent of + // compose rotations and translations. + // + // Instead, use the parameterization below that offers a parameterization + // that exposes the degrees of freedom in a way amenable to optimization. + template<typename T> + void Forward(const T *warp_parameters, + const T &x1, const T& y1, T *x2, T* y2) const { + // Make the centroid of Q1 the origin. + const T x1_origin = x1 - q1.Centroid()(0); + const T y1_origin = y1 - q1.Centroid()(1); + + // Rotate about the origin (i.e. centroid of Q1). + const T theta = warp_parameters[3]; + const T costheta = cos(theta); + const T sintheta = sin(theta); + const T x1_origin_rotated = costheta * x1_origin - sintheta * y1_origin; + const T y1_origin_rotated = sintheta * x1_origin + costheta * y1_origin; + + // Scale uniformly about the origin. + const T scale = 1.0 + warp_parameters[2]; + const T x1_origin_rotated_scaled = scale * x1_origin_rotated; + const T y1_origin_rotated_scaled = scale * y1_origin_rotated; + + // Translate back into the space of Q1 (but scaled and rotated). + const T x1_rotated_scaled = x1_origin_rotated_scaled + q1.Centroid()(0); + const T y1_rotated_scaled = y1_origin_rotated_scaled + q1.Centroid()(1); + + // Translate into the space of Q2. + *x2 = x1_rotated_scaled + warp_parameters[0]; + *y2 = y1_rotated_scaled + warp_parameters[1]; + } + + // Translation x, translation y, rotation about the center of Q1 degrees, + // scale. + enum { NUM_PARAMETERS = 4 }; + double parameters[NUM_PARAMETERS]; + + Quad q1; +}; + +struct AffineWarp { + AffineWarp(const double *x1, const double *y1, + const double *x2, const double *y2) + : q1(x1, y1) { + Quad q2(x2, y2); + + // The difference in centroids is the best guess for translation. + Vec2 t = q2.Centroid() - q1.Centroid(); + parameters[0] = t[0]; + parameters[1] = t[1]; + + // Estimate the four affine parameters with the usual least squares. + Mat Q1(8, 4); + Vec Q2(8); + for (int i = 0; i < 4; ++i) { + Vec2 v1 = q1.CornerRelativeToCentroid(i); + Vec2 v2 = q2.CornerRelativeToCentroid(i); + + Q1.row(2 * i + 0) << v1[0], v1[1], 0, 0 ; + Q1.row(2 * i + 1) << 0, 0, v1[0], v1[1]; + + Q2(2 * i + 0) = v2[0]; + Q2(2 * i + 1) = v2[1]; + } + + // TODO(keir): Check solution quality. + Vec4 a = Q1.jacobiSvd(Eigen::ComputeThinU | Eigen::ComputeThinV).solve(Q2); + parameters[2] = a[0]; + parameters[3] = a[1]; + parameters[4] = a[2]; + parameters[5] = a[3]; + + LG << "a:" << a.transpose(); + LG << "t:" << t.transpose(); + } + + // See comments in other parameterizations about why the centroid is used. + template<typename T> + void Forward(const T *p, const T &x1, const T& y1, T *x2, T* y2) const { + // Make the centroid of Q1 the origin. 
+ const T x1_origin = x1 - q1.Centroid()(0);
+ const T y1_origin = y1 - q1.Centroid()(1);
+
+ // Apply the affine transformation.
+ const T x1_origin_affine = p[2] * x1_origin + p[3] * y1_origin;
+ const T y1_origin_affine = p[4] * x1_origin + p[5] * y1_origin;
+
+ // Translate back into the space of Q1 (but affine transformed).
+ const T x1_affine = x1_origin_affine + q1.Centroid()(0);
+ const T y1_affine = y1_origin_affine + q1.Centroid()(1);
+
+ // Translate into the space of Q2.
+ *x2 = x1_affine + p[0];
+ *y2 = y1_affine + p[1];
+ }
+
+ // Translation x, translation y, then the four entries of the 2x2 affine
+ // transformation applied about the centroid of Q1.
+ enum { NUM_PARAMETERS = 6 };
+ double parameters[NUM_PARAMETERS];
+
+ Quad q1;
+};
+
+struct HomographyWarp {
+ HomographyWarp(const double *x1, const double *y1,
+ const double *x2, const double *y2) {
+ Mat quad1(2, 4);
+ quad1 << x1[0], x1[1], x1[2], x1[3],
+ y1[0], y1[1], y1[2], y1[3];
+
+ Mat quad2(2, 4);
+ quad2 << x2[0], x2[1], x2[2], x2[3],
+ y2[0], y2[1], y2[2], y2[3];
+
+ Mat3 H;
+ if (!Homography2DFromCorrespondencesLinear(quad1, quad2, &H, 1e-12)) {
+ LG << "Couldn't construct homography.";
+ }
+
+ // Assume H(2, 2) != 0, and fix scale at H(2, 2) == 1.0.
+ H /= H(2, 2);
+
+ // Assume H is close to identity, so subtract out the diagonal.
+ H(0, 0) -= 1.0;
+ H(1, 1) -= 1.0;
+
+ CHECK_NE(H(2, 2), 0.0) << H;
+ for (int i = 0; i < 8; ++i) {
+ parameters[i] = H(i / 3, i % 3);
+ LG << "Parameters[" << i << "]: " << parameters[i];
+ }
+ }
+
+ template<typename T>
+ static void Forward(const T *p,
+ const T &x1, const T& y1, T *x2, T* y2) {
+ // Homography warp with manual 3x3 matrix multiply.
+ const T xx2 = (1.0 + p[0]) * x1 + p[1] * y1 + p[2];
+ const T yy2 = p[3] * x1 + (1.0 + p[4]) * y1 + p[5];
+ const T zz2 = p[6] * x1 + p[7] * y1 + 1.0;
+ *x2 = xx2 / zz2;
+ *y2 = yy2 / zz2;
+ }
+
+ enum { NUM_PARAMETERS = 8 };
+ double parameters[NUM_PARAMETERS];
+};
+
+// Determine the number of samples to use for x and y. Quad winding goes:
+//
+// 0 1
+// 3 2
+//
+// The idea is to take the maximum x or y distance. This may be oversampling.
+// TODO(keir): Investigate the various choices; perhaps average is better?
+void PickSampling(const double *x1, const double *y1,
+ const double *x2, const double *y2,
+ int *num_samples_x, int *num_samples_y) {
+ Vec2 a0(x1[0], y1[0]);
+ Vec2 a1(x1[1], y1[1]);
+ Vec2 a2(x1[2], y1[2]);
+ Vec2 a3(x1[3], y1[3]);
+
+ Vec2 b0(x2[0], y2[0]);
+ Vec2 b1(x2[1], y2[1]);
+ Vec2 b2(x2[2], y2[2]);
+ Vec2 b3(x2[3], y2[3]);
+
+ double x_dimensions[4] = {
+ (a1 - a0).norm(),
+ (a3 - a2).norm(),
+ (b1 - b0).norm(),
+ (b3 - b2).norm()
+ };
+
+ double y_dimensions[4] = {
+ (a3 - a0).norm(),
+ (a1 - a2).norm(),
+ (b3 - b0).norm(),
+ (b1 - b2).norm()
+ };
+ const double kScaleFactor = 1.0;
+ *num_samples_x = static_cast<int>(
+ kScaleFactor * *std::max_element(x_dimensions, x_dimensions + 4));
+ *num_samples_y = static_cast<int>(
+ kScaleFactor * *std::max_element(y_dimensions, y_dimensions + 4));
+ LG << "Automatic num_samples_x: " << *num_samples_x
+ << ", num_samples_y: " << *num_samples_y;
+}
+
+bool SearchAreaTooBigForDescent(const FloatImage &image2,
+ const double *x2, const double *y2) {
+ // TODO(keir): Check the bounds and enable only when it makes sense.
+ return true;
+}
+
+bool PointOnRightHalfPlane(const Vec2 &a, const Vec2 &b, double x, double y) {
+ Vec2 ba = b - a;
+ return ((Vec2(x, y) - b).transpose() * Vec2(-ba.y(), ba.x())) > 0;
+}
+
+// Determine if a point is in a quad. The quad is arranged as:
+//
+//   +-------> x
+//   |
+//   |  a0------a1
+//   |  |        |
+//   |  |        |
+//   |  |        |
+//   |  a3------a2
+//   v
+//   y
+//
+// The implementation does up to four half-plane comparisons.
+bool PointInQuad(const double *xs, const double *ys, double x, double y) {
+ Vec2 a0(xs[0], ys[0]);
+ Vec2 a1(xs[1], ys[1]);
+ Vec2 a2(xs[2], ys[2]);
+ Vec2 a3(xs[3], ys[3]);
+
+ return PointOnRightHalfPlane(a0, a1, x, y) &&
+ PointOnRightHalfPlane(a1, a2, x, y) &&
+ PointOnRightHalfPlane(a2, a3, x, y) &&
+ PointOnRightHalfPlane(a3, a0, x, y);
+}
+
+// This makes it possible to map between Eigen float arrays and FloatImage
+// without conversions.
+typedef Eigen::Array<float, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor> FloatArray;
+
+// This creates a pattern in the frame of image2, from the pixels in image1,
+// based on the initial guess represented by the two quads x1, y1, and x2, y2.
+template<typename Warp>
+void CreateBrutePattern(const double *x1, const double *y1,
+ const double *x2, const double *y2,
+ const FloatImage &image1,
+ const FloatImage *image1_mask,
+ FloatArray *pattern,
+ FloatArray *mask,
+ int *origin_x,
+ int *origin_y) {
+ // Get integer bounding box of quad2 in image2.
+ int min_x = static_cast<int>(floor(*std::min_element(x2, x2 + 4)));
+ int min_y = static_cast<int>(floor(*std::min_element(y2, y2 + 4)));
+ int max_x = static_cast<int>(ceil (*std::max_element(x2, x2 + 4)));
+ int max_y = static_cast<int>(ceil (*std::max_element(y2, y2 + 4)));
+
+ int w = max_x - min_x;
+ int h = max_y - min_y;
+
+ pattern->resize(h, w);
+ mask->resize(h, w);
+
+ Warp inverse_warp(x2, y2, x1, y1);
+
+ // r,c are in the coordinate frame of image2.
+ for (int r = min_y; r < max_y; ++r) {
+ for (int c = min_x; c < max_x; ++c) {
+ // i and j are in the coordinate frame of the pattern in image2.
+ int i = r - min_y;
+ int j = c - min_x;
+
+ double dst_x = c;
+ double dst_y = r;
+ double src_x;
+ double src_y;
+ inverse_warp.Forward(inverse_warp.parameters,
+ dst_x, dst_y,
+ &src_x, &src_y);
+
+ if (PointInQuad(x1, y1, src_x, src_y)) {
+ (*pattern)(i, j) = SampleLinear(image1, src_y, src_x);
+ (*mask)(i, j) = 1.0;
+ if (image1_mask) {
+ (*mask)(i, j) = SampleLinear(*image1_mask, src_y, src_x);
+ }
+ } else {
+ (*pattern)(i, j) = 0.0;
+ (*mask)(i, j) = 0.0;
+ }
+ }
+ }
+ *origin_x = min_x;
+ *origin_y = min_y;
+}
+
+// Compute a translation-only estimate of the warp, using brute force search. A
+// smarter implementation would use the FFT to compute the normalized cross
+// correlation. Instead, this is a dumb implementation. Surprisingly, it is
+// fast enough in practice.
+//
+// TODO(keir): The normalization is less effective for the brute force search
+// than it is with the Ceres solver. It's unclear if this is a bug or due to
+// the original frame being too different from the reprojected reference in the
+// destination frame.
+//
+// The likely solution is to use the previous frame, instead of the original
+// pattern, when doing brute initialization. Unfortunately that implies a
+// totally different warping interface, since access to more than just the
+// source and current destination frames is necessary.
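+//
+// For intuition, the score minimized by the exhaustive search below is, in
+// rough pseudo-notation,
+//
+//   SAD(r, c) = sum_ij mask(i, j) * |pattern(i, j) - s(r, c) * search(r + i, c + j)|
+//
+// where s(r, c) is 1.0, or the inverse mean of the masked search block when
+// use_normalized_intensities is set (in that case the pattern itself is also
+// pre-scaled by its inverse masked mean).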
+template<typename Warp>
+void BruteTranslationOnlyInitialize(const FloatImage &image1,
+ const FloatImage *image1_mask,
+ const FloatImage &image2,
+ const int num_extra_points,
+ const bool use_normalized_intensities,
+ const double *x1, const double *y1,
+ double *x2, double *y2) {
+ // Create the pattern to match in the space of image2, assuming our initial
+ // guess isn't too far from the template in image1. If there is no image1
+ // mask, then the resulting mask is binary.
+ FloatArray pattern;
+ FloatArray mask;
+ int origin_x = -1, origin_y = -1;
+ CreateBrutePattern<Warp>(x1, y1, x2, y2,
+ image1, image1_mask,
+ &pattern, &mask,
+ &origin_x, &origin_y);
+
+ // For normalization, premultiply the pattern by the inverse pattern mean.
+ double mask_sum = 1.0;
+ if (use_normalized_intensities) {
+ mask_sum = mask.sum();
+ double inverse_pattern_mean = mask_sum / ((mask * pattern).sum());
+ pattern *= inverse_pattern_mean;
+ }
+
+ // Use Eigen on the images via maps for strong vectorization.
+ Map<const FloatArray> search(image2.Data(), image2.Height(), image2.Width());
+
+ // Try all possible locations inside the search area. Yes, everywhere.
+ //
+ // TODO(keir): There are a number of possible optimizations here. One choice
+ // is to make a grid and only try one out of every N possible samples.
+ //
+ // Another, slightly more clever idea is to compute some sort of spatial
+ // frequency distribution of the pattern patch. If the spatial resolution is
+ // high (e.g. a grating pattern or fine lines) then checking every possible
+ // translation is necessary, since a 1-pixel shift may induce a massive
+ // change in the cost function. If the image is a blob or splotch with blurry
+ // edges, then fewer samples are necessary since a few pixels of offset won't
+ // change the cost function much.
+ double best_sad = std::numeric_limits<double>::max();
+ int best_r = -1;
+ int best_c = -1;
+ int w = pattern.cols();
+ int h = pattern.rows();
+ for (int r = 0; r < (image2.Height() - h); ++r) {
+ for (int c = 0; c < (image2.Width() - w); ++c) {
+ // Compute the weighted sum of absolute differences, Eigen style. Note
+ // that the block from the search image is never stored in a variable, to
+ // avoid copying overhead and permit inlining.
+ double sad;
+ if (use_normalized_intensities) {
+ // TODO(keir): It's really dumb to recompute the search mean for every
+ // shift. A smarter implementation would use summed area tables
+ // instead, reducing the mean calculation to an O(1) operation.
+ double inverse_search_mean =
+ mask_sum / ((mask * search.block(r, c, h, w)).sum());
+ sad = (mask * (pattern - (search.block(r, c, h, w) *
+ inverse_search_mean))).abs().sum();
+ } else {
+ sad = (mask * (pattern - search.block(r, c, h, w))).abs().sum();
+ }
+ if (sad < best_sad) {
+ best_r = r;
+ best_c = c;
+ best_sad = sad;
+ }
+ }
+ }
+ CHECK_NE(best_r, -1);
+ CHECK_NE(best_c, -1);
+
+ LG << "Brute force translation found a shift. "
+ << "best_c: " << best_c << ", best_r: " << best_r << ", "
+ << "origin_x: " << origin_x << ", origin_y: " << origin_y << ", "
+ << "dc: " << (best_c - origin_x) << ", "
+ << "dr: " << (best_r - origin_y)
+ << ", tried " << ((image2.Height() - h) * (image2.Width() - w))
+ << " shifts.";
+
+ // Apply the shift.
+ for (int i = 0; i < 4 + num_extra_points; ++i) { + x2[i] += best_c - origin_x; + y2[i] += best_r - origin_y; + } +} + +} // namespace + +template<typename Warp> +void TemplatedTrackRegion(const FloatImage &image1, + const FloatImage &image2, + const double *x1, const double *y1, + const TrackRegionOptions &options, + double *x2, double *y2, + TrackRegionResult *result) { + for (int i = 0; i < 4; ++i) { + LG << "P" << i << ": (" << x1[i] << ", " << y1[i] << "); guess (" + << x2[i] << ", " << y2[i] << "); (dx, dy): (" << (x2[i] - x1[i]) << ", " + << (y2[i] - y1[i]) << ")."; + } + if (options.use_normalized_intensities) { + LG << "Using normalized intensities."; + } + + // Bail early if the points are already outside. + if (!AllInBounds(image1, x1, y1)) { + result->termination = TrackRegionResult::SOURCE_OUT_OF_BOUNDS; + return; + } + if (!AllInBounds(image2, x2, y2)) { + result->termination = TrackRegionResult::DESTINATION_OUT_OF_BOUNDS; + return; + } + // TODO(keir): Check quads to ensure there is some area. + + // Keep a copy of the "original" guess for regularization. + double x2_original[4]; + double y2_original[4]; + for (int i = 0; i < 4; ++i) { + x2_original[i] = x2[i]; + y2_original[i] = y2[i]; + } + + // Prepare the image and gradient. + Array3Df image_and_gradient1; + Array3Df image_and_gradient2; + BlurredImageAndDerivativesChannels(image1, options.sigma, + &image_and_gradient1); + BlurredImageAndDerivativesChannels(image2, options.sigma, + &image_and_gradient2); + + // Possibly do a brute-force translation-only initialization. + if (SearchAreaTooBigForDescent(image2, x2, y2) && + options.use_brute_initialization) { + LG << "Running brute initialization..."; + BruteTranslationOnlyInitialize<Warp>(image_and_gradient1, + options.image1_mask, + image2, + options.num_extra_points, + options.use_normalized_intensities, + x1, y1, x2, y2); + for (int i = 0; i < 4; ++i) { + LG << "P" << i << ": (" << x1[i] << ", " << y1[i] << "); brute (" + << x2[i] << ", " << y2[i] << "); (dx, dy): (" << (x2[i] - x1[i]) + << ", " << (y2[i] - y1[i]) << ")."; + } + } + + // Prepare the initial warp parameters from the four correspondences. + // Note: This must happen after the brute initialization runs, since the + // brute initialization mutates x2 and y2 in place. + Warp warp(x1, y1, x2, y2); + + // Decide how many samples to use in the x and y dimensions. + int num_samples_x; + int num_samples_y; + PickSampling(x1, y1, x2, y2, &num_samples_x, &num_samples_y); + + + // Compute the warp from rectangular coordinates. + Mat3 canonical_homography = ComputeCanonicalHomography(x1, y1, + num_samples_x, + num_samples_y); + + ceres::Problem problem; + + // Construct the warp cost function. AutoDiffCostFunction takes ownership. 
+ PixelDifferenceCostFunctor<Warp> *pixel_difference_cost_function =
+ new PixelDifferenceCostFunctor<Warp>(options,
+ image_and_gradient1,
+ image_and_gradient2,
+ canonical_homography,
+ num_samples_x,
+ num_samples_y,
+ warp);
+ problem.AddResidualBlock(
+ new ceres::AutoDiffCostFunction<
+ PixelDifferenceCostFunctor<Warp>,
+ ceres::DYNAMIC,
+ Warp::NUM_PARAMETERS>(pixel_difference_cost_function,
+ num_samples_x * num_samples_y),
+ NULL,
+ warp.parameters);
+
+ // Construct the regularizing cost function.
+ if (options.regularization_coefficient != 0.0) {
+ WarpRegularizingCostFunctor<Warp> *regularizing_warp_cost_function =
+ new WarpRegularizingCostFunctor<Warp>(options,
+ x1, y1,
+ x2_original,
+ y2_original,
+ warp);
+
+ problem.AddResidualBlock(
+ new ceres::AutoDiffCostFunction<
+ WarpRegularizingCostFunctor<Warp>,
+ 8 /* num_residuals */,
+ Warp::NUM_PARAMETERS>(regularizing_warp_cost_function),
+ NULL,
+ warp.parameters);
+ }
+
+ // Configure the solve.
+ ceres::Solver::Options solver_options;
+ solver_options.linear_solver_type = ceres::DENSE_QR;
+ solver_options.max_num_iterations = options.max_iterations;
+ solver_options.update_state_every_iteration = true;
+ solver_options.parameter_tolerance = 1e-16;
+ solver_options.function_tolerance = 1e-16;
+
+ // Prevent the corners from going outside the destination image.
+ BoundaryCheckingCallback<Warp> callback(image2, warp, x1, y1);
+ solver_options.callbacks.push_back(&callback);
+
+ // Run the solve.
+ ceres::Solver::Summary summary;
+ ceres::Solve(solver_options, &problem, &summary);
+
+ LG << "Summary:\n" << summary.FullReport();
+
+ // Update the four points with the found solution; if the solver failed, then
+ // the warp parameters are the identity (so ignore failure).
+ //
+ // Also warp any extra points on the end of the array.
+ for (int i = 0; i < 4 + options.num_extra_points; ++i) {
+ warp.Forward(warp.parameters, x1[i], y1[i], x2 + i, y2 + i);
+ LG << "Warped point " << i << ": (" << x1[i] << ", " << y1[i] << ") -> ("
+ << x2[i] << ", " << y2[i] << "); (dx, dy): (" << (x2[i] - x1[i]) << ", "
+ << (y2[i] - y1[i]) << ").";
+ }
+
+ // TODO(keir): Update the result statistics.
+ // TODO(keir): Add a normalize-cross-correlation variant.
+
+ // USER_ABORT means the boundary checking callback aborted the solve because
+ // a corner fell outside of the destination image.
+ if (summary.termination_type == ceres::USER_ABORT) {
+ result->termination = TrackRegionResult::FELL_OUT_OF_BOUNDS;
+ return;
+ }
+#define HANDLE_TERMINATION(termination_enum) \
+ if (summary.termination_type == ceres::termination_enum) { \
+ result->termination = TrackRegionResult::termination_enum; \
+ return; \
+ }
+
+ // Avoid computing correlation for tracking failures.
+ HANDLE_TERMINATION(DID_NOT_RUN);
+ HANDLE_TERMINATION(NUMERICAL_FAILURE);
+
+ // Otherwise, run a final correlation check.
+ if (options.minimum_correlation > 0.0) {
+ result->correlation = pixel_difference_cost_function->
+ PearsonProductMomentCorrelationCoefficient(warp.parameters);
+ if (result->correlation < options.minimum_correlation) {
+ LG << "Failing with insufficient correlation.";
+ result->termination = TrackRegionResult::INSUFFICIENT_CORRELATION;
+ return;
+ }
+ }
+
+ HANDLE_TERMINATION(PARAMETER_TOLERANCE);
+ HANDLE_TERMINATION(FUNCTION_TOLERANCE);
+ HANDLE_TERMINATION(GRADIENT_TOLERANCE);
+ HANDLE_TERMINATION(NO_CONVERGENCE);
+#undef HANDLE_TERMINATION
+}
+
+void TrackRegion(const FloatImage &image1,
+ const FloatImage &image2,
+ const double *x1, const double *y1,
+ const TrackRegionOptions &options,
+ double *x2, double *y2,
+ TrackRegionResult *result) {
+ // Dispatch on the mode enum with a macro, since the warp model must be a
+ // compile-time template parameter for automatic differentiation.
+#define HANDLE_MODE(mode_enum, mode_type) \
+ if (options.mode == TrackRegionOptions::mode_enum) { \
+ TemplatedTrackRegion<mode_type>(image1, image2, \
+ x1, y1, \
+ options, \
+ x2, y2, \
+ result); \
+ return; \
+ }
+ HANDLE_MODE(TRANSLATION, TranslationWarp);
+ HANDLE_MODE(TRANSLATION_SCALE, TranslationScaleWarp);
+ HANDLE_MODE(TRANSLATION_ROTATION, TranslationRotationWarp);
+ HANDLE_MODE(TRANSLATION_ROTATION_SCALE, TranslationRotationScaleWarp);
+ HANDLE_MODE(AFFINE, AffineWarp);
+ HANDLE_MODE(HOMOGRAPHY, HomographyWarp);
+#undef HANDLE_MODE
+}
+
+bool SamplePlanarPatch(const FloatImage &image,
+ const double *xs, const double *ys,
+ int num_samples_x, int num_samples_y,
+ FloatImage *patch,
+ double *warped_position_x, double *warped_position_y) {
+ // Bail early if the points are outside the image.
+ if (!AllInBounds(image, xs, ys)) {
+ LG << "Can't sample patch: out of bounds.";
+ return false;
+ }
+
+ // Make the patch have the appropriate size, and match the depth of image.
+ patch->Resize(num_samples_y, num_samples_x, image.Depth());
+
+ // Compute the warp from rectangular coordinates.
+ Mat3 canonical_homography = ComputeCanonicalHomography(xs, ys,
+ num_samples_x,
+ num_samples_y);
+
+ // Walk over the coordinates in the canonical space, sampling from the image
+ // in the original space and copying the result into the patch.
+ for (int r = 0; r < num_samples_y; ++r) {
+ for (int c = 0; c < num_samples_x; ++c) {
+ Vec3 image_position = canonical_homography * Vec3(c, r, 1);
+ image_position /= image_position(2);
+ SampleLinear(image, image_position(1),
+ image_position(0),
+ &(*patch)(r, c, 0));
+ }
+ }
+
+ Vec3 warped_position = canonical_homography.inverse() * Vec3(xs[4], ys[4], 1);
+ warped_position /= warped_position(2);
+
+ *warped_position_x = warped_position(0);
+ *warped_position_y = warped_position(1);
+
+ return true;
+}
+
+} // namespace libmv
diff --git a/extern/libmv/libmv/tracking/track_region.h b/extern/libmv/libmv/tracking/track_region.h
new file mode 100644
index 00000000000..0de11239da6
--- /dev/null
+++ b/extern/libmv/libmv/tracking/track_region.h
@@ -0,0 +1,146 @@
+// Copyright (c) 2012 libmv authors.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to
+// deal in the Software without restriction, including without limitation the
+// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+// sell copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+// IN THE SOFTWARE.
+
+#ifndef LIBMV_TRACKING_TRACK_REGION_H_
+#define LIBMV_TRACKING_TRACK_REGION_H_
+
+// Necessary for M_E when building with MSVC.
+#define _USE_MATH_DEFINES
+
+#include "libmv/tracking/esm_region_tracker.h"
+
+#include "libmv/image/image.h"
+#include "libmv/image/sample.h"
+#include "libmv/numeric/numeric.h"
+
+namespace libmv {
+
+struct TrackRegionOptions {
+ TrackRegionOptions();
+
+ enum Mode {
+ TRANSLATION,
+ TRANSLATION_ROTATION,
+ TRANSLATION_SCALE,
+ TRANSLATION_ROTATION_SCALE,
+ AFFINE,
+ HOMOGRAPHY,
+ };
+ Mode mode;
+
+ double minimum_correlation;
+ int max_iterations;
+
+ // Use the "Efficient Second-order Minimization" scheme. This increases
+ // convergence speed at the cost of more per-iteration work.
+ bool use_esm;
+
+ // If true, apply a brute-force translation-only search before attempting the
+ // full search. This is not enabled if the destination image ("image2") is
+ // too small; in that case either the basin of attraction is close enough
+ // that the nearby minimum is correct, or the search area is too small for a
+ // brute-force search to help.
+ bool use_brute_initialization;
+
+ // If true, normalize the image patches by their mean before doing the sum of
+ // squared error calculation. This is reasonable since the effect of
+ // increasing light intensity is multiplicative on the pixel intensities.
+ //
+ // Note: This does nearly double the solving time, so it is not advised to
+ // turn this on all the time.
+ bool use_normalized_intensities;
+
+ // The size in pixels of the blur kernel used to both smooth the image and
+ // take the image derivative.
+ double sigma;
+
+ // Extra points that should get transformed by the warp. This is useful
+ // because the actual warp parameters are not exposed.
+ int num_extra_points;
+
+ // For motion models other than translation, the optimizer sometimes has
+ // trouble deciding what to do around flat areas in the cost function. This
+ // leads to the optimizer picking poor solutions near the minimum. Visually,
+ // the effect is that the quad corners jiggle around, even though the center
+ // of the patch is well estimated. regularization_coefficient controls a term
+ // in the sum of squared error cost that makes it expensive for the optimizer
+ // to pick a warp that changes the shape of the patch dramatically (e.g.
+ // rotating, scaling, skewing, etc).
+ //
+ // In particular it adds an 8-residual cost function to the optimization,
+ // where each corner induces 2 residuals: the difference between the warped
+ // corner and the initial guess. However, the patch centroids are subtracted
+ // so that only patch distortions are penalized.
+ //
+ // If zero, no regularization is used.
+ double regularization_coefficient;
+
+ // If non-null, this is used as the pattern mask. It should match the size of
+ // image1, even though only values inside the image1 quad are examined. The
+ // values must be in the range 0.0 to 1.0.
+ FloatImage *image1_mask;
+};
+
+struct TrackRegionResult {
+ enum Termination {
+ // Ceres termination types, duplicated here (though not with the same
+ // integer values).
+ PARAMETER_TOLERANCE,
+ FUNCTION_TOLERANCE,
+ GRADIENT_TOLERANCE,
+ NO_CONVERGENCE,
+ DID_NOT_RUN,
+ NUMERICAL_FAILURE,
+
+ // Libmv specific errors.
+ SOURCE_OUT_OF_BOUNDS,
+ DESTINATION_OUT_OF_BOUNDS,
+ FELL_OUT_OF_BOUNDS,
+ INSUFFICIENT_CORRELATION,
+ CONFIGURATION_ERROR,
+ };
+ Termination termination;
+
+ int num_iterations;
+ double correlation;
+
+ // Final parameters?
+ bool used_brute_translation_initialization;
+};
+
+// Always needs 4 correspondences.
+void TrackRegion(const FloatImage &image1,
+ const FloatImage &image2,
+ const double *x1, const double *y1,
+ const TrackRegionOptions &options,
+ double *x2, double *y2,
+ TrackRegionResult *result);
+
+// Sample a "canonical" version of the passed planar patch, using bilinear
+// sampling. The passed corners must be within the image and have at least two
+// pixels of border around them (so, for example, a corner of the patch cannot
+// lie directly on the edge of the image). Four corners are always required,
+// along with a fifth point in xs[4], ys[4] that is mapped into the canonical
+// patch frame and returned via warped_position_x and warped_position_y. All
+// channels are interpolated.
+bool SamplePlanarPatch(const FloatImage &image,
+ const double *xs, const double *ys,
+ int num_samples_x, int num_samples_y,
+ FloatImage *patch,
+ double *warped_position_x, double *warped_position_y);
+
+} // namespace libmv
+
+#endif // LIBMV_TRACKING_TRACK_REGION_H_
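For orientation, a minimal usage sketch of the TrackRegion() API declared above. The wrapper name and the numeric option values are illustrative only; the TrackRegionOptions() constructor presumably supplies sensible defaults for the fields not set here, but its definition is not part of this patch.

#include "libmv/tracking/track_region.h"

using libmv::FloatImage;
using libmv::TrackRegion;
using libmv::TrackRegionOptions;
using libmv::TrackRegionResult;

// Track the quad (x1, y1) from image1 into image2, starting from the guess in
// (x2, y2); the tracked corners are written back into x2 and y2.
bool ExampleTrack(const FloatImage &image1, const FloatImage &image2,
                  const double x1[4], const double y1[4],
                  double x2[4], double y2[4]) {
  TrackRegionOptions options;
  options.mode = TrackRegionOptions::HOMOGRAPHY;  // or TRANSLATION, AFFINE, ...
  options.max_iterations = 20;         // illustrative value
  options.minimum_correlation = 0.75;  // illustrative value
  options.sigma = 0.9;                 // illustrative value
  options.num_extra_points = 0;        // no extra points to warp
  options.image1_mask = NULL;          // no pattern mask

  TrackRegionResult result;
  TrackRegion(image1, image2, x1, y1, options, x2, y2, &result);

  // Treat only a converged solve (one of the tolerance terminations) as a
  // successful track; everything else, including INSUFFICIENT_CORRELATION and
  // the out-of-bounds terminations, is reported as failure.
  return result.termination == TrackRegionResult::PARAMETER_TOLERANCE ||
         result.termination == TrackRegionResult::FUNCTION_TOLERANCE ||
         result.termination == TrackRegionResult::GRADIENT_TOLERANCE;
}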