Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
Diffstat (limited to 'intern/libmv/libmv/tracking/track_region.cc')
-rw-r--r--intern/libmv/libmv/tracking/track_region.cc1601
1 files changed, 1601 insertions, 0 deletions
diff --git a/intern/libmv/libmv/tracking/track_region.cc b/intern/libmv/libmv/tracking/track_region.cc
new file mode 100644
index 00000000000..ef6dac65236
--- /dev/null
+++ b/intern/libmv/libmv/tracking/track_region.cc
@@ -0,0 +1,1601 @@
+// Copyright (c) 2012 libmv authors.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to
+// deal in the Software without restriction, including without limitation the
+// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+// sell copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+// IN THE SOFTWARE.
+//
+// Author: mierle@gmail.com (Keir Mierle)
+//
+// TODO(keir): While this tracking code works rather well, it has some
+// outragous inefficiencies. There is probably a 5-10x speedup to be had if a
+// smart coder went through the TODO's and made the suggested performance
+// enhancements.
+
+// Necessary for M_E when building with MSVC.
+#define _USE_MATH_DEFINES
+
+#include "libmv/tracking/track_region.h"
+
+#include <Eigen/SVD>
+#include <Eigen/QR>
+#include <iostream>
+#include "ceres/ceres.h"
+#include "libmv/logging/logging.h"
+#include "libmv/image/image.h"
+#include "libmv/image/sample.h"
+#include "libmv/image/convolve.h"
+#include "libmv/multiview/homography.h"
+#include "libmv/numeric/numeric.h"
+
+// Expand the Jet functionality of Ceres to allow mixed numeric/autodiff.
+//
+// TODO(keir): Push this (or something similar) into upstream Ceres.
+namespace ceres {
+
+// A jet traits class to make it easier to work with mixed auto / numeric diff.
+template<typename T>
+struct JetOps {
+ static bool IsScalar() {
+ return true;
+ }
+ static T GetScalar(const T& t) {
+ return t;
+ }
+ static void SetScalar(const T& scalar, T* t) {
+ *t = scalar;
+ }
+ static void ScaleDerivative(double scale_by, T *value) {
+ // For double, there is no derivative to scale.
+ (void) scale_by; // Ignored.
+ (void) value; // Ignored.
+ }
+};
+
+template<typename T, int N>
+struct JetOps<Jet<T, N> > {
+ static bool IsScalar() {
+ return false;
+ }
+ static T GetScalar(const Jet<T, N>& t) {
+ return t.a;
+ }
+ static void SetScalar(const T& scalar, Jet<T, N>* t) {
+ t->a = scalar;
+ }
+ static void ScaleDerivative(double scale_by, Jet<T, N> *value) {
+ value->v *= scale_by;
+ }
+};
+
+template<typename FunctionType, int kNumArgs, typename ArgumentType>
+struct Chain {
+ static ArgumentType Rule(const FunctionType &f,
+ const FunctionType dfdx[kNumArgs],
+ const ArgumentType x[kNumArgs]) {
+ // In the default case of scalars, there's nothing to do since there are no
+ // derivatives to propagate.
+ (void) dfdx; // Ignored.
+ (void) x; // Ignored.
+ return f;
+ }
+};
+
+// XXX Add documentation here!
+template<typename FunctionType, int kNumArgs, typename T, int N>
+struct Chain<FunctionType, kNumArgs, Jet<T, N> > {
+ static Jet<T, N> Rule(const FunctionType &f,
+ const FunctionType dfdx[kNumArgs],
+ const Jet<T, N> x[kNumArgs]) {
+ // x is itself a function of another variable ("z"); what this function
+ // needs to return is "f", but with the derivative with respect to z
+ // attached to the jet. So combine the derivative part of x's jets to form
+ // a Jacobian matrix between x and z (i.e. dx/dz).
+ Eigen::Matrix<T, kNumArgs, N> dxdz;
+ for (int i = 0; i < kNumArgs; ++i) {
+ dxdz.row(i) = x[i].v.transpose();
+ }
+
+ // Map the input gradient dfdx into an Eigen row vector.
+ Eigen::Map<const Eigen::Matrix<FunctionType, 1, kNumArgs> >
+ vector_dfdx(dfdx, 1, kNumArgs);
+
+ // Now apply the chain rule to obtain df/dz. Combine the derivative with
+ // the scalar part to obtain f with full derivative information.
+ Jet<T, N> jet_f;
+ jet_f.a = f;
+ jet_f.v = vector_dfdx.template cast<T>() * dxdz; // Also known as dfdz.
+ return jet_f;
+ }
+};
+
+} // namespace ceres
+
+namespace libmv {
+
+using ceres::Jet;
+using ceres::JetOps;
+using ceres::Chain;
+
+TrackRegionOptions::TrackRegionOptions()
+ : mode(TRANSLATION),
+ minimum_correlation(0),
+ max_iterations(20),
+ use_esm(true),
+ use_brute_initialization(true),
+ use_normalized_intensities(false),
+ sigma(0.9),
+ num_extra_points(0),
+ regularization_coefficient(0.0),
+ minimum_corner_shift_tolerance_pixels(0.005),
+ image1_mask(NULL) {
+}
+
+namespace {
+
+// TODO(keir): Consider adding padding.
+template<typename T>
+bool InBounds(const FloatImage &image,
+ const T &x,
+ const T &y) {
+ return 0.0 <= x && x < image.Width() &&
+ 0.0 <= y && y < image.Height();
+}
+
+bool AllInBounds(const FloatImage &image,
+ const double *x,
+ const double *y) {
+ for (int i = 0; i < 4; ++i) {
+ if (!InBounds(image, x[i], y[i])) {
+ return false;
+ }
+ }
+ return true;
+}
+
+// Sample the image at position (x, y) but use the gradient, if present, to
+// propagate derivatives from x and y. This is needed to integrate the numeric
+// image gradients with Ceres's autodiff framework.
+template<typename T>
+static T SampleWithDerivative(const FloatImage &image_and_gradient,
+ const T &x,
+ const T &y) {
+ float scalar_x = JetOps<T>::GetScalar(x);
+ float scalar_y = JetOps<T>::GetScalar(y);
+
+ // Note that sample[1] and sample[2] will be uninitialized in the scalar
+ // case, but that is not an issue because the Chain::Rule below will not read
+ // the uninitialized values.
+ float sample[3];
+ if (JetOps<T>::IsScalar()) {
+ // For the scalar case, only sample the image.
+ sample[0] = SampleLinear(image_and_gradient, scalar_y, scalar_x, 0);
+ } else {
+ // For the derivative case, sample the gradient as well.
+ SampleLinear(image_and_gradient, scalar_y, scalar_x, sample);
+ }
+ T xy[2] = { x, y };
+ return Chain<float, 2, T>::Rule(sample[0], sample + 1, xy);
+}
+
+template<typename Warp>
+class TerminationCheckingCallback : public ceres::IterationCallback {
+ public:
+ TerminationCheckingCallback(const TrackRegionOptions &options,
+ const FloatImage& image2,
+ const Warp &warp,
+ const double *x1, const double *y1)
+ : options_(options), image2_(image2), warp_(warp), x1_(x1), y1_(y1),
+ have_last_successful_step_(false) {}
+
+ virtual ceres::CallbackReturnType operator()(
+ const ceres::IterationSummary& summary) {
+ // If the step wasn't successful, there's nothing to do.
+ if (!summary.step_is_successful) {
+ return ceres::SOLVER_CONTINUE;
+ }
+ // Warp the original 4 points with the current warp into image2.
+ double x2[4];
+ double y2[4];
+ for (int i = 0; i < 4; ++i) {
+ warp_.Forward(warp_.parameters, x1_[i], y1_[i], x2 + i, y2 + i);
+ }
+ // Ensure the corners are all in bounds.
+ if (!AllInBounds(image2_, x2, y2)) {
+ LG << "Successful step fell outside of the pattern bounds; aborting.";
+ return ceres::SOLVER_ABORT;
+ }
+
+ // Ensure the minimizer is making large enough shifts to bother continuing.
+ // Ideally, this check would happen on the parameters themselves which
+ // Ceres supports directly; however, the mapping from parameter change
+ // magnitude to corner movement in pixels is not a simple norm. Hence, the
+ // need for a stateful callback which tracks the last successful set of
+ // parameters (and the position of the projected patch corners).
+ if (have_last_successful_step_) {
+ // Compute the maximum shift of any corner in pixels since the last
+ // successful iteration.
+ double max_change_pixels = 0;
+ for (int i = 0; i < 4; ++i) {
+ double dx = x2[i] - x2_last_successful_[i];
+ double dy = y2[i] - y2_last_successful_[i];
+ double change_pixels = dx*dx + dy*dy;
+ if (change_pixels > max_change_pixels) {
+ max_change_pixels = change_pixels;
+ }
+ }
+ max_change_pixels = sqrt(max_change_pixels);
+ LG << "Max patch corner shift is " << max_change_pixels;
+
+ // Bail if the shift is too small.
+ if (max_change_pixels < options_.minimum_corner_shift_tolerance_pixels) {
+ LG << "Max patch corner shift is " << max_change_pixels
+ << " from the last iteration; returning success.";
+ return ceres::SOLVER_TERMINATE_SUCCESSFULLY;
+ }
+ }
+
+ // Save the projected corners for checking on the next successful iteration.
+ for (int i = 0; i < 4; ++i) {
+ x2_last_successful_[i] = x2[i];
+ y2_last_successful_[i] = y2[i];
+ }
+ have_last_successful_step_ = true;
+ return ceres::SOLVER_CONTINUE;
+ }
+
+ private:
+ const TrackRegionOptions &options_;
+ const FloatImage &image2_;
+ const Warp &warp_;
+ const double *x1_;
+ const double *y1_;
+
+ bool have_last_successful_step_;
+ double x2_last_successful_[4];
+ double y2_last_successful_[4];
+};
+
+template<typename Warp>
+class PixelDifferenceCostFunctor {
+ public:
+ PixelDifferenceCostFunctor(const TrackRegionOptions &options,
+ const FloatImage &image_and_gradient1,
+ const FloatImage &image_and_gradient2,
+ const Mat3 &canonical_to_image1,
+ int num_samples_x,
+ int num_samples_y,
+ const Warp &warp)
+ : options_(options),
+ image_and_gradient1_(image_and_gradient1),
+ image_and_gradient2_(image_and_gradient2),
+ canonical_to_image1_(canonical_to_image1),
+ num_samples_x_(num_samples_x),
+ num_samples_y_(num_samples_y),
+ warp_(warp),
+ pattern_and_gradient_(num_samples_y_, num_samples_x_, 3),
+ pattern_positions_(num_samples_y_, num_samples_x_, 2),
+ pattern_mask_(num_samples_y_, num_samples_x_, 1) {
+ ComputeCanonicalPatchAndNormalizer();
+ }
+
+ void ComputeCanonicalPatchAndNormalizer() {
+ src_mean_ = 0.0;
+ double num_samples = 0.0;
+ for (int r = 0; r < num_samples_y_; ++r) {
+ for (int c = 0; c < num_samples_x_; ++c) {
+ // Compute the position; cache it.
+ Vec3 image_position = canonical_to_image1_ * Vec3(c, r, 1);
+ image_position /= image_position(2);
+ pattern_positions_(r, c, 0) = image_position(0);
+ pattern_positions_(r, c, 1) = image_position(1);
+
+ // Sample the pattern and gradients.
+ SampleLinear(image_and_gradient1_,
+ image_position(1), // SampleLinear is r, c.
+ image_position(0),
+ &pattern_and_gradient_(r, c, 0));
+
+ // Sample sample the mask.
+ double mask_value = 1.0;
+ if (options_.image1_mask != NULL) {
+ SampleLinear(*options_.image1_mask,
+ image_position(1), // SampleLinear is r, c.
+ image_position(0),
+ &pattern_mask_(r, c, 0));
+ mask_value = pattern_mask_(r, c);
+ }
+ src_mean_ += pattern_and_gradient_(r, c, 0) * mask_value;
+ num_samples += mask_value;
+ }
+ }
+ src_mean_ /= num_samples;
+ }
+
+ template<typename T>
+ bool operator()(const T *warp_parameters, T *residuals) const {
+ if (options_.image1_mask != NULL) {
+ VLOG(2) << "Using a mask.";
+ }
+ for (int i = 0; i < Warp::NUM_PARAMETERS; ++i) {
+ VLOG(2) << "warp_parameters[" << i << "]: " << warp_parameters[i];
+ }
+
+ T dst_mean = T(1.0);
+ if (options_.use_normalized_intensities) {
+ ComputeNormalizingCoefficient(warp_parameters,
+ &dst_mean);
+ }
+
+ int cursor = 0;
+ for (int r = 0; r < num_samples_y_; ++r) {
+ for (int c = 0; c < num_samples_x_; ++c) {
+ // Use the pre-computed image1 position.
+ Vec2 image1_position(pattern_positions_(r, c, 0),
+ pattern_positions_(r, c, 1));
+
+ // Sample the mask early; if it's zero, this pixel has no effect. This
+ // allows early bailout from the expensive sampling that happens below.
+ //
+ // Note that partial masks are not short circuited. To see why short
+ // circuiting produces bitwise-exact same results, consider that the
+ // residual for each pixel is
+ //
+ // residual = mask * (src - dst) ,
+ //
+ // and for jets, multiplying by a scalar multiplies the derivative
+ // components by the scalar as well. Therefore, if the mask is exactly
+ // zero, then so too will the final residual and derivatives.
+ double mask_value = 1.0;
+ if (options_.image1_mask != NULL) {
+ mask_value = pattern_mask_(r, c);
+ if (mask_value == 0.0) {
+ residuals[cursor++] = T(0.0);
+ continue;
+ }
+ }
+
+ // Compute the location of the destination pixel.
+ T image2_position[2];
+ warp_.Forward(warp_parameters,
+ T(image1_position[0]),
+ T(image1_position[1]),
+ &image2_position[0],
+ &image2_position[1]);
+
+ // Sample the destination, propagating derivatives.
+ T dst_sample = SampleWithDerivative(image_and_gradient2_,
+ image2_position[0],
+ image2_position[1]);
+
+ // Sample the source. This is made complicated by ESM mode.
+ T src_sample;
+ if (options_.use_esm && !JetOps<T>::IsScalar()) {
+ // In ESM mode, the derivative of the source is also taken into
+ // account. This changes the linearization in a way that causes
+ // better convergence. Copy the derivative of the warp parameters
+ // onto the jets for the image1 position. This is the ESM hack.
+ T image1_position_jet[2] = {
+ image2_position[0], // Order is x, y. This matches the
+ image2_position[1] // derivative order in the patch.
+ };
+ JetOps<T>::SetScalar(image1_position[0], image1_position_jet + 0);
+ JetOps<T>::SetScalar(image1_position[1], image1_position_jet + 1);
+
+ // Now that the image1 positions have the jets applied from the
+ // image2 position (the ESM hack), chain the image gradients to
+ // obtain a sample with the derivative with respect to the warp
+ // parameters attached.
+ src_sample = Chain<float, 2, T>::Rule(pattern_and_gradient_(r, c),
+ &pattern_and_gradient_(r, c, 1),
+ image1_position_jet);
+
+ // The jacobians for these should be averaged. Due to the subtraction
+ // below, flip the sign of the src derivative so that the effect
+ // after subtraction of the jets is that they are averaged.
+ JetOps<T>::ScaleDerivative(-0.5, &src_sample);
+ JetOps<T>::ScaleDerivative(0.5, &dst_sample);
+ } else {
+ // This is the traditional, forward-mode KLT solution.
+ src_sample = T(pattern_and_gradient_(r, c));
+ }
+
+ // Normalize the samples by the mean values of each signal. The typical
+ // light model assumes multiplicative intensity changes with changing
+ // light, so this is a reasonable choice. Note that dst_mean has
+ // derivative information attached thanks to autodiff.
+ if (options_.use_normalized_intensities) {
+ src_sample /= T(src_mean_);
+ dst_sample /= dst_mean;
+ }
+
+ // The difference is the error.
+ T error = src_sample - dst_sample;
+
+ // Weight the error by the mask, if one is present.
+ if (options_.image1_mask != NULL) {
+ error *= T(mask_value);
+ }
+ residuals[cursor++] = error;
+ }
+ }
+ return true;
+ }
+
+ // For normalized matching, the average and
+ template<typename T>
+ void ComputeNormalizingCoefficient(const T *warp_parameters,
+ T *dst_mean) const {
+ *dst_mean = T(0.0);
+ double num_samples = 0.0;
+ for (int r = 0; r < num_samples_y_; ++r) {
+ for (int c = 0; c < num_samples_x_; ++c) {
+ // Use the pre-computed image1 position.
+ Vec2 image1_position(pattern_positions_(r, c, 0),
+ pattern_positions_(r, c, 1));
+
+ // Sample the mask early; if it's zero, this pixel has no effect. This
+ // allows early bailout from the expensive sampling that happens below.
+ double mask_value = 1.0;
+ if (options_.image1_mask != NULL) {
+ mask_value = pattern_mask_(r, c);
+ if (mask_value == 0.0) {
+ continue;
+ }
+ }
+
+ // Compute the location of the destination pixel.
+ T image2_position[2];
+ warp_.Forward(warp_parameters,
+ T(image1_position[0]),
+ T(image1_position[1]),
+ &image2_position[0],
+ &image2_position[1]);
+
+
+ // Sample the destination, propagating derivatives.
+ // TODO(keir): This accumulation can, surprisingly, be done as a
+ // pre-pass by using integral images. This is complicated by the need
+ // to store the jets in the integral image, but it is possible.
+ T dst_sample = SampleWithDerivative(image_and_gradient2_,
+ image2_position[0],
+ image2_position[1]);
+
+ // Weight the sample by the mask, if one is present.
+ if (options_.image1_mask != NULL) {
+ dst_sample *= T(mask_value);
+ }
+
+ *dst_mean += dst_sample;
+ num_samples += mask_value;
+ }
+ }
+ *dst_mean /= T(num_samples);
+ LG << "Normalization for dst:" << *dst_mean;
+ }
+
+ // TODO(keir): Consider also computing the cost here.
+ double PearsonProductMomentCorrelationCoefficient(
+ const double *warp_parameters) const {
+ for (int i = 0; i < Warp::NUM_PARAMETERS; ++i) {
+ VLOG(2) << "Correlation warp_parameters[" << i << "]: "
+ << warp_parameters[i];
+ }
+
+ // The single-pass PMCC computation is somewhat numerically unstable, but
+ // it's sufficient for the tracker.
+ double sX = 0, sY = 0, sXX = 0, sYY = 0, sXY = 0;
+
+ // Due to masking, it's important to account for fractional samples.
+ // For example, samples with a 50% mask are counted as a half sample.
+ double num_samples = 0;
+
+ for (int r = 0; r < num_samples_y_; ++r) {
+ for (int c = 0; c < num_samples_x_; ++c) {
+ // Use the pre-computed image1 position.
+ Vec2 image1_position(pattern_positions_(r, c, 0),
+ pattern_positions_(r, c, 1));
+
+ double mask_value = 1.0;
+ if (options_.image1_mask != NULL) {
+ mask_value = pattern_mask_(r, c);
+ if (mask_value == 0.0) {
+ continue;
+ }
+ }
+
+ // Compute the location of the destination pixel.
+ double image2_position[2];
+ warp_.Forward(warp_parameters,
+ image1_position[0],
+ image1_position[1],
+ &image2_position[0],
+ &image2_position[1]);
+
+ double x = pattern_and_gradient_(r, c);
+ double y = SampleLinear(image_and_gradient2_,
+ image2_position[1], // SampleLinear is r, c.
+ image2_position[0]);
+
+ // Weight the signals by the mask, if one is present.
+ if (options_.image1_mask != NULL) {
+ x *= mask_value;
+ y *= mask_value;
+ num_samples += mask_value;
+ } else {
+ num_samples++;
+ }
+ sX += x;
+ sY += y;
+ sXX += x*x;
+ sYY += y*y;
+ sXY += x*y;
+ }
+ }
+ // Normalize.
+ sX /= num_samples;
+ sY /= num_samples;
+ sXX /= num_samples;
+ sYY /= num_samples;
+ sXY /= num_samples;
+
+ double var_x = sXX - sX*sX;
+ double var_y = sYY - sY*sY;
+ double covariance_xy = sXY - sX*sY;
+
+ double correlation = covariance_xy / sqrt(var_x * var_y);
+ LG << "Covariance xy: " << covariance_xy
+ << ", var 1: " << var_x << ", var 2: " << var_y
+ << ", correlation: " << correlation;
+ return correlation;
+ }
+
+ private:
+ const TrackRegionOptions &options_;
+ const FloatImage &image_and_gradient1_;
+ const FloatImage &image_and_gradient2_;
+ const Mat3 &canonical_to_image1_;
+ int num_samples_x_;
+ int num_samples_y_;
+ const Warp &warp_;
+ double src_mean_;
+ FloatImage pattern_and_gradient_;
+
+ // This contains the position from where the cached pattern samples were
+ // taken from. This is also used to warp from src to dest without going from
+ // canonical pixels to src first.
+ FloatImage pattern_positions_;
+
+ FloatImage pattern_mask_;
+};
+
+template<typename Warp>
+class WarpRegularizingCostFunctor {
+ public:
+ WarpRegularizingCostFunctor(const TrackRegionOptions &options,
+ const double *x1,
+ const double *y1,
+ const double *x2_original,
+ const double *y2_original,
+ const Warp &warp)
+ : options_(options),
+ x1_(x1),
+ y1_(y1),
+ x2_original_(x2_original),
+ y2_original_(y2_original),
+ warp_(warp) {
+ // Compute the centroid of the first guess quad.
+ // TODO(keir): Use Quad class here.
+ original_centroid_[0] = 0.0;
+ original_centroid_[1] = 0.0;
+ for (int i = 0; i < 4; ++i) {
+ original_centroid_[0] += x2_original[i];
+ original_centroid_[1] += y2_original[i];
+ }
+ original_centroid_[0] /= 4;
+ original_centroid_[1] /= 4;
+ }
+
+ template<typename T>
+ bool operator()(const T *warp_parameters, T *residuals) const {
+ T dst_centroid[2] = { T(0.0), T(0.0) };
+ for (int i = 0; i < 4; ++i) {
+ T image1_position[2] = { T(x1_[i]), T(y1_[i]) };
+ T image2_position[2];
+ warp_.Forward(warp_parameters,
+ T(x1_[i]),
+ T(y1_[i]),
+ &image2_position[0],
+ &image2_position[1]);
+
+ // Subtract the positions. Note that this ignores the centroids.
+ residuals[2 * i + 0] = image2_position[0] - image1_position[0];
+ residuals[2 * i + 1] = image2_position[1] - image1_position[1];
+
+ // Accumulate the dst centroid.
+ dst_centroid[0] += image2_position[0];
+ dst_centroid[1] += image2_position[1];
+ }
+ dst_centroid[0] /= T(4.0);
+ dst_centroid[1] /= T(4.0);
+
+ // Adjust for the centroids.
+ for (int i = 0; i < 4; ++i) {
+ residuals[2 * i + 0] += T(original_centroid_[0]) - dst_centroid[0];
+ residuals[2 * i + 1] += T(original_centroid_[1]) - dst_centroid[1];
+ }
+
+ // Reweight the residuals.
+ for (int i = 0; i < 8; ++i) {
+ residuals[i] *= T(options_.regularization_coefficient);
+ }
+
+ return true;
+ }
+
+ const TrackRegionOptions &options_;
+ const double *x1_;
+ const double *y1_;
+ const double *x2_original_;
+ const double *y2_original_;
+ double original_centroid_[2];
+ const Warp &warp_;
+};
+
+// Compute the warp from rectangular coordinates, where one corner is the
+// origin, and the opposite corner is at (num_samples_x, num_samples_y).
+Mat3 ComputeCanonicalHomography(const double *x1,
+ const double *y1,
+ int num_samples_x,
+ int num_samples_y) {
+ Mat canonical(2, 4);
+ canonical << 0, num_samples_x, num_samples_x, 0,
+ 0, 0, num_samples_y, num_samples_y;
+
+ Mat xy1(2, 4);
+ xy1 << x1[0], x1[1], x1[2], x1[3],
+ y1[0], y1[1], y1[2], y1[3];
+
+ Mat3 H;
+ if (!Homography2DFromCorrespondencesLinear(canonical, xy1, &H, 1e-12)) {
+ LG << "Couldn't construct homography.";
+ }
+ return H;
+}
+
+class Quad {
+ public:
+ Quad(const double *x, const double *y) : x_(x), y_(y) {
+ // Compute the centroid and store it.
+ centroid_ = Vec2(0.0, 0.0);
+ for (int i = 0; i < 4; ++i) {
+ centroid_ += Vec2(x_[i], y_[i]);
+ }
+ centroid_ /= 4.0;
+ }
+
+ // The centroid of the four points representing the quad.
+ const Vec2& Centroid() const {
+ return centroid_;
+ }
+
+ // The average magnitude of the four points relative to the centroid.
+ double Scale() const {
+ double scale = 0.0;
+ for (int i = 0; i < 4; ++i) {
+ scale += (Vec2(x_[i], y_[i]) - Centroid()).norm();
+ }
+ return scale / 4.0;
+ }
+
+ Vec2 CornerRelativeToCentroid(int i) const {
+ return Vec2(x_[i], y_[i]) - centroid_;
+ }
+
+ private:
+ const double *x_;
+ const double *y_;
+ Vec2 centroid_;
+};
+
+struct TranslationWarp {
+ TranslationWarp(const double *x1, const double *y1,
+ const double *x2, const double *y2) {
+ Vec2 t = Quad(x2, y2).Centroid() - Quad(x1, y1).Centroid();
+ parameters[0] = t[0];
+ parameters[1] = t[1];
+ }
+
+ template<typename T>
+ void Forward(const T *warp_parameters,
+ const T &x1, const T& y1, T *x2, T* y2) const {
+ *x2 = x1 + warp_parameters[0];
+ *y2 = y1 + warp_parameters[1];
+ }
+
+ // Translation x, translation y.
+ enum { NUM_PARAMETERS = 2 };
+ double parameters[NUM_PARAMETERS];
+};
+
+struct TranslationScaleWarp {
+ TranslationScaleWarp(const double *x1, const double *y1,
+ const double *x2, const double *y2)
+ : q1(x1, y1) {
+ Quad q2(x2, y2);
+
+ // The difference in centroids is the best guess for translation.
+ Vec2 t = q2.Centroid() - q1.Centroid();
+ parameters[0] = t[0];
+ parameters[1] = t[1];
+
+ // The difference in scales is the estimate for the scale.
+ parameters[2] = 1.0 - q2.Scale() / q1.Scale();
+ }
+
+ // The strange way of parameterizing the translation and scaling is to make
+ // the knobs that the optimizer sees easy to adjust. This is less important
+ // for the scaling case than the rotation case.
+ template<typename T>
+ void Forward(const T *warp_parameters,
+ const T &x1, const T& y1, T *x2, T* y2) const {
+ // Make the centroid of Q1 the origin.
+ const T x1_origin = x1 - q1.Centroid()(0);
+ const T y1_origin = y1 - q1.Centroid()(1);
+
+ // Scale uniformly about the origin.
+ const T scale = 1.0 + warp_parameters[2];
+ const T x1_origin_scaled = scale * x1_origin;
+ const T y1_origin_scaled = scale * y1_origin;
+
+ // Translate back into the space of Q1 (but scaled).
+ const T x1_scaled = x1_origin_scaled + q1.Centroid()(0);
+ const T y1_scaled = y1_origin_scaled + q1.Centroid()(1);
+
+ // Translate into the space of Q2.
+ *x2 = x1_scaled + warp_parameters[0];
+ *y2 = y1_scaled + warp_parameters[1];
+ }
+
+ // Translation x, translation y, scale.
+ enum { NUM_PARAMETERS = 3 };
+ double parameters[NUM_PARAMETERS];
+
+ Quad q1;
+};
+
+// Assumes the given points are already zero-centroid and the same size.
+Mat2 OrthogonalProcrustes(const Mat2 &correlation_matrix) {
+ Eigen::JacobiSVD<Mat2> svd(correlation_matrix,
+ Eigen::ComputeFullU | Eigen::ComputeFullV);
+ return svd.matrixV() * svd.matrixU().transpose();
+}
+
+struct TranslationRotationWarp {
+ TranslationRotationWarp(const double *x1, const double *y1,
+ const double *x2, const double *y2)
+ : q1(x1, y1) {
+ Quad q2(x2, y2);
+
+ // The difference in centroids is the best guess for translation.
+ Vec2 t = q2.Centroid() - q1.Centroid();
+ parameters[0] = t[0];
+ parameters[1] = t[1];
+
+ // Obtain the rotation via orthorgonal procrustes.
+ Mat2 correlation_matrix = Mat2::Zero();
+ for (int i = 0; i < 4; ++i) {
+ correlation_matrix += q1.CornerRelativeToCentroid(i) *
+ q2.CornerRelativeToCentroid(i).transpose();
+ }
+ Mat2 R = OrthogonalProcrustes(correlation_matrix);
+ parameters[2] = atan2(R(1, 0), R(0, 0));
+
+ LG << "Correlation_matrix:\n" << correlation_matrix;
+ LG << "R:\n" << R;
+ LG << "Theta:" << parameters[2];
+ }
+
+ // The strange way of parameterizing the translation and rotation is to make
+ // the knobs that the optimizer sees easy to adjust. The reason is that while
+ // it is always the case that it is possible to express composed rotations
+ // and translations as a single translation and rotation, the numerical
+ // values needed for the composition are often large in magnitude. This is
+ // enough to throw off any minimizer, since it must do the equivalent of
+ // compose rotations and translations.
+ //
+ // Instead, use the parameterization below that offers a parameterization
+ // that exposes the degrees of freedom in a way amenable to optimization.
+ template<typename T>
+ void Forward(const T *warp_parameters,
+ const T &x1, const T& y1, T *x2, T* y2) const {
+ // Make the centroid of Q1 the origin.
+ const T x1_origin = x1 - q1.Centroid()(0);
+ const T y1_origin = y1 - q1.Centroid()(1);
+
+ // Rotate about the origin (i.e. centroid of Q1).
+ const T theta = warp_parameters[2];
+ const T costheta = cos(theta);
+ const T sintheta = sin(theta);
+ const T x1_origin_rotated = costheta * x1_origin - sintheta * y1_origin;
+ const T y1_origin_rotated = sintheta * x1_origin + costheta * y1_origin;
+
+ // Translate back into the space of Q1 (but scaled).
+ const T x1_rotated = x1_origin_rotated + q1.Centroid()(0);
+ const T y1_rotated = y1_origin_rotated + q1.Centroid()(1);
+
+ // Translate into the space of Q2.
+ *x2 = x1_rotated + warp_parameters[0];
+ *y2 = y1_rotated + warp_parameters[1];
+ }
+
+ // Translation x, translation y, rotation about the center of Q1 degrees.
+ enum { NUM_PARAMETERS = 3 };
+ double parameters[NUM_PARAMETERS];
+
+ Quad q1;
+};
+
+struct TranslationRotationScaleWarp {
+ TranslationRotationScaleWarp(const double *x1, const double *y1,
+ const double *x2, const double *y2)
+ : q1(x1, y1) {
+ Quad q2(x2, y2);
+
+ // The difference in centroids is the best guess for translation.
+ Vec2 t = q2.Centroid() - q1.Centroid();
+ parameters[0] = t[0];
+ parameters[1] = t[1];
+
+ // The difference in scales is the estimate for the scale.
+ parameters[2] = 1.0 - q2.Scale() / q1.Scale();
+
+ // Obtain the rotation via orthorgonal procrustes.
+ Mat2 correlation_matrix = Mat2::Zero();
+ for (int i = 0; i < 4; ++i) {
+ correlation_matrix += q1.CornerRelativeToCentroid(i) *
+ q2.CornerRelativeToCentroid(i).transpose();
+ }
+ Mat2 R = OrthogonalProcrustes(correlation_matrix);
+ parameters[3] = atan2(R(1, 0), R(0, 0));
+
+ LG << "Correlation_matrix:\n" << correlation_matrix;
+ LG << "R:\n" << R;
+ LG << "Theta:" << parameters[3];
+ }
+
+ // The strange way of parameterizing the translation and rotation is to make
+ // the knobs that the optimizer sees easy to adjust. The reason is that while
+ // it is always the case that it is possible to express composed rotations
+ // and translations as a single translation and rotation, the numerical
+ // values needed for the composition are often large in magnitude. This is
+ // enough to throw off any minimizer, since it must do the equivalent of
+ // compose rotations and translations.
+ //
+ // Instead, use the parameterization below that offers a parameterization
+ // that exposes the degrees of freedom in a way amenable to optimization.
+ template<typename T>
+ void Forward(const T *warp_parameters,
+ const T &x1, const T& y1, T *x2, T* y2) const {
+ // Make the centroid of Q1 the origin.
+ const T x1_origin = x1 - q1.Centroid()(0);
+ const T y1_origin = y1 - q1.Centroid()(1);
+
+ // Rotate about the origin (i.e. centroid of Q1).
+ const T theta = warp_parameters[3];
+ const T costheta = cos(theta);
+ const T sintheta = sin(theta);
+ const T x1_origin_rotated = costheta * x1_origin - sintheta * y1_origin;
+ const T y1_origin_rotated = sintheta * x1_origin + costheta * y1_origin;
+
+ // Scale uniformly about the origin.
+ const T scale = 1.0 + warp_parameters[2];
+ const T x1_origin_rotated_scaled = scale * x1_origin_rotated;
+ const T y1_origin_rotated_scaled = scale * y1_origin_rotated;
+
+ // Translate back into the space of Q1 (but scaled and rotated).
+ const T x1_rotated_scaled = x1_origin_rotated_scaled + q1.Centroid()(0);
+ const T y1_rotated_scaled = y1_origin_rotated_scaled + q1.Centroid()(1);
+
+ // Translate into the space of Q2.
+ *x2 = x1_rotated_scaled + warp_parameters[0];
+ *y2 = y1_rotated_scaled + warp_parameters[1];
+ }
+
+ // Translation x, translation y, rotation about the center of Q1 degrees,
+ // scale.
+ enum { NUM_PARAMETERS = 4 };
+ double parameters[NUM_PARAMETERS];
+
+ Quad q1;
+};
+
+struct AffineWarp {
+ AffineWarp(const double *x1, const double *y1,
+ const double *x2, const double *y2)
+ : q1(x1, y1) {
+ Quad q2(x2, y2);
+
+ // The difference in centroids is the best guess for translation.
+ Vec2 t = q2.Centroid() - q1.Centroid();
+ parameters[0] = t[0];
+ parameters[1] = t[1];
+
+ // Estimate the four affine parameters with the usual least squares.
+ Mat Q1(8, 4);
+ Vec Q2(8);
+ for (int i = 0; i < 4; ++i) {
+ Vec2 v1 = q1.CornerRelativeToCentroid(i);
+ Vec2 v2 = q2.CornerRelativeToCentroid(i);
+
+ Q1.row(2 * i + 0) << v1[0], v1[1], 0, 0;
+ Q1.row(2 * i + 1) << 0, 0, v1[0], v1[1];
+
+ Q2(2 * i + 0) = v2[0];
+ Q2(2 * i + 1) = v2[1];
+ }
+
+ // TODO(keir): Check solution quality.
+ Vec4 a = Q1.jacobiSvd(Eigen::ComputeThinU | Eigen::ComputeThinV).solve(Q2);
+ parameters[2] = a[0];
+ parameters[3] = a[1];
+ parameters[4] = a[2];
+ parameters[5] = a[3];
+
+ LG << "a:" << a.transpose();
+ LG << "t:" << t.transpose();
+ }
+
+ // See comments in other parameterizations about why the centroid is used.
+ template<typename T>
+ void Forward(const T *p, const T &x1, const T& y1, T *x2, T* y2) const {
+ // Make the centroid of Q1 the origin.
+ const T x1_origin = x1 - q1.Centroid()(0);
+ const T y1_origin = y1 - q1.Centroid()(1);
+
+ // Apply the affine transformation.
+ const T x1_origin_affine = p[2] * x1_origin + p[3] * y1_origin;
+ const T y1_origin_affine = p[4] * x1_origin + p[5] * y1_origin;
+
+ // Translate back into the space of Q1 (but affine transformed).
+ const T x1_affine = x1_origin_affine + q1.Centroid()(0);
+ const T y1_affine = y1_origin_affine + q1.Centroid()(1);
+
+ // Translate into the space of Q2.
+ *x2 = x1_affine + p[0];
+ *y2 = y1_affine + p[1];
+ }
+
+ // Translation x, translation y, rotation about the center of Q1 degrees,
+ // scale.
+ enum { NUM_PARAMETERS = 6 };
+ double parameters[NUM_PARAMETERS];
+
+ Quad q1;
+};
+
+struct HomographyWarp {
+ HomographyWarp(const double *x1, const double *y1,
+ const double *x2, const double *y2) {
+ Mat quad1(2, 4);
+ quad1 << x1[0], x1[1], x1[2], x1[3],
+ y1[0], y1[1], y1[2], y1[3];
+
+ Mat quad2(2, 4);
+ quad2 << x2[0], x2[1], x2[2], x2[3],
+ y2[0], y2[1], y2[2], y2[3];
+
+ Mat3 H;
+ if (!Homography2DFromCorrespondencesLinear(quad1, quad2, &H, 1e-12)) {
+ LG << "Couldn't construct homography.";
+ }
+
+ // Assume H(2, 2) != 0, and fix scale at H(2, 2) == 1.0.
+ H /= H(2, 2);
+
+ // Assume H is close to identity, so subtract out the diagonal.
+ H(0, 0) -= 1.0;
+ H(1, 1) -= 1.0;
+
+ CHECK_NE(H(2, 2), 0.0) << H;
+ for (int i = 0; i < 8; ++i) {
+ parameters[i] = H(i / 3, i % 3);
+ LG << "Parameters[" << i << "]: " << parameters[i];
+ }
+ }
+
+ template<typename T>
+ static void Forward(const T *p,
+ const T &x1, const T& y1, T *x2, T* y2) {
+ // Homography warp with manual 3x3 matrix multiply.
+ const T xx2 = (1.0 + p[0]) * x1 + p[1] * y1 + p[2];
+ const T yy2 = p[3] * x1 + (1.0 + p[4]) * y1 + p[5];
+ const T zz2 = p[6] * x1 + p[7] * y1 + 1.0;
+ *x2 = xx2 / zz2;
+ *y2 = yy2 / zz2;
+ }
+
+ enum { NUM_PARAMETERS = 8 };
+ double parameters[NUM_PARAMETERS];
+};
+
+// Determine the number of samples to use for x and y. Quad winding goes:
+//
+// 0 1
+// 3 2
+//
+// The idea is to take the maximum x or y distance. This may be oversampling.
+// TODO(keir): Investigate the various choices; perhaps average is better?
+void PickSampling(const double *x1, const double *y1,
+ const double *x2, const double *y2,
+ int *num_samples_x, int *num_samples_y) {
+ (void) x2; // Ignored.
+ (void) y2; // Ignored.
+
+ Vec2 a0(x1[0], y1[0]);
+ Vec2 a1(x1[1], y1[1]);
+ Vec2 a2(x1[2], y1[2]);
+ Vec2 a3(x1[3], y1[3]);
+
+ Vec2 b0(x1[0], y1[0]);
+ Vec2 b1(x1[1], y1[1]);
+ Vec2 b2(x1[2], y1[2]);
+ Vec2 b3(x1[3], y1[3]);
+
+ double x_dimensions[4] = {
+ (a1 - a0).norm(),
+ (a3 - a2).norm(),
+ (b1 - b0).norm(),
+ (b3 - b2).norm()
+ };
+
+ double y_dimensions[4] = {
+ (a3 - a0).norm(),
+ (a1 - a2).norm(),
+ (b3 - b0).norm(),
+ (b1 - b2).norm()
+ };
+ const double kScaleFactor = 1.0;
+ *num_samples_x = static_cast<int>(
+ kScaleFactor * *std::max_element(x_dimensions, x_dimensions + 4));
+ *num_samples_y = static_cast<int>(
+ kScaleFactor * *std::max_element(y_dimensions, y_dimensions + 4));
+ LG << "Automatic num_samples_x: " << *num_samples_x
+ << ", num_samples_y: " << *num_samples_y;
+}
+
+bool SearchAreaTooBigForDescent(const FloatImage &image2,
+ const double *x2, const double *y2) {
+ // TODO(keir): Check the bounds and enable only when it makes sense.
+ (void) image2; // Ignored.
+ (void) x2; // Ignored.
+ (void) y2; // Ignored.
+
+ return true;
+}
+
+bool PointOnRightHalfPlane(const Vec2 &a, const Vec2 &b, double x, double y) {
+ Vec2 ba = b - a;
+ return ((Vec2(x, y) - b).transpose() * Vec2(-ba.y(), ba.x())) > 0;
+}
+
+// Determine if a point is in a quad. The quad is arranged as:
+//
+// +-------> x
+// |
+// | a0------a1
+// | | |
+// | | |
+// | | |
+// | a3------a2
+// v
+// y
+//
+// The implementation does up to four half-plane comparisons.
+bool PointInQuad(const double *xs, const double *ys, double x, double y) {
+ Vec2 a0(xs[0], ys[0]);
+ Vec2 a1(xs[1], ys[1]);
+ Vec2 a2(xs[2], ys[2]);
+ Vec2 a3(xs[3], ys[3]);
+
+ return PointOnRightHalfPlane(a0, a1, x, y) &&
+ PointOnRightHalfPlane(a1, a2, x, y) &&
+ PointOnRightHalfPlane(a2, a3, x, y) &&
+ PointOnRightHalfPlane(a3, a0, x, y);
+}
+
+// This makes it possible to map between Eigen float arrays and FloatImage
+// without using comparisons.
+typedef Eigen::Array<float, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor> FloatArray;
+
+// This creates a pattern in the frame of image2, from the pixel is image1,
+// based on the initial guess represented by the two quads x1, y1, and x2, y2.
+template<typename Warp>
+void CreateBrutePattern(const double *x1, const double *y1,
+ const double *x2, const double *y2,
+ const FloatImage &image1,
+ const FloatImage *image1_mask,
+ FloatArray *pattern,
+ FloatArray *mask,
+ int *origin_x,
+ int *origin_y) {
+ // Get integer bounding box of quad2 in image2.
+ int min_x = static_cast<int>(floor(*std::min_element(x2, x2 + 4)));
+ int min_y = static_cast<int>(floor(*std::min_element(y2, y2 + 4)));
+ int max_x = static_cast<int>(ceil (*std::max_element(x2, x2 + 4)));
+ int max_y = static_cast<int>(ceil (*std::max_element(y2, y2 + 4)));
+
+ int w = max_x - min_x;
+ int h = max_y - min_y;
+
+ pattern->resize(h, w);
+ mask->resize(h, w);
+
+ Warp inverse_warp(x2, y2, x1, y1);
+
+ // r,c are in the coordinate frame of image2.
+ for (int r = min_y; r < max_y; ++r) {
+ for (int c = min_x; c < max_x; ++c) {
+ // i and j are in the coordinate frame of the pattern in image2.
+ int i = r - min_y;
+ int j = c - min_x;
+
+ double dst_x = c;
+ double dst_y = r;
+ double src_x;
+ double src_y;
+ inverse_warp.Forward(inverse_warp.parameters,
+ dst_x, dst_y,
+ &src_x, &src_y);
+
+ if (PointInQuad(x1, y1, src_x, src_y)) {
+ (*pattern)(i, j) = SampleLinear(image1, src_y, src_x);
+ (*mask)(i, j) = 1.0;
+ if (image1_mask) {
+ (*mask)(i, j) = SampleLinear(*image1_mask, src_y, src_x);
+ }
+ } else {
+ (*pattern)(i, j) = 0.0;
+ (*mask)(i, j) = 0.0;
+ }
+ }
+ }
+ *origin_x = min_x;
+ *origin_y = min_y;
+}
+
+// Compute a translation-only estimate of the warp, using brute force search. A
+// smarter implementation would use the FFT to compute the normalized cross
+// correlation. Instead, this is a dumb implementation. Surprisingly, it is
+// fast enough in practice.
+//
+// Returns true if any alignment was found, and false if the projected pattern
+// is zero sized.
+//
+// TODO(keir): The normalization is less effective for the brute force search
+// than it is with the Ceres solver. It's unclear if this is a bug or due to
+// the original frame being too different from the reprojected reference in the
+// destination frame.
+//
+// The likely solution is to use the previous frame, instead of the original
+// pattern, when doing brute initialization. Unfortunately that implies a
+// totally different warping interface, since access to more than a the source
+// and current destination frame is necessary.
+template<typename Warp>
+bool BruteTranslationOnlyInitialize(const FloatImage &image1,
+ const FloatImage *image1_mask,
+ const FloatImage &image2,
+ const int num_extra_points,
+ const bool use_normalized_intensities,
+ const double *x1, const double *y1,
+ double *x2, double *y2) {
+ // Create the pattern to match in the space of image2, assuming our inital
+ // guess isn't too far from the template in image1. If there is no image1
+ // mask, then the resulting mask is binary.
+ FloatArray pattern;
+ FloatArray mask;
+ int origin_x = -1, origin_y = -1;
+ CreateBrutePattern<Warp>(x1, y1, x2, y2,
+ image1, image1_mask,
+ &pattern, &mask,
+ &origin_x, &origin_y);
+
+ // For normalization, premultiply the pattern by the inverse pattern mean.
+ double mask_sum = 1.0;
+ if (use_normalized_intensities) {
+ mask_sum = mask.sum();
+ double inverse_pattern_mean = mask_sum / ((mask * pattern).sum());
+ pattern *= inverse_pattern_mean;
+ }
+
+ // Use Eigen on the images via maps for strong vectorization.
+ Map<const FloatArray> search(image2.Data(), image2.Height(), image2.Width());
+
+ // Try all possible locations inside the search area. Yes, everywhere.
+ //
+ // TODO(keir): There are a number of possible optimizations here. One choice
+ // is to make a grid and only try one out of every N possible samples.
+ //
+ // Another, slightly more clever idea, is to compute some sort of spatial
+ // frequency distribution of the pattern patch. If the spatial resolution is
+ // high (e.g. a grating pattern or fine lines) then checking every possible
+ // translation is necessary, since a 1-pixel shift may induce a massive
+ // change in the cost function. If the image is a blob or splotch with blurry
+ // edges, then fewer samples are necessary since a few pixels offset won't
+ // change the cost function much.
+ double best_sad = std::numeric_limits<double>::max();
+ int best_r = -1;
+ int best_c = -1;
+ int w = pattern.cols();
+ int h = pattern.rows();
+
+ for (int r = 0; r < (image2.Height() - h); ++r) {
+ for (int c = 0; c < (image2.Width() - w); ++c) {
+ // Compute the weighted sum of absolute differences, Eigen style. Note
+ // that the block from the search image is never stored in a variable, to
+ // avoid copying overhead and permit inlining.
+ double sad;
+ if (use_normalized_intensities) {
+ // TODO(keir): It's really dumb to recompute the search mean for every
+ // shift. A smarter implementation would use summed area tables
+ // instead, reducing the mean calculation to an O(1) operation.
+ double inverse_search_mean =
+ mask_sum / ((mask * search.block(r, c, h, w)).sum());
+ sad = (mask * (pattern - (search.block(r, c, h, w) *
+ inverse_search_mean))).abs().sum();
+ } else {
+ sad = (mask * (pattern - search.block(r, c, h, w))).abs().sum();
+ }
+ if (sad < best_sad) {
+ best_r = r;
+ best_c = c;
+ best_sad = sad;
+ }
+ }
+ }
+
+ // This mean the effective pattern area is zero. This check could go earlier,
+ // but this is less code.
+ if (best_r == -1 || best_c == -1) {
+ return false;
+ }
+
+ LG << "Brute force translation found a shift. "
+ << "best_c: " << best_c << ", best_r: " << best_r << ", "
+ << "origin_x: " << origin_x << ", origin_y: " << origin_y << ", "
+ << "dc: " << (best_c - origin_x) << ", "
+ << "dr: " << (best_r - origin_y)
+ << ", tried " << ((image2.Height() - h) * (image2.Width() - w))
+ << " shifts.";
+
+ // Apply the shift.
+ for (int i = 0; i < 4 + num_extra_points; ++i) {
+ x2[i] += best_c - origin_x;
+ y2[i] += best_r - origin_y;
+ }
+ return true;
+}
+
+void CopyQuad(double *src_x, double *src_y,
+ double *dst_x, double *dst_y,
+ int num_extra_points) {
+ for (int i = 0; i < 4 + num_extra_points; ++i) {
+ dst_x[i] = src_x[i];
+ dst_y[i] = src_y[i];
+ }
+}
+
+} // namespace
+
+template<typename Warp>
+void TemplatedTrackRegion(const FloatImage &image1,
+ const FloatImage &image2,
+ const double *x1, const double *y1,
+ const TrackRegionOptions &options,
+ double *x2, double *y2,
+ TrackRegionResult *result) {
+ for (int i = 0; i < 4 + options.num_extra_points; ++i) {
+ LG << "P" << i << ": (" << x1[i] << ", " << y1[i] << "); guess ("
+ << x2[i] << ", " << y2[i] << "); (dx, dy): (" << (x2[i] - x1[i]) << ", "
+ << (y2[i] - y1[i]) << ").";
+ }
+
+ // Since (x2, y2) contains a prediction for where the tracked point should
+ // go, try a refinement immediately in the hope that the prediction is close
+ // enough.
+ if (options.attempt_refine_before_brute) {
+ TrackRegionOptions modified_options = options;
+ modified_options.use_brute_initialization = false;
+ modified_options.attempt_refine_before_brute = false;
+
+ double x2_first_try[5];
+ double y2_first_try[5];
+ CopyQuad(x2, y2, x2_first_try, y2_first_try, options.num_extra_points);
+
+ TemplatedTrackRegion<Warp>(image1, image2,
+ x1, y1, modified_options,
+ x2_first_try, y2_first_try, result);
+
+ // Of the things that can happen in the first pass, don't try the brute
+ // pass (and second attempt) if the error is one of the terminations below.
+ if (result->termination == TrackRegionResult::CONVERGENCE ||
+ result->termination == TrackRegionResult::SOURCE_OUT_OF_BOUNDS ||
+ result->termination == TrackRegionResult::DESTINATION_OUT_OF_BOUNDS ||
+ result->termination == TrackRegionResult::INSUFFICIENT_PATTERN_AREA) {
+ LG << "Terminated with first try at refinement; no brute needed.";
+ // TODO(keir): Also check correlation?
+ CopyQuad(x2_first_try, y2_first_try, x2, y2, options.num_extra_points);
+ LG << "Early termination correlation: " << result->correlation;
+ return;
+ } else {
+ LG << "Initial eager-refinement failed; retrying normally.";
+ }
+ }
+
+ if (options.use_normalized_intensities) {
+ LG << "Using normalized intensities.";
+ }
+
+ // Bail early if the points are already outside.
+ if (!AllInBounds(image1, x1, y1)) {
+ result->termination = TrackRegionResult::SOURCE_OUT_OF_BOUNDS;
+ return;
+ }
+ if (!AllInBounds(image2, x2, y2)) {
+ result->termination = TrackRegionResult::DESTINATION_OUT_OF_BOUNDS;
+ return;
+ }
+ // TODO(keir): Check quads to ensure there is some area.
+
+ // Keep a copy of the "original" guess for regularization.
+ double x2_original[4];
+ double y2_original[4];
+ for (int i = 0; i < 4; ++i) {
+ x2_original[i] = x2[i];
+ y2_original[i] = y2[i];
+ }
+
+ // Prepare the image and gradient.
+ Array3Df image_and_gradient1;
+ Array3Df image_and_gradient2;
+ BlurredImageAndDerivativesChannels(image1, options.sigma,
+ &image_and_gradient1);
+ BlurredImageAndDerivativesChannels(image2, options.sigma,
+ &image_and_gradient2);
+
+ // Possibly do a brute-force translation-only initialization.
+ if (SearchAreaTooBigForDescent(image2, x2, y2) &&
+ options.use_brute_initialization) {
+ LG << "Running brute initialization...";
+ bool found_any_alignment = BruteTranslationOnlyInitialize<Warp>(
+ image_and_gradient1,
+ options.image1_mask,
+ image2,
+ options.num_extra_points,
+ options.use_normalized_intensities,
+ x1, y1, x2, y2);
+ if (!found_any_alignment) {
+ LG << "Brute failed to find an alignment; pattern too small. "
+ << "Failing entire track operation.";
+ result->termination = TrackRegionResult::INSUFFICIENT_PATTERN_AREA;
+ return;
+ }
+ for (int i = 0; i < 4; ++i) {
+ LG << "P" << i << ": (" << x1[i] << ", " << y1[i] << "); brute ("
+ << x2[i] << ", " << y2[i] << "); (dx, dy): (" << (x2[i] - x1[i])
+ << ", " << (y2[i] - y1[i]) << ").";
+ }
+ }
+
+ // Prepare the initial warp parameters from the four correspondences.
+ // Note: This must happen after the brute initialization runs, since the
+ // brute initialization mutates x2 and y2 in place.
+ Warp warp(x1, y1, x2, y2);
+
+ // Decide how many samples to use in the x and y dimensions.
+ int num_samples_x;
+ int num_samples_y;
+ PickSampling(x1, y1, x2, y2, &num_samples_x, &num_samples_y);
+
+ // Compute the warp from rectangular coordinates.
+ Mat3 canonical_homography = ComputeCanonicalHomography(x1, y1,
+ num_samples_x,
+ num_samples_y);
+
+ ceres::Problem problem;
+
+ // Construct the warp cost function. AutoDiffCostFunction takes ownership.
+ PixelDifferenceCostFunctor<Warp> *pixel_difference_cost_function =
+ new PixelDifferenceCostFunctor<Warp>(options,
+ image_and_gradient1,
+ image_and_gradient2,
+ canonical_homography,
+ num_samples_x,
+ num_samples_y,
+ warp);
+ problem.AddResidualBlock(
+ new ceres::AutoDiffCostFunction<
+ PixelDifferenceCostFunctor<Warp>,
+ ceres::DYNAMIC,
+ Warp::NUM_PARAMETERS>(pixel_difference_cost_function,
+ num_samples_x * num_samples_y),
+ NULL,
+ warp.parameters);
+
+ // Construct the regularizing cost function
+ if (options.regularization_coefficient != 0.0) {
+ WarpRegularizingCostFunctor<Warp> *regularizing_warp_cost_function =
+ new WarpRegularizingCostFunctor<Warp>(options,
+ x1, y2,
+ x2_original,
+ y2_original,
+ warp);
+
+ problem.AddResidualBlock(
+ new ceres::AutoDiffCostFunction<
+ WarpRegularizingCostFunctor<Warp>,
+ 8 /* num_residuals */,
+ Warp::NUM_PARAMETERS>(regularizing_warp_cost_function),
+ NULL,
+ warp.parameters);
+ }
+
+ // Configure the solve.
+ ceres::Solver::Options solver_options;
+ solver_options.linear_solver_type = ceres::DENSE_QR;
+ solver_options.max_num_iterations = options.max_iterations;
+ solver_options.update_state_every_iteration = true;
+ solver_options.parameter_tolerance = 1e-16;
+ solver_options.function_tolerance = 1e-16;
+
+ // Prevent the corners from going outside the destination image and
+ // terminate if the optimizer is making tiny moves (converged).
+ TerminationCheckingCallback<Warp> callback(options, image2, warp, x1, y1);
+ solver_options.callbacks.push_back(&callback);
+
+ // Run the solve.
+ ceres::Solver::Summary summary;
+ ceres::Solve(solver_options, &problem, &summary);
+
+ LG << "Summary:\n" << summary.FullReport();
+
+ // Update the four points with the found solution; if the solver failed, then
+ // the warp parameters are the identity (so ignore failure).
+ //
+ // Also warp any extra points on the end of the array.
+ for (int i = 0; i < 4 + options.num_extra_points; ++i) {
+ warp.Forward(warp.parameters, x1[i], y1[i], x2 + i, y2 + i);
+ LG << "Warped point " << i << ": (" << x1[i] << ", " << y1[i] << ") -> ("
+ << x2[i] << ", " << y2[i] << "); (dx, dy): (" << (x2[i] - x1[i]) << ", "
+ << (y2[i] - y1[i]) << ").";
+ }
+
+ // TODO(keir): Update the result statistics.
+ // TODO(keir): Add a normalize-cross-correlation variant.
+
+ if (summary.termination_type == ceres::USER_FAILURE) {
+ result->termination = TrackRegionResult::FELL_OUT_OF_BOUNDS;
+ return;
+ }
+
+#define HANDLE_TERMINATION(termination_enum) \
+ if (summary.termination_type == ceres::termination_enum) { \
+ result->termination = TrackRegionResult::termination_enum; \
+ return; \
+ }
+
+ // Avoid computing correlation for tracking failures.
+ HANDLE_TERMINATION(FAILURE);
+
+ // Otherwise, run a final correlation check.
+ if (options.minimum_correlation > 0.0) {
+ result->correlation = pixel_difference_cost_function->
+ PearsonProductMomentCorrelationCoefficient(warp.parameters);
+ if (result->correlation < options.minimum_correlation) {
+ LG << "Failing with insufficient correlation.";
+ result->termination = TrackRegionResult::INSUFFICIENT_CORRELATION;
+ return;
+ }
+ }
+
+ // This happens when the minimum corner shift tolerance is reached. Due to
+ // how the tolerance is computed this can't be done by Ceres. So return the
+ // same termination enum as Ceres, even though this is slightly different
+ // than Ceres's parameter tolerance, which operates on the raw parameter
+ // values rather than the pixel shifts of the patch corners.
+ if (summary.termination_type == ceres::USER_SUCCESS) {
+ result->termination = TrackRegionResult::CONVERGENCE;
+ return;
+ }
+
+ HANDLE_TERMINATION(CONVERGENCE);
+ HANDLE_TERMINATION(NO_CONVERGENCE);
+#undef HANDLE_TERMINATION
+};
+
+void TrackRegion(const FloatImage &image1,
+ const FloatImage &image2,
+ const double *x1, const double *y1,
+ const TrackRegionOptions &options,
+ double *x2, double *y2,
+ TrackRegionResult *result) {
+ // Enum is necessary due to templated nature of autodiff.
+#define HANDLE_MODE(mode_enum, mode_type) \
+ if (options.mode == TrackRegionOptions::mode_enum) { \
+ TemplatedTrackRegion<mode_type>(image1, image2, \
+ x1, y1, \
+ options, \
+ x2, y2, \
+ result); \
+ return; \
+ }
+ HANDLE_MODE(TRANSLATION, TranslationWarp);
+ HANDLE_MODE(TRANSLATION_SCALE, TranslationScaleWarp);
+ HANDLE_MODE(TRANSLATION_ROTATION, TranslationRotationWarp);
+ HANDLE_MODE(TRANSLATION_ROTATION_SCALE, TranslationRotationScaleWarp);
+ HANDLE_MODE(AFFINE, AffineWarp);
+ HANDLE_MODE(HOMOGRAPHY, HomographyWarp);
+#undef HANDLE_MODE
+}
+
+bool SamplePlanarPatch(const FloatImage &image,
+ const double *xs, const double *ys,
+ int num_samples_x, int num_samples_y,
+ FloatImage *mask, FloatImage *patch,
+ double *warped_position_x, double *warped_position_y) {
+ // Bail early if the points are outside the image.
+ if (!AllInBounds(image, xs, ys)) {
+ LG << "Can't sample patch: out of bounds.";
+ return false;
+ }
+
+ // Make the patch have the appropriate size, and match the depth of image.
+ patch->Resize(num_samples_y, num_samples_x, image.Depth());
+
+ // Compute the warp from rectangular coordinates.
+ Mat3 canonical_homography = ComputeCanonicalHomography(xs, ys,
+ num_samples_x,
+ num_samples_y);
+
+ // Walk over the coordinates in the canonical space, sampling from the image
+ // in the original space and copying the result into the patch.
+ for (int r = 0; r < num_samples_y; ++r) {
+ for (int c = 0; c < num_samples_x; ++c) {
+ Vec3 image_position = canonical_homography * Vec3(c, r, 1);
+ image_position /= image_position(2);
+ SampleLinear(image, image_position(1),
+ image_position(0),
+ &(*patch)(r, c, 0));
+ if (mask) {
+ float mask_value = SampleLinear(*mask, image_position(1),
+ image_position(0), 0);
+
+ for (int d = 0; d < image.Depth(); d++)
+ (*patch)(r, c, d) *= mask_value;
+ }
+ }
+ }
+
+ Vec3 warped_position = canonical_homography.inverse() * Vec3(xs[4], ys[4], 1);
+ warped_position /= warped_position(2);
+
+ *warped_position_x = warped_position(0);
+ *warped_position_y = warped_position(1);
+
+ return true;
+}
+
+} // namespace libmv