1 files changed, 450 insertions, 0 deletions
diff --git a/intern/libmv/libmv/simple_pipeline/keyframe_selection.cc b/intern/libmv/libmv/simple_pipeline/keyframe_selection.cc
new file mode 100644
index 00000000000..241b5600505
--- /dev/null
+++ b/intern/libmv/libmv/simple_pipeline/keyframe_selection.cc
@@ -0,0 +1,450 @@
+// Copyright (c) 2012 libmv authors.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to
+// deal in the Software without restriction, including without limitation the
+// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+// sell copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+// IN THE SOFTWARE.
+
+#include "libmv/simple_pipeline/keyframe_selection.h"
+
+#include "libmv/numeric/numeric.h"
+#include "ceres/ceres.h"
+#include "libmv/logging/logging.h"
+#include "libmv/multiview/homography.h"
+#include "libmv/multiview/fundamental.h"
+#include "libmv/simple_pipeline/intersect.h"
+#include "libmv/simple_pipeline/bundle.h"
+
+#include <Eigen/Eigenvalues>
+
+namespace libmv {
+namespace {
+
+Mat3 IntrinsicsNormalizationMatrix(const CameraIntrinsics &intrinsics) {
+  Mat3 T = Mat3::Identity(), S = Mat3::Identity();
+
+  T(0, 2) = -intrinsics.principal_point_x();
+  T(1, 2) = -intrinsics.principal_point_y();
+
+  S(0, 0) /= intrinsics.focal_length_x();
+  S(1, 1) /= intrinsics.focal_length_y();
+
+  return S * T;
+}
+
+// P.H.S. Torr
+// Geometric Motion Segmentation and Model Selection
+//
+// http://reference.kfupm.edu.sa/content/g/e/geometric_motion_segmentation_and_model__126445.pdf
+//
+// d is the number of dimensions modeled
+//     (d = 3 for a fundamental matrix or 2 for a homography)
+// k is the number of degrees of freedom in the model
+//     (k = 7 for a fundamental matrix or 8 for a homography)
+// r is the dimension of the data
+//     (r = 4 for 2D correspondences between two frames)
+double GRIC(const Vec &e, int d, int k, int r) {
+  int n = e.rows();
+  double lambda1 = log(static_cast<double>(r));
+  double lambda2 = log(static_cast<double>(r * n));
+
+  // lambda3 limits the residual error, and this paper
+  // http://elvera.nue.tu-berlin.de/files/0990Knorr2006.pdf
+  // suggests using lambda3 of 2
+  // same value is used in Torr's Problem of degeneracy in structure
+  // and motion recovery from uncalibrated image sequences
+  // http://www.robots.ox.ac.uk/~vgg/publications/papers/torr99.ps.gz
+  double lambda3 = 2.0;
+
+  // Variance of tracker position. Physically, this is typically about 0.1px,
+  // and when squared becomes 0.01 px^2.
+  double sigma2 = 0.01;
+
+  // Finally, calculate the GRIC score.
+  double gric = 0.0;
+  for (int i = 0; i < n; i++) {
+    gric += std::min(e(i) * e(i) / sigma2, lambda3 * (r - d));
+  }
+  gric += lambda1 * d * n;
+  gric += lambda2 * k;
+  return gric;
+}
+
+// Compute a generalized inverse using eigen value decomposition, clamping the
+// smallest eigenvalues if requested. This is needed to compute the variance of
+// reconstructed 3D points.
+//
+// TODO(keir): Consider moving this into the numeric code, since this is not
+// related to keyframe selection.
+Mat PseudoInverseWithClampedEigenvalues(const Mat &matrix,
+                                        int num_eigenvalues_to_clamp) {
+  Eigen::EigenSolver<Mat> eigen_solver(matrix);
+  Mat D = eigen_solver.pseudoEigenvalueMatrix();
+  Mat V = eigen_solver.pseudoEigenvectors();
+
+  // Clamp too-small singular values to zero to prevent numeric blowup.
+  double epsilon = std::numeric_limits<double>::epsilon();
+  for (int i = 0; i < D.cols(); ++i) {
+    if (D(i, i) > epsilon) {
+      D(i, i) = 1.0 / D(i, i);
+    } else {
+      D(i, i) = 0.0;
+    }
+  }
+
+  // Apply the clamp.
+  for (int i = D.cols() - num_eigenvalues_to_clamp; i < D.cols(); ++i) {
+    D(i, i) = 0.0;
+  }
+  return V * D * V.inverse();
+}
+
+void FilterZeroWeightMarkersFromTracks(const Tracks &tracks,
+                                       Tracks *filtered_tracks) {
+  vector<Marker> all_markers = tracks.AllMarkers();
+
+  for (int i = 0; i < all_markers.size(); ++i) {
+    Marker &marker = all_markers[i];
+    if (marker.weight != 0.0) {
+      filtered_tracks->Insert(marker.image,
+                              marker.track,
+                              marker.x,
+                              marker.y,
+                              marker.weight);
+    }
+  }
+}
+
+}  // namespace
+
+void SelectKeyframesBasedOnGRICAndVariance(const Tracks &_tracks,
+                                           const CameraIntrinsics &intrinsics,
+                                           vector<int> &keyframes) {
+  // Mirza Tahir Ahmed, Matthew N. Dailey
+  // Robust key frame extraction for 3D reconstruction from video streams
+  //
+  // http://www.cs.ait.ac.th/~mdailey/papers/Tahir-KeyFrame.pdf
+
+  Tracks filtered_tracks;
+  FilterZeroWeightMarkersFromTracks(_tracks, &filtered_tracks);
+
+  int max_image = filtered_tracks.MaxImage();
+  int next_keyframe = 1;
+  int number_keyframes = 0;
+
+  // Limit correspondence ratio from both sides.
+  // On the one hand if number of correspondent features is too low,
+  // triangulation will suffer.
+  // On the other hand high correspondence likely means short baseline.
+  // which also will affect om accuracy
+  const double Tmin = 0.8;
+  const double Tmax = 1.0;
+
+  Mat3 N = IntrinsicsNormalizationMatrix(intrinsics);
+  Mat3 N_inverse = N.inverse();
+
+  double Sc_best = std::numeric_limits<double>::max();
+  double success_intersects_factor_best = 0.0f;
+
+  while (next_keyframe != -1) {
+    int current_keyframe = next_keyframe;
+    double Sc_best_candidate = std::numeric_limits<double>::max();
+
+    LG << "Found keyframe " << next_keyframe;
+
+    number_keyframes++;
+    next_keyframe = -1;
+
+    for (int candidate_image = current_keyframe + 1;
+         candidate_image <= max_image;
+         candidate_image++) {
+      // Conjunction of all markers from both keyframes
+      vector<Marker> all_markers =
+        filtered_tracks.MarkersInBothImages(current_keyframe,
+                                            candidate_image);
+
+      // Match keypoints between frames current_keyframe and candidate_image
+      vector<Marker> tracked_markers =
+        filtered_tracks.MarkersForTracksInBothImages(current_keyframe,
+                                                     candidate_image);
+
+      // Correspondences in normalized space
+      Mat x1, x2;
+      CoordinatesForMarkersInImage(tracked_markers, current_keyframe, &x1);
+      CoordinatesForMarkersInImage(tracked_markers, candidate_image, &x2);
+
+      LG << "Found " << x1.cols()
+         << " correspondences between " << current_keyframe
+         << " and " << candidate_image;
+
+      // Not enough points to construct fundamental matrix
+      if (x1.cols() < 8 || x2.cols() < 8)
+        continue;
+
+      // STEP 1: Correspondence ratio constraint
+      int Tc = tracked_markers.size();
+      int Tf = all_markers.size();
+      double Rc = static_cast<double>(Tc) / Tf;
+
+      LG << "Correspondence between " << current_keyframe
+         << " and " << candidate_image
+         << ": " << Rc;
+
+      if (Rc < Tmin || Rc > Tmax)
+        continue;
+
+      Mat3 H, F;
+
+      // Estimate homography using default options.
+      EstimateHomographyOptions estimate_homography_options;
+      EstimateHomography2DFromCorrespondences(x1,
+                                              x2,
+                                              estimate_homography_options,
+                                              &H);
+
+      // Convert homography to original pixel space.
+      H = N_inverse * H * N;
+
+      EstimateFundamentalOptions estimate_fundamental_options;
+      EstimateFundamentalFromCorrespondences(x1,
+                                             x2,
+                                             estimate_fundamental_options,
+                                             &F);
+
+      // Convert fundamental to original pixel space.
+      F = N_inverse * F * N;
+
+      // TODO(sergey): STEP 2: Discard outlier matches
+
+      // STEP 3: Geometric Robust Information Criteria
+
+      // Compute error values for homography and fundamental matrices
+      Vec H_e, F_e;
+      H_e.resize(x1.cols());
+      F_e.resize(x1.cols());
+      for (int i = 0; i < x1.cols(); i++) {
+        Vec2 current_x1, current_x2;
+
+        intrinsics.NormalizedToImageSpace(x1(0, i), x1(1, i),
+                                          &current_x1(0), &current_x1(1));
+
+        intrinsics.NormalizedToImageSpace(x2(0, i), x2(1, i),
+                                          &current_x2(0), &current_x2(1));
+
+        H_e(i) = SymmetricGeometricDistance(H, current_x1, current_x2);
+        F_e(i) = SymmetricEpipolarDistance(F, current_x1, current_x2);
+      }
+
+      LG << "H_e: " << H_e.transpose();
+      LG << "F_e: " << F_e.transpose();
+
+      // Degeneracy constraint
+      double GRIC_H = GRIC(H_e, 2, 8, 4);
+      double GRIC_F = GRIC(F_e, 3, 7, 4);
+
+      LG << "GRIC values for frames " << current_keyframe
+         << " and " << candidate_image
+         << ", H-GRIC: " << GRIC_H
+         << ", F-GRIC: " << GRIC_F;
+
+      if (GRIC_H <= GRIC_F)
+        continue;
+
+      // TODO(sergey): STEP 4: PELC criterion
+
+      // STEP 5: Estimation of reconstruction error
+      //
+      // Uses paper Keyframe Selection for Camera Motion and Structure
+      // Estimation from Multiple Views
+      // Uses ftp://ftp.tnt.uni-hannover.de/pub/papers/2004/ECCV2004-TTHBAW.pdf
+      // Basically, equation (15)
+      //
+      // TODO(sergey): separate all the constraints into functions,
+      //               this one is getting to much cluttered already
+
+      // Definitions in equation (15):
+      // - I is the number of 3D feature points
+      // - A is the number of essential parameters of one camera
+
+      EuclideanReconstruction reconstruction;
+
+      // The F matrix should be an E matrix, but squash it just to be sure
+
+      // Reconstruction should happen using normalized fundamental matrix
+      Mat3 F_normal = N * F * N_inverse;
+
+      Mat3 E;
+      FundamentalToEssential(F_normal, &E);
+
+      // Recover motion between the two images. Since this function assumes a
+      // calibrated camera, use the identity for K
+      Mat3 R;
+      Vec3 t;
+      Mat3 K = Mat3::Identity();
+
+      if (!MotionFromEssentialAndCorrespondence(E,
+                                                K, x1.col(0),
+                                                K, x2.col(0),
+                                                &R, &t)) {
+        LG << "Failed to compute R and t from E and K";
+        continue;
+      }
+
+      LG << "Camera transform between frames " << current_keyframe
+         << " and " << candidate_image
+         << ":\nR:\n" << R
+         << "\nt:" << t.transpose();
+
+      // First camera is identity, second one is relative to it
+      reconstruction.InsertCamera(current_keyframe,
+                                  Mat3::Identity(),
+                                  Vec3::Zero());
+      reconstruction.InsertCamera(candidate_image, R, t);
+
+      // Reconstruct 3D points
+      int intersects_total = 0, intersects_success = 0;
+      for (int i = 0; i < tracked_markers.size(); i++) {
+        if (!reconstruction.PointForTrack(tracked_markers[i].track)) {
+          vector<Marker> reconstructed_markers;
+
+          int track = tracked_markers[i].track;
+
+          reconstructed_markers.push_back(tracked_markers[i]);
+
+          // We know there're always only two markers for a track
+          // Also, we're using brute-force search because we don't
+          // actually know about markers layout in a list, but
+          // at this moment this cycle will run just once, which
+          // is not so big deal
+
+          for (int j = i + 1; j < tracked_markers.size(); j++) {
+            if (tracked_markers[j].track == track) {
+              reconstructed_markers.push_back(tracked_markers[j]);
+              break;
+            }
+          }
+
+          intersects_total++;
+
+          if (EuclideanIntersect(reconstructed_markers, &reconstruction)) {
+            LG << "Ran Intersect() for track " << track;
+            intersects_success++;
+          } else {
+            LG << "Filed to intersect track " << track;
+          }
+        }
+      }
+
+      double success_intersects_factor =
+          (double) intersects_success / intersects_total;
+
+      if (success_intersects_factor < success_intersects_factor_best) {
+        LG << "Skip keyframe candidate because of "
+              "lower successful intersections ratio";
+
+        continue;
+      }
+
+      success_intersects_factor_best = success_intersects_factor;
+
+      Tracks two_frames_tracks(tracked_markers);
+      PolynomialCameraIntrinsics empty_intrinsics;
+      BundleEvaluation evaluation;
+      evaluation.evaluate_jacobian = true;
+
+      EuclideanBundleCommonIntrinsics(two_frames_tracks,
+                                      BUNDLE_NO_INTRINSICS,
+                                      BUNDLE_NO_CONSTRAINTS,
+                                      &reconstruction,
+                                      &empty_intrinsics,
+                                      &evaluation);
+
+      Mat &jacobian = evaluation.jacobian;
+
+      Mat JT_J = jacobian.transpose() * jacobian;
+      // There are 7 degrees of freedom, so clamp them out.
+      Mat JT_J_inv = PseudoInverseWithClampedEigenvalues(JT_J, 7);
+
+      Mat temp_derived = JT_J * JT_J_inv * JT_J;
+      bool is_inversed = (temp_derived - JT_J).cwiseAbs2().sum() <
+          1e-4 * std::min(temp_derived.cwiseAbs2().sum(),
+                          JT_J.cwiseAbs2().sum());
+
+      LG << "Check on inversed: " << (is_inversed ? "true" : "false" )
+         << ", det(JT_J): " << JT_J.determinant();
+
+      if (!is_inversed) {
+        LG << "Ignoring candidature due to poor jacobian stability";
+        continue;
+      }
+
+      Mat Sigma_P;
+      Sigma_P = JT_J_inv.bottomRightCorner(evaluation.num_points * 3,
+                                           evaluation.num_points * 3);
+
+      int I = evaluation.num_points;
+      int A = 12;
+
+      double Sc = static_cast<double>(I + A) / Square(3 * I) * Sigma_P.trace();
+
+      LG << "Expected estimation error between "
+         << current_keyframe << " and "
+         << candidate_image << ": " << Sc;
+
+      // Pairing with a lower Sc indicates a better choice
+      if (Sc > Sc_best_candidate)
+        continue;
+
+      Sc_best_candidate = Sc;
+
+      next_keyframe = candidate_image;
+    }
+
+    // This is a bit arbitrary and main reason of having this is to deal
+    // better with situations when there's no keyframes were found for
+    // current keyframe this could happen when there's no so much parallax
+    // in the beginning of image sequence and then most of features are
+    // getting occluded. In this case there could be good keyframe pair in
+    // the middle of the sequence
+    //
+    // However, it's just quick hack and smarter way to do this would be nice
+    if (next_keyframe == -1) {
+      next_keyframe = current_keyframe + 10;
+      number_keyframes = 0;
+
+      if (next_keyframe >= max_image)
+        break;
+
+      LG << "Starting searching for keyframes starting from " << next_keyframe;
+    } else {
+      // New pair's expected reconstruction error is lower
+      // than existing pair's one.
+      //
+      // For now let's store just one candidate, easy to
+      // store more candidates but needs some thoughts
+      // how to choose best one automatically from them
+      // (or allow user to choose pair manually).
+      if (Sc_best > Sc_best_candidate) {
+        keyframes.clear();
+        keyframes.push_back(current_keyframe);
+        keyframes.push_back(next_keyframe);
+        Sc_best = Sc_best_candidate;
+      }
+    }
+  }
+}
+
+}  // namespace libmv