From b16e924251f10d878c5c056aed0c4133d3e00c82 Mon Sep 17 00:00:00 2001 From: Sergey Sharybin Date: Tue, 30 Sep 2014 00:39:45 +0600 Subject: Libmv: Update Ceres to the latest upstream version Mainly to let ITERATIVE_SCHUR use an explicit Schur Complement matrix. --- extern/libmv/third_party/ceres/ChangeLog | 193 +++++++++++---------- .../libmv/third_party/ceres/include/ceres/solver.h | 24 +++ .../internal/ceres/block_jacobi_preconditioner.cc | 107 +++++------- .../internal/ceres/block_jacobi_preconditioner.h | 17 +- .../ceres/block_random_access_diagonal_matrix.cc | 40 ++++- .../ceres/block_random_access_diagonal_matrix.h | 9 +- .../ceres/block_random_access_sparse_matrix.cc | 40 ++++- .../ceres/block_random_access_sparse_matrix.h | 18 +- .../third_party/ceres/internal/ceres/callbacks.cc | 2 +- .../ceres/internal/ceres/linear_solver.cc | 6 +- .../ceres/internal/ceres/linear_solver.h | 4 +- .../internal/ceres/schur_complement_solver.cc | 158 ++++++++++++++++- .../ceres/internal/ceres/schur_complement_solver.h | 11 ++ .../internal/ceres/schur_jacobi_preconditioner.cc | 30 +--- .../internal/ceres/schur_jacobi_preconditioner.h | 4 - .../third_party/ceres/internal/ceres/solver.cc | 8 + .../internal/ceres/trust_region_preprocessor.cc | 2 + 17 files changed, 455 insertions(+), 218 deletions(-) (limited to 'extern') diff --git a/extern/libmv/third_party/ceres/ChangeLog b/extern/libmv/third_party/ceres/ChangeLog index cd168a44b35..ee67da5afb9 100644 --- a/extern/libmv/third_party/ceres/ChangeLog +++ b/extern/libmv/third_party/ceres/ChangeLog @@ -1,3 +1,106 @@ +commit b44cfdef25f6bf0917a23b3fd65cce38aa6a3362 +Author: Sameer Agarwal +Date: Mon Sep 29 07:53:54 2014 -0700 + + Let ITERATIVE_SCHUR use an explicit Schur Complement matrix. + + Up till now ITERATIVE_SCHUR evaluates matrix-vector products + between the Schur complement and a vector implicitly by exploiting + the algebraic expression for the Schur complement. + + This cost of this evaluation scales with the number of non-zeros + in the Jacobian. + + For small to medium sized problems there is a sweet spot where + computing the Schur complement is cheap enough that it is much + more efficient to explicitly compute it and use it for evaluating + the matrix-vector products. + + This changes implements support for an explicit Schur complement + in ITERATIVE_SCHUR in combination with the SCHUR_JACOBI preconditioner. + + API wise a new bool Solver::Options::use_explicit_schur_complement + has been added. + + The implementation extends the SparseSchurComplementSolver to use + Conjugate Gradients. + + Example speedup: + + use_explicit_schur_complement = false + + Time (in seconds): + Preprocessor 0.585 + + Residual evaluation 0.319 + Jacobian evaluation 1.590 + Linear solver 25.685 + Minimizer 27.990 + + Postprocessor 0.010 + Total 28.585 + + use_explicit_schur_complement = true + + Time (in seconds): + Preprocessor 0.638 + + Residual evaluation 0.318 + Jacobian evaluation 1.507 + Linear solver 5.930 + Minimizer 8.144 + + Postprocessor 0.010 + Total 8.791 + + Which indicates an end-to-end speedup of more than 3x, with the linear + solver being sped up by > 4x. + + The idea to explore this optimization was inspired by the recent paper: + + Mining structure fragments for smart bundle adjustment + L. Carlone, P. Alcantarilla, H. Chiu, K. Zsolt, F. Dellaert + British Machine Vision Conference, 2014 + + which uses a more complicated algorithm to compute parts of the + Schur complement to speed up the matrix-vector product. + + Change-Id: I95324af0ab351faa1600f5204039a1d2a64ae61d + +commit 4ad91490827f2ebebcc70d17e63ef653bf06fd0d +Author: Sameer Agarwal +Date: Wed Sep 24 23:54:18 2014 -0700 + + Simplify the Block Jacobi and Schur Jacobi preconditioners. + + 1. Extend the implementation of BlockRandomAccessDiagonalMatrix + by adding Invert and RightMultiply methods. + + 2. Simplify the implementation of the Schur Jacobi preconditioner + using these new methods. + + 3. Replace the custom storage used inside Block Jacobi preconditioner + with BlockRandomAccessDiagonalMatrix and simplify its implementation + too. + + Change-Id: I9d4888b35f0f228c08244abbdda5298b3ce9c466 + +commit 8f7be1036b853addc33224d97b92412b5a1281b6 +Author: Sameer Agarwal +Date: Mon Sep 29 08:13:35 2014 -0700 + + Fix a formatting error TrustRegionMinimizer logging. + + Change-Id: Iad1873c51eece46c3fdee1356d154367cfd7925e + +commit c99872d48e322662ea19efb9010a62b7432687ae +Author: Sameer Agarwal +Date: Wed Sep 24 21:30:02 2014 -0700 + + Add BlockRandomAccessSparseMatrix::SymmetricRightMultiply. + + Change-Id: Ib06a22a209b4c985ba218162dfb6bf46bd93169e + commit d3ecd18625ba260e0d00912a305a448b566acc59 Author: Sameer Agarwal Date: Tue Sep 23 10:12:42 2014 -0700 @@ -560,93 +663,3 @@ Date: Mon Aug 4 22:45:53 2014 -0700 Small changes from Jim Roseborough. Change-Id: Ic8b19ea5c5f4f8fd782eb4420b30514153087d18 - -commit a521fc3afc11425b46992388a83ef07017d02ac9 -Author: Sameer Agarwal -Date: Fri Aug 1 08:27:35 2014 -0700 - - Simplify, cleanup and instrument SchurComplementSolver. - - The instrumentation revealed that EIGEN_SPARSE can be upto - an order of magnitude slower than CX_SPARSE on some bundle - adjustment problems. - - The problem comes down to the quality of AMD ordering that - CXSparse/Eigen implements. It does particularly badly - on the Schur complement. In the CXSparse implementation - we got around this by considering the block sparsity structure - and computing the AMD ordering on it and lifting it to the - full matrix. - - This is currently not possible with the release version of - Eigen, as the support for using preordered/natural orderings - is in the master branch but has not been released yet. - - Change-Id: I25588d3e723e50606f327db5759f174f58439e29 - -commit b43e73a03485f0fd0fe514e356ad8925731d3a81 -Author: Sameer Agarwal -Date: Fri Aug 1 12:09:09 2014 -0700 - - Simplify the Eigen code in SparseNormalCholeskySolver. - - Simplifying some of the template handling, and remove the use - of SelfAdjointView as it is not needed. The solver itself takes - an argument for where the data is actually stored. - - The performance of SparseNormalCholesky with EIGEN_SPARSE - seems to be on par with CX_SPARSE. - - Change-Id: I69e22a144b447c052b6cbe59ef1aa33eae2dd9e3 - -commit 031598295c6b2f061c171b9b2338919f41b7eb0b -Author: Sameer Agarwal -Date: Thu Jul 17 14:35:18 2014 -0700 - - Enable Eigen as sparse linear algebra library. - - SPARSE_NORMAL_CHOLESKY and SPARSE_SCHUR can now be used - with EIGEN_SPARSE as the backend. - - The performance is not as good as CXSparse. This needs to be - investigated. Is it because the quality of AMD ordering that - we are computing is not as good as the one for CXSparse? This - could be because we are working with the scalar matrix instead - of the block matrix. - - Also, the upper/lower triangular story is not completely clear. - Both of these issues will be benchmarked and tackled in the - near future. - - Also included in this change is a bunch of cleanup to the - SparseNormalCholeskySolver and SparseSchurComplementSolver - classes around the use of the of defines used to conditionally - compile out parts of the code. - - The system_test has been updated to test EIGEN_SPARSE also. - - Change-Id: I46a57e9c4c97782696879e0b15cfc7a93fe5496a - -commit 1b17145adf6aa0072db2989ad799e90313970ab3 -Author: Sameer Agarwal -Date: Wed Jul 30 10:14:15 2014 -0700 - - Make canned loss functions more robust. - - The loss functions that ship with ceres can sometimes - generate a zero first derivative if the residual is too - large. - - In such cases Corrector fails with an ugly undebuggable - crash. This CL is the first in a series of fixes to - take care of this. - - We clamp the values of rho' from below by - numeric_limits::min(). - - Also included here is some minor cleanup where the constants - are treated as doubles rather than integers. - - Thanks to Pierre Moulon for reporting this problem. - - Change-Id: I3aaf375303ecc2659bbf6fb56a812e7dc3a41106 diff --git a/extern/libmv/third_party/ceres/include/ceres/solver.h b/extern/libmv/third_party/ceres/include/ceres/solver.h index 0af34cacef2..a5efa2a3915 100644 --- a/extern/libmv/third_party/ceres/include/ceres/solver.h +++ b/extern/libmv/third_party/ceres/include/ceres/solver.h @@ -118,6 +118,7 @@ class CERES_EXPORT Solver { #endif num_linear_solver_threads = 1; + use_explicit_schur_complement = false; use_postordering = false; dynamic_sparsity = false; min_linear_solver_iterations = 0; @@ -496,6 +497,29 @@ class CERES_EXPORT Solver { // smaller than the group containing cameras. shared_ptr linear_solver_ordering; + // Use an explicitly computed Schur complement matrix with + // ITERATIVE_SCHUR. + // + // By default this option is disabled and ITERATIVE_SCHUR + // evaluates evaluates matrix-vector products between the Schur + // complement and a vector implicitly by exploiting the algebraic + // expression for the Schur complement. + // + // The cost of this evaluation scales with the number of non-zeros + // in the Jacobian. + // + // For small to medium sized problems there is a sweet spot where + // computing the Schur complement is cheap enough that it is much + // more efficient to explicitly compute it and use it for evaluating + // the matrix-vector products. + // + // Enabling this option tells ITERATIVE_SCHUR to use an explicitly + // computed Schur complement. + // + // NOTE: This option can only be used with the SCHUR_JACOBI + // preconditioner. + bool use_explicit_schur_complement; + // Sparse Cholesky factorization algorithms use a fill-reducing // ordering to permute the columns of the Jacobian matrix. There // are two ways of doing this. diff --git a/extern/libmv/third_party/ceres/internal/ceres/block_jacobi_preconditioner.cc b/extern/libmv/third_party/ceres/internal/ceres/block_jacobi_preconditioner.cc index 19b749bfc39..ea49f077e37 100644 --- a/extern/libmv/third_party/ceres/internal/ceres/block_jacobi_preconditioner.cc +++ b/extern/libmv/third_party/ceres/internal/ceres/block_jacobi_preconditioner.cc @@ -30,9 +30,9 @@ #include "ceres/block_jacobi_preconditioner.h" -#include "Eigen/Cholesky" #include "ceres/block_sparse_matrix.h" #include "ceres/block_structure.h" +#include "ceres/block_random_access_diagonal_matrix.h" #include "ceres/casts.h" #include "ceres/integral_types.h" #include "ceres/internal/eigen.h" @@ -41,27 +41,14 @@ namespace ceres { namespace internal { BlockJacobiPreconditioner::BlockJacobiPreconditioner( - const BlockSparseMatrix& A) - : num_rows_(A.num_rows()), - block_structure_(*A.block_structure()) { - // Calculate the amount of storage needed. - int storage_needed = 0; - for (int c = 0; c < block_structure_.cols.size(); ++c) { - int size = block_structure_.cols[c].size; - storage_needed += size * size; + const BlockSparseMatrix& A) { + const CompressedRowBlockStructure* bs = A.block_structure(); + vector blocks(bs->cols.size()); + for (int i = 0; i < blocks.size(); ++i) { + blocks[i] = bs->cols[i].size; } - // Size the offsets and storage. - blocks_.resize(block_structure_.cols.size()); - block_storage_.resize(storage_needed); - - // Put pointers to the storage in the offsets. - double* block_cursor = &block_storage_[0]; - for (int c = 0; c < block_structure_.cols.size(); ++c) { - int size = block_structure_.cols[c].size; - blocks_[c] = block_cursor; - block_cursor += size * size; - } + m_.reset(new BlockRandomAccessDiagonalMatrix(blocks)); } BlockJacobiPreconditioner::~BlockJacobiPreconditioner() {} @@ -69,70 +56,54 @@ BlockJacobiPreconditioner::~BlockJacobiPreconditioner() {} bool BlockJacobiPreconditioner::UpdateImpl(const BlockSparseMatrix& A, const double* D) { const CompressedRowBlockStructure* bs = A.block_structure(); - - // Compute the diagonal blocks by block inner products. - std::fill(block_storage_.begin(), block_storage_.end(), 0.0); const double* values = A.values(); - for (int r = 0; r < bs->rows.size(); ++r) { - const int row_block_size = bs->rows[r].block.size; - const vector& cells = bs->rows[r].cells; - for (int c = 0; c < cells.size(); ++c) { - const int col_block_size = bs->cols[cells[c].block_id].size; - ConstMatrixRef m(values + cells[c].position, + m_->SetZero(); + for (int i = 0; i < bs->rows.size(); ++i) { + const int row_block_size = bs->rows[i].block.size; + const vector& cells = bs->rows[i].cells; + for (int j = 0; j < cells.size(); ++j) { + const int block_id = cells[j].block_id; + const int col_block_size = bs->cols[block_id].size; + + int r, c, row_stride, col_stride; + CellInfo* cell_info = m_->GetCell(block_id, block_id, + &r, &c, + &row_stride, &col_stride); + MatrixRef m(cell_info->values, row_stride, col_stride); + ConstMatrixRef b(values + cells[j].position, row_block_size, col_block_size); - - MatrixRef(blocks_[cells[c].block_id], - col_block_size, - col_block_size).noalias() += m.transpose() * m; - - // TODO(keir): Figure out when the below expression is actually faster - // than doing the full rank update. The issue is that for smaller sizes, - // the rankUpdate() function is slower than the full product done above. - // - // On the typical bundling problems, the above product is ~5% faster. - // - // MatrixRef(blocks_[cells[c].block_id], - // col_block_size, - // col_block_size) - // .selfadjointView() - // .rankUpdate(m); - // + m.block(r, c, col_block_size, col_block_size) += b.transpose() * b; } } - // Add the diagonal and invert each block. - for (int c = 0; c < bs->cols.size(); ++c) { - const int size = block_structure_.cols[c].size; - const int position = block_structure_.cols[c].position; - MatrixRef block(blocks_[c], size, size); - - if (D != NULL) { - block.diagonal() += - ConstVectorRef(D + position, size).array().square().matrix(); + if (D != NULL) { + // Add the diagonal. + int position = 0; + for (int i = 0; i < bs->cols.size(); ++i) { + const int block_size = bs->cols[i].size; + int r, c, row_stride, col_stride; + CellInfo* cell_info = m_->GetCell(i, i, + &r, &c, + &row_stride, &col_stride); + MatrixRef m(cell_info->values, row_stride, col_stride); + m.block(r, c, block_size, block_size).diagonal() += + ConstVectorRef(D + position, block_size).array().square().matrix(); + position += block_size; } - - block = block.selfadjointView() - .llt() - .solve(Matrix::Identity(size, size)); } + + m_->Invert(); return true; } void BlockJacobiPreconditioner::RightMultiply(const double* x, double* y) const { - for (int c = 0; c < block_structure_.cols.size(); ++c) { - const int size = block_structure_.cols[c].size; - const int position = block_structure_.cols[c].position; - ConstMatrixRef D(blocks_[c], size, size); - ConstVectorRef x_block(x + position, size); - VectorRef y_block(y + position, size); - y_block += D * x_block; - } + m_->RightMultiply(x, y); } void BlockJacobiPreconditioner::LeftMultiply(const double* x, double* y) const { - RightMultiply(x, y); + m_->RightMultiply(x, y); } } // namespace internal diff --git a/extern/libmv/third_party/ceres/internal/ceres/block_jacobi_preconditioner.h b/extern/libmv/third_party/ceres/internal/ceres/block_jacobi_preconditioner.h index 3505a01248b..85792970925 100644 --- a/extern/libmv/third_party/ceres/internal/ceres/block_jacobi_preconditioner.h +++ b/extern/libmv/third_party/ceres/internal/ceres/block_jacobi_preconditioner.h @@ -1,5 +1,5 @@ // Ceres Solver - A fast non-linear least squares minimizer -// Copyright 2012 Google Inc. All rights reserved. +// Copyright 2014 Google Inc. All rights reserved. // http://code.google.com/p/ceres-solver/ // // Redistribution and use in source and binary forms, with or without @@ -32,6 +32,8 @@ #define CERES_INTERNAL_BLOCK_JACOBI_PRECONDITIONER_H_ #include +#include "ceres/block_random_access_diagonal_matrix.h" +#include "ceres/internal/scoped_ptr.h" #include "ceres/preconditioner.h" namespace ceres { @@ -39,7 +41,6 @@ namespace internal { class BlockSparseMatrix; struct CompressedRowBlockStructure; -class LinearOperator; // A block Jacobi preconditioner. This is intended for use with // conjugate gradients, or other iterative symmetric solvers. To use @@ -60,18 +61,14 @@ class BlockJacobiPreconditioner : public BlockSparseMatrixPreconditioner { // Preconditioner interface virtual void RightMultiply(const double* x, double* y) const; virtual void LeftMultiply(const double* x, double* y) const; - virtual int num_rows() const { return num_rows_; } - virtual int num_cols() const { return num_rows_; } + virtual int num_rows() const { return m_->num_rows(); } + virtual int num_cols() const { return m_->num_rows(); } + const BlockRandomAccessDiagonalMatrix& matrix() const { return *m_; } private: virtual bool UpdateImpl(const BlockSparseMatrix& A, const double* D); - std::vector blocks_; - std::vector block_storage_; - int num_rows_; - - // The block structure of the matrix this preconditioner is for (e.g. J). - const CompressedRowBlockStructure& block_structure_; + scoped_ptr m_; }; } // namespace internal diff --git a/extern/libmv/third_party/ceres/internal/ceres/block_random_access_diagonal_matrix.cc b/extern/libmv/third_party/ceres/internal/ceres/block_random_access_diagonal_matrix.cc index d8bf4ef0cb5..b7ff33184cb 100644 --- a/extern/libmv/third_party/ceres/internal/ceres/block_random_access_diagonal_matrix.cc +++ b/extern/libmv/third_party/ceres/internal/ceres/block_random_access_diagonal_matrix.cc @@ -34,16 +34,19 @@ #include #include #include +#include "Eigen/Dense" #include "ceres/internal/port.h" #include "ceres/internal/scoped_ptr.h" +#include "ceres/stl_util.h" #include "ceres/triplet_sparse_matrix.h" #include "ceres/types.h" -#include "ceres/stl_util.h" #include "glog/logging.h" namespace ceres { namespace internal { +// TODO(sameeragarwal): Drop the dependence on TripletSparseMatrix. + BlockRandomAccessDiagonalMatrix::BlockRandomAccessDiagonalMatrix( const vector& blocks) : blocks_(blocks) { @@ -51,9 +54,9 @@ BlockRandomAccessDiagonalMatrix::BlockRandomAccessDiagonalMatrix( // rows/columns. int num_cols = 0; int num_nonzeros = 0; - vector col_layout; + vector block_positions; for (int i = 0; i < blocks_.size(); ++i) { - col_layout.push_back(num_cols); + block_positions.push_back(num_cols); num_cols += blocks_[i]; num_nonzeros += blocks_[i] * blocks_[i]; } @@ -72,7 +75,7 @@ BlockRandomAccessDiagonalMatrix::BlockRandomAccessDiagonalMatrix( for (int i = 0; i < blocks_.size(); ++i) { const int block_size = blocks_[i]; layout_.push_back(new CellInfo(values + pos)); - const int block_begin = col_layout[i]; + const int block_begin = block_positions[i]; for (int r = 0; r < block_size; ++r) { for (int c = 0; c < block_size; ++c, ++pos) { rows[pos] = block_begin + r; @@ -116,5 +119,34 @@ void BlockRandomAccessDiagonalMatrix::SetZero() { } } +void BlockRandomAccessDiagonalMatrix::Invert() { + double* values = tsm_->mutable_values(); + for (int i = 0; i < blocks_.size(); ++i) { + const int block_size = blocks_[i]; + MatrixRef block(values, block_size, block_size); + block = + block + .selfadjointView() + .llt() + .solve(Matrix::Identity(block_size, block_size)); + values += block_size * block_size; + } +} + +void BlockRandomAccessDiagonalMatrix::RightMultiply(const double* x, + double* y) const { + CHECK_NOTNULL(x); + CHECK_NOTNULL(y); + const double* values = tsm_->values(); + for (int i = 0; i < blocks_.size(); ++i) { + const int block_size = blocks_[i]; + ConstMatrixRef block(values, block_size, block_size); + VectorRef(y, block_size).noalias() += block * ConstVectorRef(x, block_size); + x += block_size; + y += block_size; + values += block_size * block_size; + } +} + } // namespace internal } // namespace ceres diff --git a/extern/libmv/third_party/ceres/internal/ceres/block_random_access_diagonal_matrix.h b/extern/libmv/third_party/ceres/internal/ceres/block_random_access_diagonal_matrix.h index 6b3cff2338f..ea9967817db 100644 --- a/extern/libmv/third_party/ceres/internal/ceres/block_random_access_diagonal_matrix.h +++ b/extern/libmv/third_party/ceres/internal/ceres/block_random_access_diagonal_matrix.h @@ -52,7 +52,7 @@ namespace internal { class BlockRandomAccessDiagonalMatrix : public BlockRandomAccessMatrix { public: // blocks is an array of block sizes. - BlockRandomAccessDiagonalMatrix(const vector& blocks); + explicit BlockRandomAccessDiagonalMatrix(const vector& blocks); // The destructor is not thread safe. It assumes that no one is // modifying any cells when the matrix is being destroyed. @@ -70,11 +70,16 @@ class BlockRandomAccessDiagonalMatrix : public BlockRandomAccessMatrix { // locked. virtual void SetZero(); + // Invert the matrix assuming that each block is positive definite. + void Invert(); + + // y += S * x + void RightMultiply(const double* x, double* y) const; + // Since the matrix is square, num_rows() == num_cols(). virtual int num_rows() const { return tsm_->num_rows(); } virtual int num_cols() const { return tsm_->num_cols(); } - // Access to the underlying matrix object. const TripletSparseMatrix* matrix() const { return tsm_.get(); } TripletSparseMatrix* mutable_matrix() { return tsm_.get(); } diff --git a/extern/libmv/third_party/ceres/internal/ceres/block_random_access_sparse_matrix.cc b/extern/libmv/third_party/ceres/internal/ceres/block_random_access_sparse_matrix.cc index f789436364a..9da16a469d5 100644 --- a/extern/libmv/third_party/ceres/internal/ceres/block_random_access_sparse_matrix.cc +++ b/extern/libmv/third_party/ceres/internal/ceres/block_random_access_sparse_matrix.cc @@ -54,9 +54,9 @@ BlockRandomAccessSparseMatrix::BlockRandomAccessSparseMatrix( // Build the row/column layout vector and count the number of scalar // rows/columns. int num_cols = 0; - vector col_layout; + block_positions_.reserve(blocks_.size()); for (int i = 0; i < blocks_.size(); ++i) { - col_layout.push_back(num_cols); + block_positions_.push_back(num_cols); num_cols += blocks_[i]; } @@ -105,8 +105,8 @@ BlockRandomAccessSparseMatrix::BlockRandomAccessSparseMatrix( layout_[IntPairToLong(row_block_id, col_block_id)]->values - values; for (int r = 0; r < row_block_size; ++r) { for (int c = 0; c < col_block_size; ++c, ++pos) { - rows[pos] = col_layout[row_block_id] + r; - cols[pos] = col_layout[col_block_id] + c; + rows[pos] = block_positions_[row_block_id] + r; + cols[pos] = block_positions_[col_block_id] + c; values[pos] = 1.0; DCHECK_LT(rows[pos], tsm_->num_rows()); DCHECK_LT(cols[pos], tsm_->num_rows()); @@ -154,5 +154,37 @@ void BlockRandomAccessSparseMatrix::SetZero() { } } +void BlockRandomAccessSparseMatrix::SymmetricRightMultiply(const double* x, + double* y) const { + for (LayoutType::const_iterator it = layout_.begin(); + it != layout_.end(); + ++it) { + int row; + int col; + LongToIntPair(it->first, &row, &col); + + const int row_block_size = blocks_[row]; + const int row_block_pos = block_positions_[row]; + const int col_block_size = blocks_[col]; + const int col_block_pos = block_positions_[col]; + + MatrixVectorMultiply( + it->second->values, row_block_size, col_block_size, + x + col_block_pos, + y + row_block_pos); + + // Since the matrix is symmetric, but only the upper triangular + // part is stored, if the block being accessed is not a diagonal + // block, then use the same block to do the corresponding lower + // triangular multiply also. + if (row != col) { + MatrixTransposeVectorMultiply( + it->second->values, row_block_size, col_block_size, + x + row_block_pos, + y + col_block_pos); + } + } +} + } // namespace internal } // namespace ceres diff --git a/extern/libmv/third_party/ceres/internal/ceres/block_random_access_sparse_matrix.h b/extern/libmv/third_party/ceres/internal/ceres/block_random_access_sparse_matrix.h index 27b10296d6c..a4f89d8130f 100644 --- a/extern/libmv/third_party/ceres/internal/ceres/block_random_access_sparse_matrix.h +++ b/extern/libmv/third_party/ceres/internal/ceres/block_random_access_sparse_matrix.h @@ -43,6 +43,7 @@ #include "ceres/internal/port.h" #include "ceres/internal/scoped_ptr.h" #include "ceres/types.h" +#include "ceres/small_blas.h" namespace ceres { namespace internal { @@ -75,6 +76,12 @@ class BlockRandomAccessSparseMatrix : public BlockRandomAccessMatrix { // locked. virtual void SetZero(); + // Assume that the matrix is symmetric and only one half of the + // matrix is stored. + // + // y += S * x + void SymmetricRightMultiply(const double* x, double* y) const; + // Since the matrix is square, num_rows() == num_cols(). virtual int num_rows() const { return tsm_->num_rows(); } virtual int num_cols() const { return tsm_->num_cols(); } @@ -84,13 +91,20 @@ class BlockRandomAccessSparseMatrix : public BlockRandomAccessMatrix { TripletSparseMatrix* mutable_matrix() { return tsm_.get(); } private: - int64 IntPairToLong(int a, int b) { - return a * kMaxRowBlocks + b; + int64 IntPairToLong(int row, int col) const { + return row * kMaxRowBlocks + col; + } + + void LongToIntPair(int64 index, int* row, int* col) const { + *row = index / kMaxRowBlocks; + *col = index % kMaxRowBlocks; } const int64 kMaxRowBlocks; + // row/column block sizes. const vector blocks_; + vector block_positions_; // A mapping from to the position in // the values array of tsm_ where the block is stored. diff --git a/extern/libmv/third_party/ceres/internal/ceres/callbacks.cc b/extern/libmv/third_party/ceres/internal/ceres/callbacks.cc index d2236335c53..7d5ce2548e4 100644 --- a/extern/libmv/third_party/ceres/internal/ceres/callbacks.cc +++ b/extern/libmv/third_party/ceres/internal/ceres/callbacks.cc @@ -81,7 +81,7 @@ CallbackReturnType LoggingCallback::operator()( output = "iter cost cost_change |gradient| |step| tr_ratio tr_radius ls_iter iter_time total_time\n"; } const char* kReportRowFormat = - "% 4d % 8e % 3.2e % 3.2e % 3.2e % 3.2e % 3.2e % 3d % 3.2e % 3.2e"; + "% 4d % 8e % 3.2e % 3.2e % 3.2e % 3.2e % 3.2e % 4d % 3.2e % 3.2e"; output += StringPrintf(kReportRowFormat, summary.iteration, summary.cost, diff --git a/extern/libmv/third_party/ceres/internal/ceres/linear_solver.cc b/extern/libmv/third_party/ceres/internal/ceres/linear_solver.cc index d905ec2e1fd..e479b751363 100644 --- a/extern/libmv/third_party/ceres/internal/ceres/linear_solver.cc +++ b/extern/libmv/third_party/ceres/internal/ceres/linear_solver.cc @@ -96,7 +96,11 @@ LinearSolver* LinearSolver::Create(const LinearSolver::Options& options) { return new DenseSchurComplementSolver(options); case ITERATIVE_SCHUR: - return new IterativeSchurComplementSolver(options); + if (options.use_explicit_schur_complement) { + return new SparseSchurComplementSolver(options); + } else { + return new IterativeSchurComplementSolver(options); + } case DENSE_QR: return new DenseQRSolver(options); diff --git a/extern/libmv/third_party/ceres/internal/ceres/linear_solver.h b/extern/libmv/third_party/ceres/internal/ceres/linear_solver.h index 58b9044b8f9..5f59765f074 100644 --- a/extern/libmv/third_party/ceres/internal/ceres/linear_solver.h +++ b/extern/libmv/third_party/ceres/internal/ceres/linear_solver.h @@ -99,6 +99,7 @@ class LinearSolver { sparse_linear_algebra_library_type(SUITE_SPARSE), use_postordering(false), dynamic_sparsity(false), + use_explicit_schur_complement(false), min_num_iterations(1), max_num_iterations(1), num_threads(1), @@ -114,9 +115,10 @@ class LinearSolver { DenseLinearAlgebraLibraryType dense_linear_algebra_library_type; SparseLinearAlgebraLibraryType sparse_linear_algebra_library_type; - // See solver.h for information about this flag. + // See solver.h for information about these flags. bool use_postordering; bool dynamic_sparsity; + bool use_explicit_schur_complement; // Number of internal iterations that the solver uses. This // parameter only makes sense for iterative solvers like CG. diff --git a/extern/libmv/third_party/ceres/internal/ceres/schur_complement_solver.cc b/extern/libmv/third_party/ceres/internal/ceres/schur_complement_solver.cc index d2aa168c807..33f812b8c96 100644 --- a/extern/libmv/third_party/ceres/internal/ceres/schur_complement_solver.cc +++ b/extern/libmv/third_party/ceres/internal/ceres/schur_complement_solver.cc @@ -40,6 +40,7 @@ #include "ceres/block_random_access_sparse_matrix.h" #include "ceres/block_sparse_matrix.h" #include "ceres/block_structure.h" +#include "ceres/conjugate_gradients_solver.h" #include "ceres/cxsparse.h" #include "ceres/detect_structure.h" #include "ceres/internal/eigen.h" @@ -56,6 +57,61 @@ namespace ceres { namespace internal { +namespace { + +class BlockRandomAccessSparseMatrixAdapter : public LinearOperator { + public: + explicit BlockRandomAccessSparseMatrixAdapter( + const BlockRandomAccessSparseMatrix& m) + : m_(m) { + } + + virtual ~BlockRandomAccessSparseMatrixAdapter() {} + + // y = y + Ax; + virtual void RightMultiply(const double* x, double* y) const { + m_.SymmetricRightMultiply(x, y); + } + + // y = y + A'x; + virtual void LeftMultiply(const double* x, double* y) const { + m_.SymmetricRightMultiply(x, y); + } + + virtual int num_rows() const { return m_.num_rows(); } + virtual int num_cols() const { return m_.num_rows(); } + + private: + const BlockRandomAccessSparseMatrix& m_; +}; + +class BlockRandomAccessDiagonalMatrixAdapter : public LinearOperator { + public: + explicit BlockRandomAccessDiagonalMatrixAdapter( + const BlockRandomAccessDiagonalMatrix& m) + : m_(m) { + } + + virtual ~BlockRandomAccessDiagonalMatrixAdapter() {} + + // y = y + Ax; + virtual void RightMultiply(const double* x, double* y) const { + m_.RightMultiply(x, y); + } + + // y = y + A'x; + virtual void LeftMultiply(const double* x, double* y) const { + m_.RightMultiply(x, y); + } + + virtual int num_rows() const { return m_.num_rows(); } + virtual int num_cols() const { return m_.num_rows(); } + + private: + const BlockRandomAccessDiagonalMatrix& m_; +}; + +} // namespace LinearSolver::Summary SchurComplementSolver::SolveImpl( BlockSparseMatrix* A, @@ -82,7 +138,7 @@ LinearSolver::Summary SchurComplementSolver::SolveImpl( double* reduced_solution = x + A->num_cols() - lhs_->num_cols(); const LinearSolver::Summary summary = - SolveReducedLinearSystem(reduced_solution); + SolveReducedLinearSystem(per_solve_options, reduced_solution); event_logger.AddEvent("ReducedSolve"); if (summary.termination_type == LINEAR_SOLVER_SUCCESS) { @@ -115,7 +171,9 @@ void DenseSchurComplementSolver::InitStorage( // BlockRandomAccessDenseMatrix. The linear system is solved using // Eigen's Cholesky factorization. LinearSolver::Summary -DenseSchurComplementSolver::SolveReducedLinearSystem(double* solution) { +DenseSchurComplementSolver::SolveReducedLinearSystem( + const LinearSolver::PerSolveOptions& per_solve_options, + double* solution) { LinearSolver::Summary summary; summary.num_iterations = 0; summary.termination_type = LINEAR_SOLVER_SUCCESS; @@ -249,14 +307,25 @@ void SparseSchurComplementSolver::InitStorage( } LinearSolver::Summary -SparseSchurComplementSolver::SolveReducedLinearSystem(double* solution) { +SparseSchurComplementSolver::SolveReducedLinearSystem( + const LinearSolver::PerSolveOptions& per_solve_options, + double* solution) { + if (options().type == ITERATIVE_SCHUR) { + CHECK(options().use_explicit_schur_complement); + return SolveReducedLinearSystemUsingConjugateGradients(per_solve_options, + solution); + } + switch (options().sparse_linear_algebra_library_type) { case SUITE_SPARSE: - return SolveReducedLinearSystemUsingSuiteSparse(solution); + return SolveReducedLinearSystemUsingSuiteSparse(per_solve_options, + solution); case CX_SPARSE: - return SolveReducedLinearSystemUsingCXSparse(solution); + return SolveReducedLinearSystemUsingCXSparse(per_solve_options, + solution); case EIGEN_SPARSE: - return SolveReducedLinearSystemUsingEigen(solution); + return SolveReducedLinearSystemUsingEigen(per_solve_options, + solution); default: LOG(FATAL) << "Unknown sparse linear algebra library : " << options().sparse_linear_algebra_library_type; @@ -270,6 +339,7 @@ SparseSchurComplementSolver::SolveReducedLinearSystem(double* solution) { // CHOLMOD's sparse cholesky factorization routines. LinearSolver::Summary SparseSchurComplementSolver::SolveReducedLinearSystemUsingSuiteSparse( + const LinearSolver::PerSolveOptions& per_solve_options, double* solution) { #ifdef CERES_NO_SUITESPARSE @@ -377,6 +447,7 @@ SparseSchurComplementSolver::SolveReducedLinearSystemUsingSuiteSparse( // CXSparse's sparse cholesky factorization routines. LinearSolver::Summary SparseSchurComplementSolver::SolveReducedLinearSystemUsingCXSparse( + const LinearSolver::PerSolveOptions& per_solve_options, double* solution) { #ifdef CERES_NO_CXSPARSE @@ -433,6 +504,7 @@ SparseSchurComplementSolver::SolveReducedLinearSystemUsingCXSparse( // Eigen's sparse cholesky factorization routines. LinearSolver::Summary SparseSchurComplementSolver::SolveReducedLinearSystemUsingEigen( + const LinearSolver::PerSolveOptions& per_solve_options, double* solution) { #ifndef CERES_USE_EIGEN_SPARSE @@ -514,5 +586,79 @@ SparseSchurComplementSolver::SolveReducedLinearSystemUsingEigen( #endif // CERES_USE_EIGEN_SPARSE } +LinearSolver::Summary +SparseSchurComplementSolver::SolveReducedLinearSystemUsingConjugateGradients( + const LinearSolver::PerSolveOptions& per_solve_options, + double* solution) { + const int num_rows = lhs()->num_rows(); + // The case where there are no f blocks, and the system is block + // diagonal. + if (num_rows == 0) { + LinearSolver::Summary summary; + summary.num_iterations = 0; + summary.termination_type = LINEAR_SOLVER_SUCCESS; + summary.message = "Success."; + return summary; + } + + // Only SCHUR_JACOBI is supported over here right now. + CHECK_EQ(options().preconditioner_type, SCHUR_JACOBI); + + if (preconditioner_.get() == NULL) { + preconditioner_.reset(new BlockRandomAccessDiagonalMatrix(blocks_)); + } + + BlockRandomAccessSparseMatrix* sc = + down_cast( + const_cast(lhs())); + + // Extract block diagonal from the Schur complement to construct the + // schur_jacobi preconditioner. + for (int i = 0; i < blocks_.size(); ++i) { + const int block_size = blocks_[i]; + + int sc_r, sc_c, sc_row_stride, sc_col_stride; + CellInfo* sc_cell_info = + CHECK_NOTNULL(sc->GetCell(i, i, + &sc_r, &sc_c, + &sc_row_stride, &sc_col_stride)); + MatrixRef sc_m(sc_cell_info->values, sc_row_stride, sc_col_stride); + + int pre_r, pre_c, pre_row_stride, pre_col_stride; + CellInfo* pre_cell_info = CHECK_NOTNULL( + preconditioner_->GetCell(i, i, + &pre_r, &pre_c, + &pre_row_stride, &pre_col_stride)); + MatrixRef pre_m(pre_cell_info->values, pre_row_stride, pre_col_stride); + + pre_m.block(pre_r, pre_c, block_size, block_size) = + sc_m.block(sc_r, sc_c, block_size, block_size); + } + preconditioner_->Invert(); + + VectorRef(solution, num_rows).setZero(); + + scoped_ptr lhs_adapter( + new BlockRandomAccessSparseMatrixAdapter(*sc)); + scoped_ptr preconditioner_adapter( + new BlockRandomAccessDiagonalMatrixAdapter(*preconditioner_)); + + + LinearSolver::Options cg_options; + cg_options.min_num_iterations = options().min_num_iterations; + cg_options.max_num_iterations = options().max_num_iterations; + ConjugateGradientsSolver cg_solver(cg_options); + + LinearSolver::PerSolveOptions cg_per_solve_options; + cg_per_solve_options.r_tolerance = per_solve_options.r_tolerance; + cg_per_solve_options.q_tolerance = per_solve_options.q_tolerance; + cg_per_solve_options.preconditioner = preconditioner_adapter.get(); + + return cg_solver.Solve(lhs_adapter.get(), + rhs(), + cg_per_solve_options, + solution); +} + } // namespace internal } // namespace ceres diff --git a/extern/libmv/third_party/ceres/internal/ceres/schur_complement_solver.h b/extern/libmv/third_party/ceres/internal/ceres/schur_complement_solver.h index 1b431dc5340..93d23e3d012 100644 --- a/extern/libmv/third_party/ceres/internal/ceres/schur_complement_solver.h +++ b/extern/libmv/third_party/ceres/internal/ceres/schur_complement_solver.h @@ -46,6 +46,7 @@ #include "ceres/suitesparse.h" #include "ceres/internal/scoped_ptr.h" #include "ceres/types.h" +#include "ceres/block_random_access_diagonal_matrix.h" #ifdef CERES_USE_EIGEN_SPARSE #include "Eigen/SparseCholesky" @@ -134,6 +135,7 @@ class SchurComplementSolver : public BlockSparseMatrixSolver { private: virtual void InitStorage(const CompressedRowBlockStructure* bs) = 0; virtual LinearSolver::Summary SolveReducedLinearSystem( + const LinearSolver::PerSolveOptions& per_solve_options, double* solution) = 0; LinearSolver::Options options_; @@ -155,6 +157,7 @@ class DenseSchurComplementSolver : public SchurComplementSolver { private: virtual void InitStorage(const CompressedRowBlockStructure* bs); virtual LinearSolver::Summary SolveReducedLinearSystem( + const LinearSolver::PerSolveOptions& per_solve_options, double* solution); CERES_DISALLOW_COPY_AND_ASSIGN(DenseSchurComplementSolver); @@ -169,12 +172,19 @@ class SparseSchurComplementSolver : public SchurComplementSolver { private: virtual void InitStorage(const CompressedRowBlockStructure* bs); virtual LinearSolver::Summary SolveReducedLinearSystem( + const LinearSolver::PerSolveOptions& per_solve_options, double* solution); LinearSolver::Summary SolveReducedLinearSystemUsingSuiteSparse( + const LinearSolver::PerSolveOptions& per_solve_options, double* solution); LinearSolver::Summary SolveReducedLinearSystemUsingCXSparse( + const LinearSolver::PerSolveOptions& per_solve_options, double* solution); LinearSolver::Summary SolveReducedLinearSystemUsingEigen( + const LinearSolver::PerSolveOptions& per_solve_options, + double* solution); + LinearSolver::Summary SolveReducedLinearSystemUsingConjugateGradients( + const LinearSolver::PerSolveOptions& per_solve_options, double* solution); // Size of the blocks in the Schur complement. @@ -206,6 +216,7 @@ class SparseSchurComplementSolver : public SchurComplementSolver { scoped_ptr simplicial_ldlt_; #endif + scoped_ptr preconditioner_; CERES_DISALLOW_COPY_AND_ASSIGN(SparseSchurComplementSolver); }; diff --git a/extern/libmv/third_party/ceres/internal/ceres/schur_jacobi_preconditioner.cc b/extern/libmv/third_party/ceres/internal/ceres/schur_jacobi_preconditioner.cc index 6dc9e89d3cc..cbdb7086102 100644 --- a/extern/libmv/third_party/ceres/internal/ceres/schur_jacobi_preconditioner.cc +++ b/extern/libmv/third_party/ceres/internal/ceres/schur_jacobi_preconditioner.cc @@ -32,7 +32,6 @@ #include #include -#include "Eigen/Dense" #include "ceres/block_random_access_diagonal_matrix.h" #include "ceres/block_sparse_matrix.h" #include "ceres/collections_port.h" @@ -55,12 +54,12 @@ SchurJacobiPreconditioner::SchurJacobiPreconditioner( << "Jacobian should have atleast 1 f_block for " << "SCHUR_JACOBI preconditioner."; - block_size_.resize(num_blocks); + vector blocks(num_blocks); for (int i = 0; i < num_blocks; ++i) { - block_size_[i] = bs.cols[i + options_.elimination_groups[0]].size; + blocks[i] = bs.cols[i + options_.elimination_groups[0]].size; } - m_.reset(new BlockRandomAccessDiagonalMatrix(block_size_)); + m_.reset(new BlockRandomAccessDiagonalMatrix(blocks)); InitEliminator(bs); } @@ -99,32 +98,13 @@ bool SchurJacobiPreconditioner::UpdateImpl(const BlockSparseMatrix& A, // Compute a subset of the entries of the Schur complement. eliminator_->Eliminate(&A, b.data(), D, m_.get(), rhs.data()); + m_->Invert(); return true; } void SchurJacobiPreconditioner::RightMultiply(const double* x, double* y) const { - CHECK_NOTNULL(x); - CHECK_NOTNULL(y); - - const double* lhs_values = - down_cast(m_.get())->matrix()->values(); - - // This loop can be easily multi-threaded with OpenMP if need be. - for (int i = 0; i < block_size_.size(); ++i) { - const int block_size = block_size_[i]; - ConstMatrixRef block(lhs_values, block_size, block_size); - - VectorRef(y, block_size) = - block - .selfadjointView() - .llt() - .solve(ConstVectorRef(x, block_size)); - - x += block_size; - y += block_size; - lhs_values += block_size * block_size; - } + m_->RightMultiply(x, y); } int SchurJacobiPreconditioner::num_rows() const { diff --git a/extern/libmv/third_party/ceres/internal/ceres/schur_jacobi_preconditioner.h b/extern/libmv/third_party/ceres/internal/ceres/schur_jacobi_preconditioner.h index aecb0151083..8b528e25075 100644 --- a/extern/libmv/third_party/ceres/internal/ceres/schur_jacobi_preconditioner.h +++ b/extern/libmv/third_party/ceres/internal/ceres/schur_jacobi_preconditioner.h @@ -94,11 +94,7 @@ class SchurJacobiPreconditioner : public BlockSparseMatrixPreconditioner { virtual bool UpdateImpl(const BlockSparseMatrix& A, const double* D); Preconditioner::Options options_; - - // Sizes of the blocks in the schur complement. - vector block_size_; scoped_ptr eliminator_; - // Preconditioner matrix. scoped_ptr m_; CERES_DISALLOW_COPY_AND_ASSIGN(SchurJacobiPreconditioner); diff --git a/extern/libmv/third_party/ceres/internal/ceres/solver.cc b/extern/libmv/third_party/ceres/internal/ceres/solver.cc index f90045baac9..e1c5ee30ba5 100644 --- a/extern/libmv/third_party/ceres/internal/ceres/solver.cc +++ b/extern/libmv/third_party/ceres/internal/ceres/solver.cc @@ -120,6 +120,14 @@ bool TrustRegionOptionsAreValid(const Solver::Options& options, string* error) { OPTION_GT(max_consecutive_nonmonotonic_steps, 0); } + if (options.linear_solver_type == ITERATIVE_SCHUR && + options.use_explicit_schur_complement && + options.preconditioner_type != SCHUR_JACOBI) { + *error = "use_explicit_schur_complement only supports" + "SCHUR_JACOBI as the preconditioner."; + return false; + } + if (options.preconditioner_type == CLUSTER_JACOBI && options.sparse_linear_algebra_library_type != SUITE_SPARSE) { *error = "CLUSTER_JACOBI requires " diff --git a/extern/libmv/third_party/ceres/internal/ceres/trust_region_preprocessor.cc b/extern/libmv/third_party/ceres/internal/ceres/trust_region_preprocessor.cc index ba3baa12c1f..22ea1ec8c80 100644 --- a/extern/libmv/third_party/ceres/internal/ceres/trust_region_preprocessor.cc +++ b/extern/libmv/third_party/ceres/internal/ceres/trust_region_preprocessor.cc @@ -184,6 +184,8 @@ bool SetupLinearSolver(PreprocessedProblem* pp) { options.sparse_linear_algebra_library_type; pp->linear_solver_options.dense_linear_algebra_library_type = options.dense_linear_algebra_library_type; + pp->linear_solver_options.use_explicit_schur_complement = + options.use_explicit_schur_complement; pp->linear_solver_options.dynamic_sparsity = options.dynamic_sparsity; pp->linear_solver_options.num_threads = options.num_linear_solver_threads; -- cgit v1.2.3