diff options
author | Sergey Sharybin <sergey.vfx@gmail.com> | 2013-04-22 13:25:37 +0400 |
---|---|---|
committer | Sergey Sharybin <sergey.vfx@gmail.com> | 2013-04-22 13:25:37 +0400 |
commit | d05f5da111e026c2a4dbe0432bca7b3266857893 (patch) | |
tree | 6a3aab8cbca37167aece3f7458538117bf855078 /extern | |
parent | a7f869df64136ba47cfa28327884966d6e90531f (diff) |
Update Ceres to current upstream version
This brings a fix for a threading issue in BLAS,
making the BA step more robust (there was some indeterminacy
caused by this threading issue).
Also brings some optimizations, which do not directly
affect Blender.
Diffstat (limited to 'extern')
28 files changed, 1010 insertions, 630 deletions
diff --git a/extern/libmv/third_party/ceres/CMakeLists.txt b/extern/libmv/third_party/ceres/CMakeLists.txt index e515280826b..06458834af3 100644 --- a/extern/libmv/third_party/ceres/CMakeLists.txt +++ b/extern/libmv/third_party/ceres/CMakeLists.txt @@ -110,6 +110,7 @@ set(SRC internal/ceres/wall_time.cc include/ceres/autodiff_cost_function.h + include/ceres/autodiff_local_parameterization.h include/ceres/ceres.h include/ceres/conditioned_cost_function.h include/ceres/cost_function.h diff --git a/extern/libmv/third_party/ceres/ChangeLog b/extern/libmv/third_party/ceres/ChangeLog index 062bc250617..1a945b01622 100644 --- a/extern/libmv/third_party/ceres/ChangeLog +++ b/extern/libmv/third_party/ceres/ChangeLog @@ -1,3 +1,323 @@ +commit 36f4cd23b24391106e9f3c15b0f9bbcaafc47b20 +Author: Sameer Agarwal <sameeragarwal@google.com> +Date: Sun Apr 21 09:42:26 2013 -0700 + + Disable threads completely if OpenMP is not present. + + This reduces the penalty paid by Mutex lock and unlock operations + in single threaded mode. + + Change-Id: I185380bde73fe87e901fc434d152d6c366ff1d5d + +commit 24fb32b42683cf711a6683e3cff3540b16bb5019 +Author: Sameer Agarwal <sameeragarwal@google.com> +Date: Sat Apr 20 09:02:06 2013 -0700 + + Add whole program optimization for Clang. + + Also reorder the way CERES_CXX_FLAGS is being used for clarity. + + Change-Id: I2bbb90e770d30dd18ecae72939ea03b7fa11e6ae + +commit 2b7497025096a681d7f0351081f83293398d62ef +Author: Sameer Agarwal <sameeragarwal@google.com> +Date: Fri Apr 19 19:52:58 2013 -0700 + + Fix a bounds error in the pre-ordering code. + + Change-Id: I33c968bb075b60ad50374593302e08f42aeacf25 + +commit 9189f4ea4bb2d71ea7f6b9d9bd3290415aee323d +Author: Sameer Agarwal <sameeragarwal@google.com> +Date: Fri Apr 19 17:09:49 2013 -0700 + + Enable pre-ordering for SPARSE_NORMAL_CHOLESKY. + + Sparse Cholesky factorization algorithms use a fill-reducing + ordering to permute the columns of the Jacobian matrix. There + are two ways of doing this. + + 1. 
Compute the Jacobian matrix in some order and then have the + factorization algorithm permute the columns of the Jacobian. + + 2. Compute the Jacobian with its columns already permuted. + + The first option incurs a significant memory penalty. The + factorization algorithm has to make a copy of the permuted + Jacobian matrix. + + Starting with this change Ceres pre-permutes the columns of the + Jacobian matrix and generally speaking, there is no performance + penalty for doing so. + + In some rare cases, it is worth using a more complicated + reordering algorithm which has slightly better runtime + performance at the expense of an extra copy of the Jacobian + matrix. Setting Solver::Options::use_postordering to true + enables this tradeoff. + + This change also removes Solver::Options::use_block_amd + as an option. All matrices are ordered using their block + structure. The ability to order them by their scalar + sparsity structure has been removed. + + Here is what performance on looks like on some BAL problems. + + Memory + ====== + HEAD pre-ordering + 16-22106 137957376.0 113516544.0 + 49-7776 56688640.0 46628864.0 + 245-198739 1718005760.0 1383550976.0 + 257-65132 387715072.0 319512576.0 + 356-226730 2014826496.0 1626087424.0 + 744-543562 4903358464.0 3957878784.0 + 1024-110968 968626176.0 822071296.0 + + Time + ==== + HEAD pre-ordering + 16-22106 3.8 3.7 + 49-7776 1.9 1.8 + 245-198739 82.6 81.9 + 257-65132 14.0 13.4 + 356-226730 98.8 95.8 + 744-543562 325.2 301.6 + 1024-110968 42.1 37.1 + + Change-Id: I6b2e25f3fed7310f88905386a7898ac94d37467e + +commit f7ed22efc3afee36aae71a4f7949b3d327b87f11 +Author: Sameer Agarwal <sameeragarwal@google.com> +Date: Fri Apr 19 14:24:48 2013 -0700 + + Add the ability to order the Program using AMD. + + This will allow CHOLMOD to compute the sparse + Cholesky factorization of J'J without making + a permuted copy of it. 
+ + Change-Id: I25d0e18f5957ab7fdce15c543234bb2f09db482e + +commit c8f07905d76d9ac6fb8d7b9b02e180aa2fa0ab32 +Author: Sameer Agarwal <sameeragarwal@google.com> +Date: Fri Apr 19 08:01:04 2013 -0700 + + Refactor SolverImpl::CreateReducedProgram. + + Break up CreateReducedProgram into smaller functions in + preparation for more sophisticated ordering strategies. + + Change-Id: Ic3897522574fde770646d747fe383f5dbd7a6619 + +commit 2560b17b7cdda1de28c18049c95e6c3188dbde93 +Author: Sameer Agarwal <sameeragarwal@google.com> +Date: Fri Apr 19 08:19:11 2013 -0700 + + SuiteSparse cleanup. + + 1. CreateSparseMatrixTransposeView now returns a struct instead + of a pointer. + + 2. Add AnalyzeCholeskyWithNaturalOrdering. + + Change-Id: If27a5502949c3994edd95be0d25ec7a0d1fa1ae1 + +commit 7823cf23c765450b79f11ac31fc8a16f875c0d84 +Author: Sameer Agarwal <sameeragarwal@google.com> +Date: Thu Apr 18 16:13:56 2013 -0700 + + Fix a typo in problem.h + + Thanks as usual to William Rucklidge. + + Change-Id: If6e8628841ee7fa8978ec56918a80d60b4ff660e + +commit 3d9546963d7c8c5f5dfb12a2df745f4996fd2ec5 +Author: Sameer Agarwal <sameeragarwal@google.com> +Date: Thu Apr 18 14:54:55 2013 -0700 + + Add the ability to query the Problem about parameter blocks. + + Change-Id: Ieda1aefa28e7a1d18fe6c8d1665882e4d9c274f2 + +commit 69ebad42ebfc212339a22c6f06a12ec5a3368098 +Author: Sameer Agarwal <sameeragarwal@google.com> +Date: Wed Apr 17 15:38:00 2013 -0700 + + Change Minimizer::Options::min_trust_region_radius to double. + + This was accidentally an int, which was setting the minimum + trust region radius to zero and effectively disabling a convergence + test based on it. + + (Thanks to Sergey Sharybin for providing a reproduction for this) + + Change-Id: Id0b9e246bcfee074954a5dc6a3a2342adab56c16 + +commit e6707b2411b9a823b6c748f9f9d0b22225d767bb +Author: Sameer Agarwal <sameeragarwal@google.com> +Date: Tue Apr 16 15:44:23 2013 -0700 + + Lint fixes from William Rucklidge. 
+ + Change-Id: I57a6383bb875b24083cd9b7049333292d26f718c + +commit c7e69beb52c2c47182eaf8295025a668d0eefd80 +Author: Sameer Agarwal <sameeragarwal@google.com> +Date: Tue Apr 16 09:39:16 2013 -0700 + + Add a missing mutex lock in the SchurEliminator. This + was lost somewhere along in the BLAS based refactoring. + + Change-Id: I90b94fa9c3a8ea1b900a18f76ef6a7d0dbf24318 + +commit faa72ace9abea24877173158bfec451d5b46597e +Author: Joydeep Biswas <joydeep.biswas@gmail.com> +Date: Mon Apr 15 17:34:43 2013 -0400 + + Update to compile with stricter gcc checks. + + Change-Id: Iecb37cbe7201a4d4f42b21b427fa1d35d0183b1b + +commit 487250eb27256a41d38c5037bdac9a09a3160edb +Author: Sameer Agarwal <sameeragarwal@google.com> +Date: Fri Apr 5 14:20:37 2013 -0700 + + Minor cleanups. + + 1. Further BLAS and heap allocation cleanups in schur_eliminator_impl.h + 2. Modularize blas.h using macros. + 3. Lint cleanups from William Rucklidge. + 4. Small changes to jet.h + 5. ResidualBlock now uses blas.h + + Performance improvements: + + For static and dynamic sized blocks, the peformance is not changed much. 
+ + -use_quaternions -ordering user -linear_solver sparse_schur + + master change + problem: 16-22106 + gcc 3.4 3.3 + clang 2.8 2.7 + + problem: 49-7776 + gcc 1.7 1.7 + clang 1.4 1.4 + + problem: 245-198739 + gcc 80.1 79.6 + clang 80.6 76.2 + + problem: 257-65132 + gcc 12.2 12.0 + clang 10.4 10.2 + + problem: 356-226730 + gcc 99.0 96.8 + clang 88.9 88.3 + + problem: 744-543562 + gcc 361.5 356.2 + clang 352.7 343.5 + + problem: 1024-110968 + gcc 45.9 45.6 + clang 42.6 42.1 + + However, performance when using local parameterizations is + significantly improved due to residual_block.cc using blas.h + + -use_quaternions -use_local_parameterization -ordering user -linear_solver sparse_schur + + master change + problem: 16-22106 + gcc 3.6 3.3 + clang 3.5 2.8 + + problem: 49-7776 + gcc 1.8 1.6 + clang 1.7 1.4 + + problem: 245-198739 + gcc 79.7 76.1 + clang 79.7 73.0 + + problem: 257-65132 + gcc 12.8 11.9 + clang 12.3 9.8 + + problem: 356-226730 + gcc 101.9 93.5 + clang 105.0 86.8 + + problem: 744-543562 + gcc 367.9 350.5 + clang 355.3 323.1 + + problem: 1024-110968 + gcc 43.0 40.3 + clang 41.0 37.5 + + Change-Id: I6dcf7476ddaa77cb116558d112a9cf1e832f5fc9 + +commit eeedd3a59281eb27025d7f9aa944d9aff0666590 +Author: Sergey Sharybin <sergey.vfx@gmail.com> +Date: Wed Apr 10 23:58:32 2013 +0600 + + Autodiff local parameterization class + + This class is used to create local parameterization + with Jacobians computed via automatic differentiation. + + To get an auto differentiated local parameterization, + class with a templated operator() (a functor) that + computes + + plus_delta = Plus(x, delta); + + shall be defined. + + Then given such functor, the auto differentiated local + parameterization can be constructed as + + LocalParameterization* local_parameterization = + new AutoDiffLocalParameterization<PlusFunctor, 4, 3>; + | | + Global Size ---------------+ | + Local Size -------------------+ + + See autodiff_local_parameterization.h for more information + and usage example. 
+ + Initial implementation by Keir Mierle, finished by self + and integrated into Ceres and covered with unit tests + by Sameer Agarwal. + + Change-Id: I1b3e48ae89f81e0cf1f51416c5696e18223f4b21 + +commit d8d541674da5f3ba7a15c4003fa18577479f8f8c +Author: Sergey Sharybin <sergey.vfx@gmail.com> +Date: Wed Apr 10 11:13:27 2013 +0600 + + Do not modify cached CMAKE_CXX_FLAGS_RELEASE + + Adding compiler's flags and force updating cached value + of release C++ flags lead to appending special compiler + flags on every edit of any CMakeList.txt. + + For compile result this is harmless, but was annoying + slight modification of CMakeList.txt triggered full + project rebuild. + + Now modified C++ flags are used for the whole subtree + starting from the project root, but this doesn't lead + to flags modified in cache. + + Change-Id: Ieb32bd7f96d5a09632f0b2b5325f6567be8cb5a8 + commit c290df85a40a8dd117b5eccc515bf22b0d9b1945 Author: Sameer Agarwal <sameeragarwal@google.com> Date: Sun Apr 7 09:17:47 2013 -0700 @@ -395,227 +715,3 @@ Date: Mon Mar 4 10:17:30 2013 -0800 Thanks to Alexander Mordvintsev for reporting this. Change-Id: I5c6be4d3d28f062e83a1ad41cb8089c19362a005 - -commit 480f9b8551c02c429bc027197f3d868c5cc522c9 -Author: Sameer Agarwal <sameeragarwal@google.com> -Date: Sun Mar 3 20:15:22 2013 -0800 - - Add gerrit instructions to the docs. - - Change-Id: Ic98f20273f3ccbaeb8b4ca00c4ce0042a0d262f8 - -commit 7c60b5c2c6170f0f81a29dbaa2ca7d8031db843b -Author: Sameer Agarwal <sameeragarwal@google.com> -Date: Sun Mar 3 18:28:02 2013 -0800 - - version history update - - Change-Id: Ia92caeb0f6659667ce1e56eefd0e3c87b3f6e538 - -commit a363a7b69c7b97e17ad671ba1aee30f201eafdd1 -Author: Sameer Agarwal <sameeragarwal@google.com> -Date: Sun Mar 3 18:06:00 2013 -0800 - - Multithread DENSE_SCHUR - - Replace the global lock in BlockRandomAccessDenseMatrix - with a per cell lock. 
- - Change-Id: Iddbe38616157b6e0d3770eede3335a056c3ba18c - -commit 31730ef55df802d1e251edab3bac3c0cdcb30647 -Author: Sameer Agarwal <sameeragarwal@google.com> -Date: Thu Feb 28 11:20:28 2013 -0800 - - DenseSparseMatrix is now column-major. - - 1. Introduce new typdefs in eigen.h to allow for column - major matrices. - - 2. Clean up old unused typedefs, and the aligned typedefs - since they do not actually add any real performance. - - 3. Made eigen.h conform to the google style guide by removing - the using directives. They were polluting the ceres namespace. - - 4. Made the template specialization generator work again. - - Change-Id: Ic2268c784534b737ebd6e1a043e2a327adaeca37 - -commit f8e43f7f2724c5413015e1f113ce860ee8b30428 -Author: Sameer Agarwal <sameeragarwal@google.com> -Date: Wed Feb 27 08:55:20 2013 -0800 - - version history update - - Change-Id: Ibd412a9e5beac3b3ac3e15b26fb11aa061956095 - -commit fef82b3a7af1e44f18f5343601fb19a4dd6f89ad -Author: Alex Stewart <alexs.mac@gmail.com> -Date: Wed Feb 27 10:44:12 2013 +0000 - - Bugfix - commenting-out unused member which results in build error on OS X with latest Xcode. - - - Build error due to -Werror,-Wunused-private-field clang args. - - Raised with gtest group (as it also occurs with latest gtest:master but for a different - variable) with patch, but they don't want to fix for compatibility with legacy compilers/OS - see here: https://groups.google.com/forum/?fromgroups=#!topic/googletestframework/S1KLl2jkzws - - Change-Id: I99984bcd9d07f6eb0e3fac58e27ddf0ac9e54265 - -commit 0bc3540b66cf9de4d4a317c6a760849aa66d414e -Author: Sameer Agarwal <sameeragarwal@google.com> -Date: Wed Feb 27 08:46:48 2013 -0800 - - Version history update - - Change-Id: I6f79dd87e45bedf4bcf821e7b44f8b9553c39a7b - -commit b59ac43b9d1122da3d00882efa7c5d6833c06ea7 -Author: Alex Stewart <alexs.mac@gmail.com> -Date: Wed Feb 27 09:10:19 2013 +0000 - - Issue 83 fix: use correct pthread linker flags with clang. - - 1. 
-lpthreads was previously added to the CMAKE_CXX_FLAGS which are - not passed to the linker thus linking would fail. - 2. Clang would emit a warning about -lpthreads being added to a - build instruction with -c (compile only). - - This patch fixes both of these issues by adding -lpthreads to the - linker flags (and removes them from the CXX flags). - - Change-Id: I5e54de3ab7eced177aa31f311926893598af5b56 - -commit 6fb1024ed5b197da261f71d1bb02716661da2fff -Author: Sameer Agarwal <sameeragarwal@google.com> -Date: Tue Feb 26 22:20:18 2013 -0800 - - Fix a small bug in evaluator.h - - Change-Id: I2c4b8637e0ac8645721109f8b6bb2396ce8bb37b - -commit 039ff07dd1a02e6c9cff335551f05bfe8269224b -Author: Sameer Agarwal <sameeragarwal@google.com> -Date: Tue Feb 26 09:15:39 2013 -0800 - - Evaluate ResidualBlocks without LossFunction if needed. - - 1. Add the ability to evaluate the problem without loss function. - 2. Remove static Evaluator::Evaluate - 3. Refactor the common code from problem_test.cc and - evaluator_test.cc into evaluator_test_utils.cc - - Change-Id: I1aa841580afe91d288fbb65288b0ffdd1e43e827 - -commit c3fd3b960e489348d5b2c8b8f0167760e52ecbd9 -Author: Taylor Braun-Jones <taylor@braun-jones.org> -Date: Tue Feb 26 00:30:35 2013 -0500 - - Only use cmake28 macro for RHEL6 - - This makes it possible to use the same spec to build on Fedora. It drops any - chance of building on RHEL5, but I doubt that was possible anyway. - - Change-Id: Ia956eb6416504e520962ec2f617e03b40ca18203 - -commit b73148b9f38fe41032e696436566b78043a368db -Author: Taylor Braun-Jones <taylor@braun-jones.org> -Date: Mon Feb 25 02:34:00 2013 -0500 - - Remove -Wno-return-type-c-linkage option when using gcc - - Only use this option when compiling with CLang which supports it. 
- - Change-Id: I8555c16e82d61302f6a43672d0d63e5d4800c6b6 - -commit ba9442160dabf612a1dc51baf098937459b4b5ca -Author: Keir Mierle <mierle@gmail.com> -Date: Mon Feb 25 12:46:44 2013 -0800 - - Add the number of effective parameters to the final report. - - Here is an example report, obtained by running: - - bin/Debug/bundle_adjuster \ - --input=../ceres-solver/data/problem-16-22106-pre.txt \ - --linear_solver=iterative_schur \ - --num_iterations=1 \ - --alsologtostderr \ - --use_local_parameterization \ - --use_quaternions - - Note that effective parameters is less than parameters by 16, which is the - number of cameras. In this case the local parameterization has a 3 dimensional - tangent space for the 4-dimensional quaternions. - - Ceres Solver Report - ------------------- - Original Reduced - Parameter blocks 22138 22138 - Parameters 66478 66478 - Effective parameters 66462 66462 - Residual blocks 83718 83718 - Residual 167436 167436 - - Minimizer TRUST_REGION - Trust Region Strategy LEVENBERG_MARQUARDT - - Given Used - Linear solver ITERATIVE_SCHUR ITERATIVE_SCHUR - Preconditioner JACOBI JACOBI - Threads: 1 1 - Linear solver threads 1 1 - Linear solver ordering AUTOMATIC 22106, 32 - - Cost: - Initial 4.185660e+06 - Final 7.221647e+04 - Change 4.113443e+06 - - Number of iterations: - Successful 1 - Unsuccessful 0 - Total 1 - - Time (in seconds): - Preprocessor 0.697 - - Residual Evaluations 0.063 - Jacobian Evaluations 27.608 - Linear Solver 13.360 - Minimizer 43.973 - - Postprocessor 0.004 - Total 44.756 - - Termination: NO_CONVERGENCE - - Change-Id: I6b6b8ac24f71bd187e67d95651290917642be74f - -commit 36dc14ddf2fd53238c2ce21f172aa1989b31c0fd -Author: Sameer Agarwal <sameeragarwal@google.com> -Date: Mon Feb 25 10:33:10 2013 -0800 - - Fix a clang warning - - Change-Id: I5ef32c6329f1f75efb30b16519b8de146a8339fa - -commit 931c309b2734329ec6e5f0b88ce4a0b488ac47e5 -Author: Sameer Agarwal <sameeragarwal@google.com> -Date: Mon Feb 25 09:46:21 2013 -0800 - - Cleanup based 
on comments by William Rucklidge - - Change-Id: If269ba8e388965a8ea32260fd6f17a133a19ab9b - -commit df36218c953e05d665df2cc96a6d7625e2307d97 -Author: Taylor Braun-Jones <taylor@braun-jones.org> -Date: Fri Feb 15 18:28:11 2013 -0500 - - Add support for the CMake "LIB_SUFFIX" convention - - Allows `make install` to work correctly on e.g. 64-bit systems where the - native libraries are installed to /usr/lib64 - - Change-Id: I71b4fae7b459c003cb5fac981278c668f2e29779 diff --git a/extern/libmv/third_party/ceres/files.txt b/extern/libmv/third_party/ceres/files.txt index 0c60e5a055a..cb6c8af533e 100644 --- a/extern/libmv/third_party/ceres/files.txt +++ b/extern/libmv/third_party/ceres/files.txt @@ -1,4 +1,5 @@ include/ceres/autodiff_cost_function.h +include/ceres/autodiff_local_parameterization.h include/ceres/ceres.h include/ceres/conditioned_cost_function.h include/ceres/cost_function.h diff --git a/extern/libmv/third_party/ceres/include/ceres/autodiff_local_parameterization.h b/extern/libmv/third_party/ceres/include/ceres/autodiff_local_parameterization.h new file mode 100644 index 00000000000..1099061bec8 --- /dev/null +++ b/extern/libmv/third_party/ceres/include/ceres/autodiff_local_parameterization.h @@ -0,0 +1,144 @@ +// Ceres Solver - A fast non-linear least squares minimizer +// Copyright 2013 Google Inc. All rights reserved. +// http://code.google.com/p/ceres-solver/ +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// * Neither the name of Google Inc. 
nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// +// Author: sergey.vfx@gmail.com (Sergey Sharybin) +// mierle@gmail.com (Keir Mierle) +// sameeragarwal@google.com (Sameer Agarwal) + +#ifndef CERES_PUBLIC_AUTODIFF_LOCAL_PARAMETERIZATION_H_ +#define CERES_PUBLIC_AUTODIFF_LOCAL_PARAMETERIZATION_H_ + +#include "ceres/internal/autodiff.h" +#include "ceres/internal/scoped_ptr.h" +#include "ceres/local_parameterization.h" + +namespace ceres { + +// Create local parameterization with Jacobians computed via automatic +// differentiation. For more information on local parameterizations, +// see include/ceres/local_parameterization.h +// +// To get an auto differentiated local parameterization, you must define +// a class with a templated operator() (a functor) that computes +// +// x_plus_delta = Plus(x, delta); +// +// the template parameter T. The autodiff framework substitutes appropriate +// "Jet" objects for T in order to compute the derivative when necessary, but +// this is hidden, and you should write the function as if T were a scalar type +// (e.g. 
a double-precision floating point number). +// +// The function must write the computed value in the last argument (the only +// non-const one) and return true to indicate success. +// +// For example, Quaternions have a three dimensional local +// parameterization. It's plus operation can be implemented as (taken +// from interncal/ceres/auto_diff_local_parameterization_test.cc) +// +// struct QuaternionPlus { +// template<typename T> +// bool operator()(const T* x, const T* delta, T* x_plus_delta) const { +// const T squared_norm_delta = +// delta[0] * delta[0] + delta[1] * delta[1] + delta[2] * delta[2]; +// +// T q_delta[4]; +// if (squared_norm_delta > T(0.0)) { +// T norm_delta = sqrt(squared_norm_delta); +// const T sin_delta_by_delta = sin(norm_delta) / norm_delta; +// q_delta[0] = cos(norm_delta); +// q_delta[1] = sin_delta_by_delta * delta[0]; +// q_delta[2] = sin_delta_by_delta * delta[1]; +// q_delta[3] = sin_delta_by_delta * delta[2]; +// } else { +// // We do not just use q_delta = [1,0,0,0] here because that is a +// // constant and when used for automatic differentiation will +// // lead to a zero derivative. Instead we take a first order +// // approximation and evaluate it at zero. +// q_delta[0] = T(1.0); +// q_delta[1] = delta[0]; +// q_delta[2] = delta[1]; +// q_delta[3] = delta[2]; +// } +// +// QuaternionProduct(q_delta, x, x_plus_delta); +// return true; +// } +// }; +// +// Then given this struct, the auto differentiated local +// parameterization can now be constructed as +// +// LocalParameterization* local_parameterization = +// new AutoDiffLocalParameterization<QuaternionPlus, 4, 3>; +// | | +// Global Size ---------------+ | +// Local Size -------------------+ +// +// WARNING: Since the functor will get instantiated with different types for +// T, you must to convert from other numeric types to T before mixing +// computations with other variables of type T. 
In the example above, this is +// seen where instead of using k_ directly, k_ is wrapped with T(k_). + +template <typename Functor, int kGlobalSize, int kLocalSize> +class AutoDiffLocalParameterization : public LocalParameterization { + public: + virtual ~AutoDiffLocalParameterization() {} + virtual bool Plus(const double* x, + const double* delta, + double* x_plus_delta) const { + return Functor()(x, delta, x_plus_delta); + } + + virtual bool ComputeJacobian(const double* x, double* jacobian) const { + double zero_delta[kLocalSize]; + for (int i = 0; i < kLocalSize; ++i) { + zero_delta[i] = 0.0; + } + + double x_plus_delta[kGlobalSize]; + for (int i = 0; i < kGlobalSize; ++i) { + x_plus_delta[i] = 0.0; + } + + const double* parameter_ptrs[2] = {x, zero_delta}; + double* jacobian_ptrs[2] = { NULL, jacobian }; + return internal::AutoDiff<Functor, double, kGlobalSize, kLocalSize> + ::Differentiate(Functor(), + parameter_ptrs, + kGlobalSize, + x_plus_delta, + jacobian_ptrs); + } + + virtual int GlobalSize() const { return kGlobalSize; } + virtual int LocalSize() const { return kLocalSize; } +}; + +} // namespace ceres + +#endif // CERES_PUBLIC_AUTODIFF_LOCAL_PARAMETERIZATION_H_ diff --git a/extern/libmv/third_party/ceres/include/ceres/ceres.h b/extern/libmv/third_party/ceres/include/ceres/ceres.h index 7878806aa45..ac76e97c834 100644 --- a/extern/libmv/third_party/ceres/include/ceres/ceres.h +++ b/extern/libmv/third_party/ceres/include/ceres/ceres.h @@ -38,6 +38,7 @@ #define CERES_ABI_VERSION 1.5.0 #include "ceres/autodiff_cost_function.h" +#include "ceres/autodiff_local_parameterization.h" #include "ceres/cost_function.h" #include "ceres/cost_function_to_functor.h" #include "ceres/crs_matrix.h" diff --git a/extern/libmv/third_party/ceres/include/ceres/dynamic_autodiff_cost_function.h b/extern/libmv/third_party/ceres/include/ceres/dynamic_autodiff_cost_function.h index e4549c54b43..38bdb0aa618 100644 --- 
a/extern/libmv/third_party/ceres/include/ceres/dynamic_autodiff_cost_function.h +++ b/extern/libmv/third_party/ceres/include/ceres/dynamic_autodiff_cost_function.h @@ -123,7 +123,8 @@ class DynamicAutoDiffCostFunction : public CostFunction { vector<Jet<double, Stride> > output_jets(num_residuals()); // Make the parameter pack that is sent to the functor (reused). - vector<Jet<double, Stride>* > jet_parameters(num_parameter_blocks, NULL); + vector<Jet<double, Stride>* > jet_parameters(num_parameter_blocks, + static_cast<Jet<double, Stride>* >(NULL)); int num_active_parameters = 0; int start_derivative_section = -1; for (int i = 0, parameter_cursor = 0; i < num_parameter_blocks; ++i) { diff --git a/extern/libmv/third_party/ceres/include/ceres/jet.h b/extern/libmv/third_party/ceres/include/ceres/jet.h index 1238123a8fb..000bd1c116a 100644 --- a/extern/libmv/third_party/ceres/include/ceres/jet.h +++ b/extern/libmv/third_party/ceres/include/ceres/jet.h @@ -348,8 +348,8 @@ Jet<T, N> operator/(const Jet<T, N>& f, // b + v (b + v)(b - v) b^2 // // which holds because v*v = 0. - h.a = f.a / g.a; - const T g_a_inverse = 1.0 / g.a; + const T g_a_inverse = T(1.0) / g.a; + h.a = f.a * g_a_inverse; const T f_a_by_g_a = f.a * g_a_inverse; for (int i = 0; i < N; ++i) { h.v[i] = (f.v[i] - f_a_by_g_a * g.v[i]) * g_a_inverse; @@ -450,7 +450,7 @@ template <typename T, int N> inline Jet<T, N> sqrt(const Jet<T, N>& f) { Jet<T, N> g; g.a = sqrt(f.a); - const T two_a_inverse = 1.0 / (T(2.0) * g.a); + const T two_a_inverse = T(1.0) / (T(2.0) * g.a); g.v = f.v * two_a_inverse; return g; } diff --git a/extern/libmv/third_party/ceres/include/ceres/problem.h b/extern/libmv/third_party/ceres/include/ceres/problem.h index 0a449cb99f1..33394ce0e17 100644 --- a/extern/libmv/third_party/ceres/include/ceres/problem.h +++ b/extern/libmv/third_party/ceres/include/ceres/problem.h @@ -328,6 +328,19 @@ class Problem { // sizes of all of the residual blocks. 
int NumResiduals() const; + // The size of the parameter block. + int ParameterBlockSize(double* values) const; + + // The size of local parameterization for the parameter block. If + // there is no local parameterization associated with this parameter + // block, then ParameterBlockLocalSize = ParameterBlockSize. + int ParameterBlockLocalSize(double* values) const; + + // Fills the passed parameter_blocks vector with pointers to the + // parameter blocks currently in the problem. After this call, + // parameter_block.size() == NumParameterBlocks. + void GetParameterBlocks(vector<double*>* parameter_blocks) const; + // Options struct to control Problem::Evaluate. struct EvaluateOptions { EvaluateOptions() diff --git a/extern/libmv/third_party/ceres/include/ceres/solver.h b/extern/libmv/third_party/ceres/include/ceres/solver.h index 8c2ff32d80b..97d082d80e0 100644 --- a/extern/libmv/third_party/ceres/include/ceres/solver.h +++ b/extern/libmv/third_party/ceres/include/ceres/solver.h @@ -95,13 +95,8 @@ class Solver { #endif num_linear_solver_threads = 1; - -#if defined(CERES_NO_SUITESPARSE) - use_block_amd = false; -#else - use_block_amd = true; -#endif linear_solver_ordering = NULL; + use_postordering = false; use_inner_iterations = false; inner_iteration_ordering = NULL; linear_solver_min_num_iterations = 1; @@ -356,19 +351,29 @@ class Solver { // deallocate the memory when destroyed. ParameterBlockOrdering* linear_solver_ordering; - // By virtue of the modeling layer in Ceres being block oriented, - // all the matrices used by Ceres are also block oriented. When - // doing sparse direct factorization of these matrices (for - // SPARSE_NORMAL_CHOLESKY, SPARSE_SCHUR and ITERATIVE in - // conjunction with CLUSTER_TRIDIAGONAL AND CLUSTER_JACOBI - // preconditioners), the fill-reducing ordering algorithms can - // either be run on the block or the scalar form of these matrices. 
- // Running it on the block form exposes more of the super-nodal - // structure of the matrix to the factorization routines. Setting - // this parameter to true runs the ordering algorithms in block - // form. Currently this option only makes sense with - // sparse_linear_algebra_library = SUITE_SPARSE. - bool use_block_amd; + // Note: This option only applies to the SPARSE_NORMAL_CHOLESKY + // solver when used with SUITE_SPARSE. + + // Sparse Cholesky factorization algorithms use a fill-reducing + // ordering to permute the columns of the Jacobian matrix. There + // are two ways of doing this. + + // 1. Compute the Jacobian matrix in some order and then have the + // factorization algorithm permute the columns of the Jacobian. + + // 2. Compute the Jacobian with its columns already permuted. + + // The first option incurs a significant memory penalty. The + // factorization algorithm has to make a copy of the permuted + // Jacobian matrix, thus Ceres pre-permutes the columns of the + // Jacobian matrix and generally speaking, there is no performance + // penalty for doing so. + + // In some rare cases, it is worth using a more complicated + // reordering algorithm which has slightly better runtime + // performance at the expense of an extra copy of the Jacobian + // // matrix. Setting use_postordering to true enables this tradeoff. + bool use_postordering; // Some non-linear least squares problems have additional // structure in the way the parameter blocks interact that it is diff --git a/extern/libmv/third_party/ceres/internal/ceres/blas.h b/extern/libmv/third_party/ceres/internal/ceres/blas.h index bacf1fff96a..9629b3da550 100644 --- a/extern/libmv/third_party/ceres/internal/ceres/blas.h +++ b/extern/libmv/third_party/ceres/internal/ceres/blas.h @@ -65,20 +65,71 @@ namespace internal { #define CERES_MAYBE_NOALIAS .noalias() #endif -// For the matrix-matrix functions below, there are three functions -// for each functionality. Foo, FooNaive and FooEigen. 
Foo is the one -// to be called by the user. FooNaive is a basic loop based +// The following three macros are used to share code and reduce +// template junk across the various GEMM variants. +#define CERES_GEMM_BEGIN(name) \ + template<int kRowA, int kColA, int kRowB, int kColB, int kOperation> \ + inline void name(const double* A, \ + const int num_row_a, \ + const int num_col_a, \ + const double* B, \ + const int num_row_b, \ + const int num_col_b, \ + double* C, \ + const int start_row_c, \ + const int start_col_c, \ + const int row_stride_c, \ + const int col_stride_c) + +#define CERES_GEMM_NAIVE_HEADER \ + DCHECK_GT(num_row_a, 0); \ + DCHECK_GT(num_col_a, 0); \ + DCHECK_GT(num_row_b, 0); \ + DCHECK_GT(num_col_b, 0); \ + DCHECK_GE(start_row_c, 0); \ + DCHECK_GE(start_col_c, 0); \ + DCHECK_GT(row_stride_c, 0); \ + DCHECK_GT(col_stride_c, 0); \ + DCHECK((kRowA == Eigen::Dynamic) || (kRowA == num_row_a)); \ + DCHECK((kColA == Eigen::Dynamic) || (kColA == num_col_a)); \ + DCHECK((kRowB == Eigen::Dynamic) || (kRowB == num_row_b)); \ + DCHECK((kColB == Eigen::Dynamic) || (kColB == num_col_b)); \ + const int NUM_ROW_A = (kRowA != Eigen::Dynamic ? kRowA : num_row_a); \ + const int NUM_COL_A = (kColA != Eigen::Dynamic ? kColA : num_col_a); \ + const int NUM_ROW_B = (kColB != Eigen::Dynamic ? kRowB : num_row_b); \ + const int NUM_COL_B = (kColB != Eigen::Dynamic ? kColB : num_col_b); + +#define CERES_GEMM_EIGEN_HEADER \ + const typename EigenTypes<kRowA, kColA>::ConstMatrixRef \ + Aref(A, num_row_a, num_col_a); \ + const typename EigenTypes<kRowB, kColB>::ConstMatrixRef \ + Bref(B, num_row_b, num_col_b); \ + MatrixRef Cref(C, row_stride_c, col_stride_c); \ + +#define CERES_CALL_GEMM(name) \ + name<kRowA, kColA, kRowB, kColB, kOperation>( \ + A, num_row_a, num_col_a, \ + B, num_row_b, num_col_b, \ + C, start_row_c, start_col_c, row_stride_c, col_stride_c); + + +// For the matrix-matrix functions below, there are three variants for +// each functionality. 
Foo, FooNaive and FooEigen. Foo is the one to +// be called by the user. FooNaive is a basic loop based // implementation and FooEigen uses Eigen's implementation. Foo // chooses between FooNaive and FooEigen depending on how many of the // template arguments are fixed at compile time. Currently, FooEigen -// is called if all matrix dimenions are compile time +// is called if all matrix dimensions are compile time // constants. FooNaive is called otherwise. This leads to the best // performance currently. // -// TODO(sameeragarwal): Benchmark and simplify the matrix-vector -// functions. - -// C op A * B; +// The MatrixMatrixMultiply variants compute: +// +// C op A * B; +// +// The MatrixTransposeMatrixMultiply variants compute: +// +// C op A' * B // // where op can be +=, -=, or =. // @@ -91,7 +142,7 @@ namespace internal { // kOperation = -1 -> C -= A * B // kOperation = 0 -> C = A * B // -// The function can write into matrices C which are larger than the +// The functions can write into matrices C which are larger than the // matrix A * B. This is done by specifying the true size of C via // row_stride_c and col_stride_c, and then indicating where A * B // should be written into by start_row_c and start_col_c. 
@@ -110,24 +161,11 @@ namespace internal { // ------------ // ------------ // -template<int kRowA, int kColA, int kRowB, int kColB, int kOperation> -inline void MatrixMatrixMultiplyEigen(const double* A, - const int num_row_a, - const int num_col_a, - const double* B, - const int num_row_b, - const int num_col_b, - double* C, - const int start_row_c, - const int start_col_c, - const int row_stride_c, - const int col_stride_c) { - const typename EigenTypes<kRowA, kColA>::ConstMatrixRef Aref(A, num_row_a, num_col_a); - const typename EigenTypes<kRowB, kColB>::ConstMatrixRef Bref(B, num_row_b, num_col_b); - MatrixRef Cref(C, row_stride_c, col_stride_c); - Eigen::Block<MatrixRef, kRowA, kColB> block(Cref, - start_row_c, start_col_c, - num_row_a, num_col_b); +CERES_GEMM_BEGIN(MatrixMatrixMultiplyEigen) { + CERES_GEMM_EIGEN_HEADER + Eigen::Block<MatrixRef, kRowA, kColB> + block(Cref, start_row_c, start_col_c, num_row_a, num_col_b); + if (kOperation > 0) { block CERES_MAYBE_NOALIAS += Aref * Bref; } else if (kOperation < 0) { @@ -137,36 +175,8 @@ inline void MatrixMatrixMultiplyEigen(const double* A, } } -template<int kRowA, int kColA, int kRowB, int kColB, int kOperation> -inline void MatrixMatrixMultiplyNaive(const double* A, - const int num_row_a, - const int num_col_a, - const double* B, - const int num_row_b, - const int num_col_b, - double* C, - const int start_row_c, - const int start_col_c, - const int row_stride_c, - const int col_stride_c) { - DCHECK_GT(num_row_a, 0); - DCHECK_GT(num_col_a, 0); - DCHECK_GT(num_row_b, 0); - DCHECK_GT(num_col_b, 0); - DCHECK_GE(start_row_c, 0); - DCHECK_GE(start_col_c, 0); - DCHECK_GT(row_stride_c, 0); - DCHECK_GT(col_stride_c, 0); - - DCHECK((kRowA == Eigen::Dynamic) || (kRowA == num_row_a)); - DCHECK((kColA == Eigen::Dynamic) || (kColA == num_col_a)); - DCHECK((kRowB == Eigen::Dynamic) || (kRowB == num_row_b)); - DCHECK((kColB == Eigen::Dynamic) || (kColB == num_col_b)); - - const int NUM_ROW_A = (kRowA != Eigen::Dynamic ? 
kRowA : num_row_a); - const int NUM_COL_A = (kColA != Eigen::Dynamic ? kColA : num_col_a); - const int NUM_ROW_B = (kColB != Eigen::Dynamic ? kRowB : num_row_b); - const int NUM_COL_B = (kColB != Eigen::Dynamic ? kColB : num_col_b); +CERES_GEMM_BEGIN(MatrixMatrixMultiplyNaive) { + CERES_GEMM_NAIVE_HEADER DCHECK_EQ(NUM_COL_A, NUM_ROW_B); const int NUM_ROW_C = NUM_ROW_A; @@ -193,91 +203,26 @@ inline void MatrixMatrixMultiplyNaive(const double* A, } } -template<int kRowA, int kColA, int kRowB, int kColB, int kOperation> -inline void MatrixMatrixMultiply(const double* A, - const int num_row_a, - const int num_col_a, - const double* B, - const int num_row_b, - const int num_col_b, - double* C, - const int start_row_c, - const int start_col_c, - const int row_stride_c, - const int col_stride_c) { +CERES_GEMM_BEGIN(MatrixMatrixMultiply) { #ifdef CERES_NO_CUSTOM_BLAS - MatrixMatrixMultiplyEigen<kRowA, kColA, kRowB, kColB, kOperation>( - A, num_row_a, num_col_a, - B, num_row_b, num_col_b, - C, start_row_c, start_col_c, row_stride_c, col_stride_c); + + CERES_CALL_GEMM(MatrixMatrixMultiplyEigen) return; #else if (kRowA != Eigen::Dynamic && kColA != Eigen::Dynamic && kRowB != Eigen::Dynamic && kColB != Eigen::Dynamic) { - MatrixMatrixMultiplyEigen<kRowA, kColA, kRowB, kColB, kOperation>( - A, num_row_a, num_col_a, - B, num_row_b, num_col_b, - C, start_row_c, start_col_c, row_stride_c, col_stride_c); + CERES_CALL_GEMM(MatrixMatrixMultiplyEigen) } else { - MatrixMatrixMultiplyNaive<kRowA, kColA, kRowB, kColB, kOperation>( - A, num_row_a, num_col_a, - B, num_row_b, num_col_b, - C, start_row_c, start_col_c, row_stride_c, col_stride_c); + CERES_CALL_GEMM(MatrixMatrixMultiplyNaive) } #endif } - -// C op A' * B; -// -// where op can be +=, -=, or =. -// -// The template parameters (kRowA, kColA, kRowB, kColB) allow -// specialization of the loop at compile time. If this information is -// not available, then Eigen::Dynamic should be used as the template -// argument. 
-// -// kOperation = 1 -> C += A' * B -// kOperation = -1 -> C -= A' * B -// kOperation = 0 -> C = A' * B -// -// The function can write into matrices C which are larger than the -// matrix A' * B. This is done by specifying the true size of C via -// row_stride_c and col_stride_c, and then indicating where A * B -// should be written into by start_row_c and start_col_c. -// -// Graphically if row_stride_c = 10, col_stride_c = 12, start_row_c = -// 4 and start_col_c = 5, then if A = 2x3 and B = 2x4, we get -// -// ------------ -// ------------ -// ------------ -// ------------ -// -----xxxx--- -// -----xxxx--- -// -----xxxx--- -// ------------ -// ------------ -// ------------ -// -template<int kRowA, int kColA, int kRowB, int kColB, int kOperation> -inline void MatrixTransposeMatrixMultiplyEigen(const double* A, - const int num_row_a, - const int num_col_a, - const double* B, - const int num_row_b, - const int num_col_b, - double* C, - const int start_row_c, - const int start_col_c, - const int row_stride_c, - const int col_stride_c) { - const typename EigenTypes<kRowA, kColA>::ConstMatrixRef Aref(A, num_row_a, num_col_a); - const typename EigenTypes<kRowB, kColB>::ConstMatrixRef Bref(B, num_row_b, num_col_b); - MatrixRef Cref(C, row_stride_c, col_stride_c); +CERES_GEMM_BEGIN(MatrixTransposeMatrixMultiplyEigen) { + CERES_GEMM_EIGEN_HEADER Eigen::Block<MatrixRef, kColA, kColB> block(Cref, start_row_c, start_col_c, num_col_a, num_col_b); @@ -290,36 +235,8 @@ inline void MatrixTransposeMatrixMultiplyEigen(const double* A, } } -template<int kRowA, int kColA, int kRowB, int kColB, int kOperation> -inline void MatrixTransposeMatrixMultiplyNaive(const double* A, - const int num_row_a, - const int num_col_a, - const double* B, - const int num_row_b, - const int num_col_b, - double* C, - const int start_row_c, - const int start_col_c, - const int row_stride_c, - const int col_stride_c) { - DCHECK_GT(num_row_a, 0); - DCHECK_GT(num_col_a, 0); - DCHECK_GT(num_row_b, 0); - 
DCHECK_GT(num_col_b, 0); - DCHECK_GE(start_row_c, 0); - DCHECK_GE(start_col_c, 0); - DCHECK_GT(row_stride_c, 0); - DCHECK_GT(col_stride_c, 0); - - DCHECK((kRowA == Eigen::Dynamic) || (kRowA == num_row_a)); - DCHECK((kColA == Eigen::Dynamic) || (kColA == num_col_a)); - DCHECK((kRowB == Eigen::Dynamic) || (kRowB == num_row_b)); - DCHECK((kColB == Eigen::Dynamic) || (kColB == num_col_b)); - - const int NUM_ROW_A = (kRowA != Eigen::Dynamic ? kRowA : num_row_a); - const int NUM_COL_A = (kColA != Eigen::Dynamic ? kColA : num_col_a); - const int NUM_ROW_B = (kColB != Eigen::Dynamic ? kRowB : num_row_b); - const int NUM_COL_B = (kColB != Eigen::Dynamic ? kColB : num_col_b); +CERES_GEMM_BEGIN(MatrixTransposeMatrixMultiplyNaive) { + CERES_GEMM_NAIVE_HEADER DCHECK_EQ(NUM_ROW_A, NUM_ROW_B); const int NUM_ROW_C = NUM_COL_A; @@ -346,43 +263,37 @@ inline void MatrixTransposeMatrixMultiplyNaive(const double* A, } } -template<int kRowA, int kColA, int kRowB, int kColB, int kOperation> -inline void MatrixTransposeMatrixMultiply(const double* A, - const int num_row_a, - const int num_col_a, - const double* B, - const int num_row_b, - const int num_col_b, - double* C, - const int start_row_c, - const int start_col_c, - const int row_stride_c, - const int col_stride_c) { +CERES_GEMM_BEGIN(MatrixTransposeMatrixMultiply) { #ifdef CERES_NO_CUSTOM_BLAS - MatrixTransposeMatrixMultiplyEigen<kRowA, kColA, kRowB, kColB, kOperation>( - A, num_row_a, num_col_a, - B, num_row_b, num_col_b, - C, start_row_c, start_col_c, row_stride_c, col_stride_c); + + CERES_CALL_GEMM(MatrixTransposeMatrixMultiplyEigen) return; #else if (kRowA != Eigen::Dynamic && kColA != Eigen::Dynamic && kRowB != Eigen::Dynamic && kColB != Eigen::Dynamic) { - MatrixTransposeMatrixMultiplyEigen<kRowA, kColA, kRowB, kColB, kOperation>( - A, num_row_a, num_col_a, - B, num_row_b, num_col_b, - C, start_row_c, start_col_c, row_stride_c, col_stride_c); + CERES_CALL_GEMM(MatrixTransposeMatrixMultiplyEigen) } else { - 
MatrixTransposeMatrixMultiplyNaive<kRowA, kColA, kRowB, kColB, kOperation>( - A, num_row_a, num_col_a, - B, num_row_b, num_col_b, - C, start_row_c, start_col_c, row_stride_c, col_stride_c); + CERES_CALL_GEMM(MatrixTransposeMatrixMultiplyNaive) } #endif } +// Matrix-Vector multiplication +// +// c op A * b; +// +// where op can be +=, -=, or =. +// +// The template parameters (kRowA, kColA) allow specialization of the +// loop at compile time. If this information is not available, then +// Eigen::Dynamic should be used as the template argument. +// +// kOperation = 1 -> c += A' * b +// kOperation = -1 -> c -= A' * b +// kOperation = 0 -> c = A' * b template<int kRowA, int kColA, int kOperation> inline void MatrixVectorMultiply(const double* A, const int num_row_a, @@ -390,7 +301,8 @@ inline void MatrixVectorMultiply(const double* A, const double* b, double* c) { #ifdef CERES_NO_CUSTOM_BLAS - const typename EigenTypes<kRowA, kColA>::ConstMatrixRef Aref(A, num_row_a, num_col_a); + const typename EigenTypes<kRowA, kColA>::ConstMatrixRef + Aref(A, num_row_a, num_col_a); const typename EigenTypes<kColA>::ConstVectorRef bref(b, num_col_a); typename EigenTypes<kRowA>::VectorRef cref(c, num_row_a); @@ -430,17 +342,9 @@ inline void MatrixVectorMultiply(const double* A, #endif // CERES_NO_CUSTOM_BLAS } -// c op A' * b; +// Similar to MatrixVectorMultiply, except that A is transposed, i.e., // -// where op can be +=, -=, or =. -// -// The template parameters (kRowA, kColA) allow specialization of the -// loop at compile time. If this information is not available, then -// Eigen::Dynamic should be used as the template argument. 
-// -// kOperation = 1 -> c += A' * b -// kOperation = -1 -> c -= A' * b -// kOperation = 0 -> c = A' * b +// c op A' * b; template<int kRowA, int kColA, int kOperation> inline void MatrixTransposeVectorMultiply(const double* A, const int num_row_a, @@ -448,7 +352,8 @@ inline void MatrixTransposeVectorMultiply(const double* A, const double* b, double* c) { #ifdef CERES_NO_CUSTOM_BLAS - const typename EigenTypes<kRowA, kColA>::ConstMatrixRef Aref(A, num_row_a, num_col_a); + const typename EigenTypes<kRowA, kColA>::ConstMatrixRef + Aref(A, num_row_a, num_col_a); const typename EigenTypes<kRowA>::ConstVectorRef bref(b, num_row_a); typename EigenTypes<kColA>::VectorRef cref(c, num_col_a); @@ -488,7 +393,12 @@ inline void MatrixTransposeVectorMultiply(const double* A, #endif // CERES_NO_CUSTOM_BLAS } + #undef CERES_MAYBE_NOALIAS +#undef CERES_GEMM_BEGIN +#undef CERES_GEMM_EIGEN_HEADER +#undef CERES_GEMM_NAIVE_HEADER +#undef CERES_CALL_GEMM } // namespace internal } // namespace ceres diff --git a/extern/libmv/third_party/ceres/internal/ceres/block_sparse_matrix.cc b/extern/libmv/third_party/ceres/internal/ceres/block_sparse_matrix.cc index ab6fcef1945..ae36d60c900 100644 --- a/extern/libmv/third_party/ceres/internal/ceres/block_sparse_matrix.cc +++ b/extern/libmv/third_party/ceres/internal/ceres/block_sparse_matrix.cc @@ -147,7 +147,6 @@ void BlockSparseMatrix::LeftMultiply(const double* x, double* y) const { values_.get() + cells[j].position, row_block_size, col_block_size, x + row_block_pos, y + col_block_pos); - } } } diff --git a/extern/libmv/third_party/ceres/internal/ceres/iterative_schur_complement_solver.cc b/extern/libmv/third_party/ceres/internal/ceres/iterative_schur_complement_solver.cc index cf5562ac771..15e0bdcd81a 100644 --- a/extern/libmv/third_party/ceres/internal/ceres/iterative_schur_complement_solver.cc +++ b/extern/libmv/third_party/ceres/internal/ceres/iterative_schur_complement_solver.cc @@ -99,7 +99,6 @@ LinearSolver::Summary 
IterativeSchurComplementSolver::SolveImpl( preconditioner_options.type = options_.preconditioner_type; preconditioner_options.sparse_linear_algebra_library = options_.sparse_linear_algebra_library; - preconditioner_options.use_block_amd = options_.use_block_amd; preconditioner_options.num_threads = options_.num_threads; preconditioner_options.row_block_size = options_.row_block_size; preconditioner_options.e_block_size = options_.e_block_size; diff --git a/extern/libmv/third_party/ceres/internal/ceres/linear_solver.h b/extern/libmv/third_party/ceres/internal/ceres/linear_solver.h index f4bd0fb6f9f..ca10faa24b4 100644 --- a/extern/libmv/third_party/ceres/internal/ceres/linear_solver.h +++ b/extern/libmv/third_party/ceres/internal/ceres/linear_solver.h @@ -74,7 +74,7 @@ class LinearSolver { : type(SPARSE_NORMAL_CHOLESKY), preconditioner_type(JACOBI), sparse_linear_algebra_library(SUITE_SPARSE), - use_block_amd(true), + use_postordering(false), min_num_iterations(1), max_num_iterations(1), num_threads(1), @@ -90,8 +90,8 @@ class LinearSolver { SparseLinearAlgebraLibraryType sparse_linear_algebra_library; - // See solver.h for explanation of this option. - bool use_block_amd; + // See solver.h for information about this flag. + bool use_postordering; // Number of internal iterations that the solver uses. This // parameter only makes sense for iterative solvers like CG. 
diff --git a/extern/libmv/third_party/ceres/internal/ceres/minimizer.h b/extern/libmv/third_party/ceres/internal/ceres/minimizer.h index 708974d63c2..040ddd96fbb 100644 --- a/extern/libmv/third_party/ceres/internal/ceres/minimizer.h +++ b/extern/libmv/third_party/ceres/internal/ceres/minimizer.h @@ -113,7 +113,7 @@ class Minimizer { DumpFormatType lsqp_dump_format_type; string lsqp_dump_directory; int max_num_consecutive_invalid_steps; - int min_trust_region_radius; + double min_trust_region_radius; LineSearchDirectionType line_search_direction_type; LineSearchType line_search_type; NonlinearConjugateGradientType nonlinear_conjugate_gradient_type; diff --git a/extern/libmv/third_party/ceres/internal/ceres/preconditioner.h b/extern/libmv/third_party/ceres/internal/ceres/preconditioner.h index bfc8464db17..d7c88293687 100644 --- a/extern/libmv/third_party/ceres/internal/ceres/preconditioner.h +++ b/extern/libmv/third_party/ceres/internal/ceres/preconditioner.h @@ -47,7 +47,6 @@ class Preconditioner : public LinearOperator { Options() : type(JACOBI), sparse_linear_algebra_library(SUITE_SPARSE), - use_block_amd(true), num_threads(1), row_block_size(Eigen::Dynamic), e_block_size(Eigen::Dynamic), @@ -58,9 +57,6 @@ class Preconditioner : public LinearOperator { SparseLinearAlgebraLibraryType sparse_linear_algebra_library; - // See solver.h for explanation of this option. - bool use_block_amd; - // If possible, how many threads the preconditioner can use. 
int num_threads; diff --git a/extern/libmv/third_party/ceres/internal/ceres/problem.cc b/extern/libmv/third_party/ceres/internal/ceres/problem.cc index 43e78835b15..b483932b2c1 100644 --- a/extern/libmv/third_party/ceres/internal/ceres/problem.cc +++ b/extern/libmv/third_party/ceres/internal/ceres/problem.cc @@ -206,4 +206,16 @@ int Problem::NumResiduals() const { return problem_impl_->NumResiduals(); } +int Problem::ParameterBlockSize(double* parameter_block) const { + return problem_impl_->ParameterBlockSize(parameter_block); +}; + +int Problem::ParameterBlockLocalSize(double* parameter_block) const { + return problem_impl_->ParameterBlockLocalSize(parameter_block); +}; + +void Problem::GetParameterBlocks(vector<double*>* parameter_blocks) const { + problem_impl_->GetParameterBlocks(parameter_blocks); +} + } // namespace ceres diff --git a/extern/libmv/third_party/ceres/internal/ceres/problem_impl.cc b/extern/libmv/third_party/ceres/internal/ceres/problem_impl.cc index f4615f94768..34c37857538 100644 --- a/extern/libmv/third_party/ceres/internal/ceres/problem_impl.cc +++ b/extern/libmv/third_party/ceres/internal/ceres/problem_impl.cc @@ -711,5 +711,25 @@ int ProblemImpl::NumResiduals() const { return program_->NumResiduals(); } +int ProblemImpl::ParameterBlockSize(double* parameter_block) const { + return FindParameterBlockOrDie(parameter_block_map_, parameter_block)->Size(); +}; + +int ProblemImpl::ParameterBlockLocalSize(double* parameter_block) const { + return FindParameterBlockOrDie(parameter_block_map_, + parameter_block)->LocalSize(); +}; + +void ProblemImpl::GetParameterBlocks(vector<double*>* parameter_blocks) const { + CHECK_NOTNULL(parameter_blocks); + parameter_blocks->resize(0); + for (ParameterMap::const_iterator it = parameter_block_map_.begin(); + it != parameter_block_map_.end(); + ++it) { + parameter_blocks->push_back(it->first); + } +} + + } // namespace internal } // namespace ceres diff --git 
a/extern/libmv/third_party/ceres/internal/ceres/problem_impl.h b/extern/libmv/third_party/ceres/internal/ceres/problem_impl.h index ccc315de6b6..2609389645a 100644 --- a/extern/libmv/third_party/ceres/internal/ceres/problem_impl.h +++ b/extern/libmv/third_party/ceres/internal/ceres/problem_impl.h @@ -139,6 +139,10 @@ class ProblemImpl { int NumResidualBlocks() const; int NumResiduals() const; + int ParameterBlockSize(double* parameter_block) const; + int ParameterBlockLocalSize(double* parameter_block) const; + void GetParameterBlocks(vector<double*>* parameter_blocks) const; + const Program& program() const { return *program_; } Program* mutable_program() { return program_.get(); } diff --git a/extern/libmv/third_party/ceres/internal/ceres/residual_block.cc b/extern/libmv/third_party/ceres/internal/ceres/residual_block.cc index b91b0ed7843..649f3f714c2 100644 --- a/extern/libmv/third_party/ceres/internal/ceres/residual_block.cc +++ b/extern/libmv/third_party/ceres/internal/ceres/residual_block.cc @@ -35,6 +35,7 @@ #include <cstddef> #include <vector> +#include "ceres/blas.h" #include "ceres/corrector.h" #include "ceres/parameter_block.h" #include "ceres/residual_block_utils.h" @@ -44,6 +45,8 @@ #include "ceres/local_parameterization.h" #include "ceres/loss_function.h" +using Eigen::Dynamic; + namespace ceres { namespace internal { @@ -139,17 +142,15 @@ bool ResidualBlock::Evaluate(const bool apply_loss_function, // Apply local reparameterization to the jacobians. if (parameter_block->LocalParameterizationJacobian() != NULL) { - ConstMatrixRef local_to_global( + // jacobians[i] = global_jacobians[i] * global_to_local_jacobian. 
+ MatrixMatrixMultiply<Dynamic, Dynamic, Dynamic, Dynamic, 0>( + global_jacobians[i], + num_residuals, + parameter_block->Size(), parameter_block->LocalParameterizationJacobian(), parameter_block->Size(), - parameter_block->LocalSize()); - MatrixRef global_jacobian(global_jacobians[i], - num_residuals, - parameter_block->Size()); - MatrixRef local_jacobian(jacobians[i], - num_residuals, - parameter_block->LocalSize()); - local_jacobian.noalias() = global_jacobian * local_to_global; + parameter_block->LocalSize(), + jacobians[i], 0, 0, num_residuals, parameter_block->LocalSize()); } } } diff --git a/extern/libmv/third_party/ceres/internal/ceres/schur_complement_solver.cc b/extern/libmv/third_party/ceres/internal/ceres/schur_complement_solver.cc index 875f4551044..8afb1215015 100644 --- a/extern/libmv/third_party/ceres/internal/ceres/schur_complement_solver.cc +++ b/extern/libmv/third_party/ceres/internal/ceres/schur_complement_solver.cc @@ -286,11 +286,7 @@ bool SparseSchurComplementSolver::SolveReducedLinearSystemUsingSuiteSparse( // Symbolic factorization is computed if we don't already have one handy. 
if (factor_ == NULL) { - if (options().use_block_amd) { - factor_ = ss_.BlockAnalyzeCholesky(cholmod_lhs, blocks_, blocks_); - } else { - factor_ = ss_.AnalyzeCholesky(cholmod_lhs); - } + factor_ = ss_.BlockAnalyzeCholesky(cholmod_lhs, blocks_, blocks_); } cholmod_dense* cholmod_solution = diff --git a/extern/libmv/third_party/ceres/internal/ceres/schur_eliminator.h b/extern/libmv/third_party/ceres/internal/ceres/schur_eliminator.h index e0c7fdafcb6..f2c247a5adb 100644 --- a/extern/libmv/third_party/ceres/internal/ceres/schur_eliminator.h +++ b/extern/libmv/third_party/ceres/internal/ceres/schur_eliminator.h @@ -277,7 +277,7 @@ class SchurEliminator : public SchurEliminatorBase { const double* b, int row_block_counter, typename EigenTypes<kEBlockSize, kEBlockSize>::Matrix* eet, - typename EigenTypes<kEBlockSize>::Vector* g, + double* g, double* buffer, BlockRandomAccessMatrix* lhs); @@ -285,7 +285,7 @@ class SchurEliminator : public SchurEliminatorBase { const BlockSparseMatrixBase* A, const double* b, int row_block_counter, - const Vector& inverse_ete_g, + const double* inverse_ete_g, double* rhs); void ChunkOuterProduct(const CompressedRowBlockStructure* bs, @@ -336,7 +336,7 @@ class SchurEliminator : public SchurEliminatorBase { // ChunkOuterProduct. Like buffer_ it is of size num_threads * // buffer_size_. 
Each thread accesses the chunk // - // [thread_id * buffer_size_ , (thread_id + 1) * buffer_size_] + // [thread_id * buffer_size_ , (thread_id + 1) * buffer_size_ -1] // scoped_array<double> chunk_outer_product_buffer_; diff --git a/extern/libmv/third_party/ceres/internal/ceres/schur_eliminator_impl.h b/extern/libmv/third_party/ceres/internal/ceres/schur_eliminator_impl.h index b46eab92c34..835f879caf6 100644 --- a/extern/libmv/third_party/ceres/internal/ceres/schur_eliminator_impl.h +++ b/extern/libmv/third_party/ceres/internal/ceres/schur_eliminator_impl.h @@ -51,16 +51,18 @@ #include <algorithm> #include <map> -#include "Eigen/Dense" + #include "ceres/blas.h" #include "ceres/block_random_access_matrix.h" #include "ceres/block_sparse_matrix.h" #include "ceres/block_structure.h" #include "ceres/internal/eigen.h" +#include "ceres/internal/fixed_array.h" #include "ceres/internal/scoped_ptr.h" #include "ceres/map_util.h" #include "ceres/schur_eliminator.h" #include "ceres/stl_util.h" +#include "Eigen/Dense" #include "glog/logging.h" namespace ceres { @@ -240,8 +242,9 @@ Eliminate(const BlockSparseMatrixBase* A, ete.setZero(); } - typename EigenTypes<kEBlockSize>::Vector g(e_block_size); - g.setZero(); + FixedArray<double, 8> g(e_block_size); + typename EigenTypes<kEBlockSize>::VectorRef gref(g.get(), e_block_size); + gref.setZero(); // We are going to be computing // @@ -256,7 +259,7 @@ Eliminate(const BlockSparseMatrixBase* A, // in this chunk (g) and add the outer product of the f_blocks to // Schur complement (S += F'F). ChunkDiagonalBlockAndGradient( - chunk, A, b, chunk.start, &ete, &g, buffer, lhs); + chunk, A, b, chunk.start, &ete, g.get(), buffer, lhs); // Normally one wouldn't compute the inverse explicitly, but // e_block_size will typically be a small number like 3, in @@ -273,7 +276,16 @@ Eliminate(const BlockSparseMatrixBase* A, // linear system. 
// // rhs = F'b - F'E(E'E)^(-1) E'b - UpdateRhs(chunk, A, b, chunk.start, inverse_ete * g, rhs); + + FixedArray<double, 8> inverse_ete_g(e_block_size); + MatrixVectorMultiply<kEBlockSize, kEBlockSize, 0>( + inverse_ete.data(), + e_block_size, + e_block_size, + g.get(), + inverse_ete_g.get()); + + UpdateRhs(chunk, A, b, chunk.start, inverse_ete_g.get(), rhs); // S -= F'E(E'E)^{-1}E'F ChunkOuterProduct(bs, inverse_ete, buffer, chunk.buffer_layout, lhs); @@ -318,7 +330,9 @@ BackSubstitute(const BlockSparseMatrixBase* A, const Cell& e_cell = row.cells.front(); DCHECK_EQ(e_block_id, e_cell.block_id); - typename EigenTypes<kRowBlockSize>::Vector sj = + FixedArray<double, 8> sj(row.block.size); + + typename EigenTypes<kRowBlockSize>::VectorRef(sj.get(), row.block.size) = typename EigenTypes<kRowBlockSize>::ConstVectorRef (b + bs->rows[chunk.start + j].block.position, row.block.size); @@ -330,16 +344,16 @@ BackSubstitute(const BlockSparseMatrixBase* A, MatrixVectorMultiply<kRowBlockSize, kFBlockSize, -1>( row_values + row.cells[c].position, row.block.size, f_block_size, z + lhs_row_layout_[r_block], - sj.data()); + sj.get()); } MatrixTransposeVectorMultiply<kRowBlockSize, kEBlockSize, 1>( row_values + e_cell.position, row.block.size, e_block_size, - sj.data(), + sj.get(), y_ptr); MatrixTransposeMatrixMultiply - <kRowBlockSize, kEBlockSize,kRowBlockSize, kEBlockSize, 1>( + <kRowBlockSize, kEBlockSize, kRowBlockSize, kEBlockSize, 1>( row_values + e_cell.position, row.block.size, e_block_size, row_values + e_cell.position, row.block.size, e_block_size, ete.data(), 0, 0, e_block_size, e_block_size); @@ -359,25 +373,25 @@ UpdateRhs(const Chunk& chunk, const BlockSparseMatrixBase* A, const double* b, int row_block_counter, - const Vector& inverse_ete_g, + const double* inverse_ete_g, double* rhs) { const CompressedRowBlockStructure* bs = A->block_structure(); - const int e_block_size = inverse_ete_g.rows(); + const int e_block_id = bs->rows[chunk.start].cells.front().block_id; + 
const int e_block_size = bs->cols[e_block_id].size; + int b_pos = bs->rows[row_block_counter].block.position; for (int j = 0; j < chunk.size; ++j) { const CompressedRow& row = bs->rows[row_block_counter + j]; const double *row_values = A->RowBlockValues(row_block_counter + j); const Cell& e_cell = row.cells.front(); - const typename EigenTypes<kRowBlockSize, kEBlockSize>::ConstMatrixRef - e_block(row_values + e_cell.position, - row.block.size, - e_block_size); - - const typename EigenTypes<kRowBlockSize>::Vector - sj = + typename EigenTypes<kRowBlockSize>::Vector sj = typename EigenTypes<kRowBlockSize>::ConstVectorRef - (b + b_pos, row.block.size) - e_block * (inverse_ete_g); + (b + b_pos, row.block.size); + + MatrixVectorMultiply<kRowBlockSize, kEBlockSize, -1>( + row_values + e_cell.position, row.block.size, e_block_size, + inverse_ete_g, sj.data()); for (int c = 1; c < row.cells.size(); ++c) { const int block_id = row.cells[c].block_id; @@ -421,7 +435,7 @@ ChunkDiagonalBlockAndGradient( const double* b, int row_block_counter, typename EigenTypes<kEBlockSize, kEBlockSize>::Matrix* ete, - typename EigenTypes<kEBlockSize>::Vector* g, + double* g, double* buffer, BlockRandomAccessMatrix* lhs) { const CompressedRowBlockStructure* bs = A->block_structure(); @@ -453,7 +467,7 @@ ChunkDiagonalBlockAndGradient( MatrixTransposeVectorMultiply<kRowBlockSize, kEBlockSize, 1>( row_values + e_cell.position, row.block.size, e_block_size, b + b_pos, - g->data()); + g); // buffer = E'F. 
This computation is done by iterating over the @@ -550,10 +564,10 @@ NoEBlockRowsUpdate(const BlockSparseMatrixBase* A, const int block_id = row.cells[c].block_id; const int block_size = bs->cols[block_id].size; const int block = block_id - num_eliminate_blocks_; - VectorRef(rhs + lhs_row_layout_[block], block_size).noalias() - += (ConstMatrixRef(row_values + row.cells[c].position, - row.block.size, block_size).transpose() * - ConstVectorRef(b + row.block.position, row.block.size)); + MatrixTransposeVectorMultiply<Eigen::Dynamic, Eigen::Dynamic, 1>( + row_values + row.cells[c].position, row.block.size, block_size, + b + row.block.position, + rhs + lhs_row_layout_[block]); } NoEBlockRowOuterProduct(A, row_block_counter, lhs); } @@ -588,8 +602,6 @@ NoEBlockRowOuterProduct(const BlockSparseMatrixBase* A, DCHECK_GE(block1, 0); const int block1_size = bs->cols[row.cells[i].block_id].size; - const ConstMatrixRef b1(row_values + row.cells[i].position, - row.block.size, block1_size); int r, c, row_stride, col_stride; CellInfo* cell_info = lhs->GetCell(block1, block1, &r, &c, @@ -668,6 +680,7 @@ EBlockRowOuterProduct(const BlockSparseMatrixBase* A, &row_stride, &col_stride); if (cell_info != NULL) { // block += b1.transpose() * b2; + CeresMutexLock l(&cell_info->m); MatrixTransposeMatrixMultiply <kRowBlockSize, kFBlockSize, kRowBlockSize, kFBlockSize, 1>( row_values + row.cells[i].position, row.block.size, block1_size, diff --git a/extern/libmv/third_party/ceres/internal/ceres/solver_impl.cc b/extern/libmv/third_party/ceres/internal/ceres/solver_impl.cc index ffc347ec9f0..43c0be6180d 100644 --- a/extern/libmv/third_party/ceres/internal/ceres/solver_impl.cc +++ b/extern/libmv/third_party/ceres/internal/ceres/solver_impl.cc @@ -38,8 +38,8 @@ #include "ceres/gradient_checking_cost_function.h" #include "ceres/iteration_callback.h" #include "ceres/levenberg_marquardt_strategy.h" -#include "ceres/linear_solver.h" #include "ceres/line_search_minimizer.h" +#include 
"ceres/linear_solver.h" #include "ceres/map_util.h" #include "ceres/minimizer.h" #include "ceres/ordered_groups.h" @@ -50,6 +50,7 @@ #include "ceres/program.h" #include "ceres/residual_block.h" #include "ceres/stringprintf.h" +#include "ceres/suitesparse.h" #include "ceres/trust_region_minimizer.h" #include "ceres/wall_time.h" @@ -505,19 +506,6 @@ void SolverImpl::TrustRegionSolve(const Solver::Options& original_options, summary->trust_region_strategy_type = options.trust_region_strategy_type; summary->dogleg_type = options.dogleg_type; - // Only Schur types require the lexicographic reordering. - if (IsSchurType(options.linear_solver_type)) { - const int num_eliminate_blocks = - options.linear_solver_ordering - ->group_to_elements().begin() - ->second.size(); - if (!LexicographicallyOrderResidualBlocks(num_eliminate_blocks, - reduced_program.get(), - &summary->error)) { - return; - } - } - scoped_ptr<Evaluator> evaluator(CreateEvaluator(options, problem_impl->parameter_map(), reduced_program.get(), @@ -953,11 +941,14 @@ bool SolverImpl::RemoveFixedBlocksFromProgram(Program* program, parameter_blocks->resize(j); } - CHECK(((program->NumResidualBlocks() == 0) && + if (!(((program->NumResidualBlocks() == 0) && (program->NumParameterBlocks() == 0)) || ((program->NumResidualBlocks() != 0) && - (program->NumParameterBlocks() != 0))) - << "Congratulations, you found a bug in Ceres. Please report it."; + (program->NumParameterBlocks() != 0)))) { + *error = "Congratulations, you found a bug in Ceres. 
Please report it."; + return false; + } + return true; } @@ -965,19 +956,14 @@ Program* SolverImpl::CreateReducedProgram(Solver::Options* options, ProblemImpl* problem_impl, double* fixed_cost, string* error) { - EventLogger event_logger("CreateReducedProgram"); - CHECK_NOTNULL(options->linear_solver_ordering); Program* original_program = problem_impl->mutable_program(); scoped_ptr<Program> transformed_program(new Program(*original_program)); - event_logger.AddEvent("TransformedProgram"); ParameterBlockOrdering* linear_solver_ordering = options->linear_solver_ordering; - const int min_group_id = linear_solver_ordering->group_to_elements().begin()->first; - const int original_num_groups = linear_solver_ordering->NumGroups(); if (!RemoveFixedBlocksFromProgram(transformed_program.get(), linear_solver_ordering, @@ -986,97 +972,45 @@ Program* SolverImpl::CreateReducedProgram(Solver::Options* options, return NULL; } - event_logger.AddEvent("RemoveFixedBlocks"); - if (transformed_program->NumParameterBlocks() == 0) { - if (transformed_program->NumResidualBlocks() > 0) { - *error = "Zero parameter blocks but non-zero residual blocks" - " in the reduced program. Congratulations, you found a " - "Ceres bug! Please report this error to the developers."; - return NULL; - } - LOG(WARNING) << "No varying parameter blocks to optimize; " << "bailing early."; return transformed_program.release(); } - // If the user supplied an linear_solver_ordering with just one - // group, it is equivalent to the user supplying NULL as - // ordering. Ceres is completely free to choose the parameter block - // ordering as it sees fit. For Schur type solvers, this means that - // the user wishes for Ceres to identify the e_blocks, which we do - // by computing a maximal independent set. 
- if (original_num_groups == 1 && IsSchurType(options->linear_solver_type)) { - vector<ParameterBlock*> schur_ordering; - const int num_eliminate_blocks = ComputeSchurOrdering(*transformed_program, - &schur_ordering); - CHECK_EQ(schur_ordering.size(), transformed_program->NumParameterBlocks()) - << "Congratulations, you found a Ceres bug! Please report this error " - << "to the developers."; - - for (int i = 0; i < schur_ordering.size(); ++i) { - linear_solver_ordering->AddElementToGroup( - schur_ordering[i]->mutable_user_state(), - (i < num_eliminate_blocks) ? 0 : 1); - } - } - event_logger.AddEvent("SchurOrdering"); - - if (!ApplyUserOrdering(problem_impl->parameter_map(), - linear_solver_ordering, - transformed_program.get(), - error)) { - return NULL; - } - event_logger.AddEvent("ApplyOrdering"); - - // If the user requested the use of a Schur type solver, and - // supplied a non-NULL linear_solver_ordering object with more than - // one elimination group, then it can happen that after all the - // parameter blocks which are fixed or unused have been removed from - // the program and the ordering, there are no more parameter blocks - // in the first elimination group. - // - // In such a case, the use of a Schur type solver is not possible, - // as they assume there is at least one e_block. Thus, we - // automatically switch to one of the other solvers, depending on - // the user's indicated preferences. if (IsSchurType(options->linear_solver_type) && - original_num_groups > 1 && linear_solver_ordering->GroupSize(min_group_id) == 0) { - string msg = "No e_blocks remaining. Switching from "; - if (options->linear_solver_type == SPARSE_SCHUR) { - options->linear_solver_type = SPARSE_NORMAL_CHOLESKY; - msg += "SPARSE_SCHUR to SPARSE_NORMAL_CHOLESKY."; - } else if (options->linear_solver_type == DENSE_SCHUR) { - // TODO(sameeragarwal): This is probably not a great choice. 
- // Ideally, we should have a DENSE_NORMAL_CHOLESKY, that can - // take a BlockSparseMatrix as input. - options->linear_solver_type = DENSE_QR; - msg += "DENSE_SCHUR to DENSE_QR."; - } else if (options->linear_solver_type == ITERATIVE_SCHUR) { - msg += StringPrintf("ITERATIVE_SCHUR with %s preconditioner " - "to CGNR with JACOBI preconditioner.", - PreconditionerTypeToString( - options->preconditioner_type)); - options->linear_solver_type = CGNR; - if (options->preconditioner_type != IDENTITY) { - // CGNR currently only supports the JACOBI preconditioner. - options->preconditioner_type = JACOBI; - } + // If the user requested the use of a Schur type solver, and + // supplied a non-NULL linear_solver_ordering object with more than + // one elimination group, then it can happen that after all the + // parameter blocks which are fixed or unused have been removed from + // the program and the ordering, there are no more parameter blocks + // in the first elimination group. + // + // In such a case, the use of a Schur type solver is not possible, + // as they assume there is at least one e_block. Thus, we + // automatically switch to the closest solver to the one indicated + // by the user. + AlternateLinearSolverForSchurTypeLinearSolver(options); + } + + if (IsSchurType(options->linear_solver_type)) { + if (!ReorderProgramForSchurTypeLinearSolver(problem_impl->parameter_map(), + linear_solver_ordering, + transformed_program.get(), + error)) { + return NULL; } - - LOG(WARNING) << msg; + return transformed_program.release(); } - event_logger.AddEvent("AlternateSolver"); + if (options->linear_solver_type == SPARSE_NORMAL_CHOLESKY && + options->sparse_linear_algebra_library == SUITE_SPARSE) { + ReorderProgramForSparseNormalCholesky(transformed_program.get()); + return transformed_program.release(); + } - // Since the transformed program is the "active" program, and it is - // mutated, update the parameter offsets and indices. 
transformed_program->SetParameterOffsetsAndIndex(); - - event_logger.AddEvent("SetOffsets"); return transformed_program.release(); } @@ -1158,11 +1092,10 @@ LinearSolver* SolverImpl::CreateLinearSolver(Solver::Options* options, linear_solver_options.preconditioner_type = options->preconditioner_type; linear_solver_options.sparse_linear_algebra_library = options->sparse_linear_algebra_library; - + linear_solver_options.use_postordering = options->use_postordering; linear_solver_options.num_threads = options->num_linear_solver_threads; options->num_linear_solver_threads = linear_solver_options.num_threads; - linear_solver_options.use_block_amd = options->use_block_amd; const map<int, set<double*> >& groups = options->linear_solver_ordering->group_to_elements(); for (map<int, set<double*> >::const_iterator it = groups.begin(); @@ -1398,5 +1331,172 @@ CoordinateDescentMinimizer* SolverImpl::CreateInnerIterationMinimizer( return inner_iteration_minimizer.release(); } +void SolverImpl::AlternateLinearSolverForSchurTypeLinearSolver( + Solver::Options* options) { + if (!IsSchurType(options->linear_solver_type)) { + return; + } + + string msg = "No e_blocks remaining. Switching from "; + if (options->linear_solver_type == SPARSE_SCHUR) { + options->linear_solver_type = SPARSE_NORMAL_CHOLESKY; + msg += "SPARSE_SCHUR to SPARSE_NORMAL_CHOLESKY."; + } else if (options->linear_solver_type == DENSE_SCHUR) { + // TODO(sameeragarwal): This is probably not a great choice. + // Ideally, we should have a DENSE_NORMAL_CHOLESKY, that can + // take a BlockSparseMatrix as input. 
+ options->linear_solver_type = DENSE_QR; + msg += "DENSE_SCHUR to DENSE_QR."; + } else if (options->linear_solver_type == ITERATIVE_SCHUR) { + options->linear_solver_type = CGNR; + if (options->preconditioner_type != IDENTITY) { + msg += StringPrintf("ITERATIVE_SCHUR with %s preconditioner " + "to CGNR with JACOBI preconditioner.", + PreconditionerTypeToString( + options->preconditioner_type)); + // CGNR currently only supports the JACOBI preconditioner. + options->preconditioner_type = JACOBI; + } else { + msg += StringPrintf("ITERATIVE_SCHUR with IDENTITY preconditioner " + "to CGNR with IDENTITY preconditioner."); + } + } + LOG(WARNING) << msg; +} + +bool SolverImpl::ReorderProgramForSchurTypeLinearSolver( + const ProblemImpl::ParameterMap& parameter_map, + ParameterBlockOrdering* ordering, + Program* program, + string* error) { + // At this point one of two things is true. + // + // 1. The user did not specify an ordering - ordering has one + // group containined all the parameter blocks. + + // 2. The user specified an ordering, and the first group has + // non-zero elements. + // + // We handle these two cases in turn. + if (ordering->NumGroups() == 1) { + // If the user supplied an ordering with just one + // group, it is equivalent to the user supplying NULL as an + // ordering. Ceres is completely free to choose the parameter + // block ordering as it sees fit. For Schur type solvers, this + // means that the user wishes for Ceres to identify the e_blocks, + // which we do by computing a maximal independent set. + vector<ParameterBlock*> schur_ordering; + const int num_eliminate_blocks = ComputeSchurOrdering(*program, + &schur_ordering); + + CHECK_EQ(schur_ordering.size(), program->NumParameterBlocks()) + << "Congratulations, you found a Ceres bug! Please report this error " + << "to the developers."; + + // Update the ordering object. 
+ for (int i = 0; i < schur_ordering.size(); ++i) { + double* parameter_block = schur_ordering[i]->mutable_user_state(); + const int group_id = (i < num_eliminate_blocks) ? 0 : 1; + ordering->AddElementToGroup(parameter_block, group_id); + } + + // Apply the parameter block re-ordering. Technically we could + // call ApplyUserOrdering, but this is cheaper and simpler. + swap(*program->mutable_parameter_blocks(), schur_ordering); + } else { + // The user supplied an ordering. + if (!ApplyUserOrdering(parameter_map, ordering, program, error)) { + return false; + } + } + + program->SetParameterOffsetsAndIndex(); + + const int num_eliminate_blocks = + ordering->group_to_elements().begin()->second.size(); + + // Schur type solvers also require that their residual blocks be + // lexicographically ordered. + return LexicographicallyOrderResidualBlocks(num_eliminate_blocks, + program, + error); +} + +TripletSparseMatrix* SolverImpl::CreateJacobianBlockSparsityTranspose( + const Program* program) { + + // Matrix to store the block sparsity structure of + TripletSparseMatrix* tsm = + new TripletSparseMatrix(program->NumParameterBlocks(), + program->NumResidualBlocks(), + 10 * program->NumResidualBlocks()); + int num_nonzeros = 0; + int* rows = tsm->mutable_rows(); + int* cols = tsm->mutable_cols(); + double* values = tsm->mutable_values(); + + const vector<ResidualBlock*>& residual_blocks = program->residual_blocks(); + for (int c = 0; c < residual_blocks.size(); ++c) { + const ResidualBlock* residual_block = residual_blocks[c]; + const int num_parameter_blocks = residual_block->NumParameterBlocks(); + ParameterBlock* const* parameter_blocks = + residual_block->parameter_blocks(); + + for (int j = 0; j < num_parameter_blocks; ++j) { + if (parameter_blocks[j]->IsConstant()) { + continue; + } + + // Re-size the matrix if needed. 
+ if (num_nonzeros >= tsm->max_num_nonzeros()) { + tsm->Reserve(2 * num_nonzeros); + rows = tsm->mutable_rows(); + cols = tsm->mutable_cols(); + values = tsm->mutable_values(); + } + CHECK_LT(num_nonzeros, tsm->max_num_nonzeros()); + + const int r = parameter_blocks[j]->index(); + rows[num_nonzeros] = r; + cols[num_nonzeros] = c; + values[num_nonzeros] = 1.0; + ++num_nonzeros; + } + } + + tsm->set_num_nonzeros(num_nonzeros); + return tsm; +} + +void SolverImpl::ReorderProgramForSparseNormalCholesky(Program* program) { +#ifndef CERES_NO_SUITESPARSE + // Set the offsets and index for CreateJacobianSparsityTranspose. + program->SetParameterOffsetsAndIndex(); + + // Compute a block sparse presentation of J'. + scoped_ptr<TripletSparseMatrix> tsm_block_jacobian_transpose( + SolverImpl::CreateJacobianBlockSparsityTranspose(program)); + + // Order rows using AMD. + SuiteSparse ss; + cholmod_sparse* block_jacobian_transpose = + ss.CreateSparseMatrix(tsm_block_jacobian_transpose.get()); + + vector<int> ordering(program->NumParameterBlocks(), -1); + ss.ApproximateMinimumDegreeOrdering(block_jacobian_transpose, &ordering[0]); + ss.Free(block_jacobian_transpose); + + // Apply ordering. 
+ vector<ParameterBlock*>& parameter_blocks = + *(program->mutable_parameter_blocks()); + const vector<ParameterBlock*> parameter_blocks_copy(parameter_blocks); + for (int i = 0; i < program->NumParameterBlocks(); ++i) { + parameter_blocks[i] = parameter_blocks_copy[ordering[i]]; + } +#endif + + program->SetParameterOffsetsAndIndex(); +} + } // namespace internal } // namespace ceres diff --git a/extern/libmv/third_party/ceres/internal/ceres/solver_impl.h b/extern/libmv/third_party/ceres/internal/ceres/solver_impl.h index c05483021a4..22ca6229b81 100644 --- a/extern/libmv/third_party/ceres/internal/ceres/solver_impl.h +++ b/extern/libmv/third_party/ceres/internal/ceres/solver_impl.h @@ -46,6 +46,7 @@ class CoordinateDescentMinimizer; class Evaluator; class LinearSolver; class Program; +class TripletSparseMatrix; class SolverImpl { public: @@ -151,6 +152,47 @@ class SolverImpl { const Program& program, const ProblemImpl::ParameterMap& parameter_map, Solver::Summary* summary); + + // If the linear solver is of Schur type, then replace it with the + // closest equivalent linear solver. This is done when the user + // requested a Schur type solver but the problem structure makes it + // impossible to use one. + // + // If the linear solver is not of Schur type, the function is a + // no-op. + static void AlternateLinearSolverForSchurTypeLinearSolver( + Solver::Options* options); + + // Schur type solvers require that all parameter blocks eliminated + // by the Schur eliminator occur before others and the residuals be + // sorted in lexicographic order of their parameter blocks. + // + // If ordering has atleast two groups, then apply the ordering, + // otherwise compute a new ordering using a Maximal Independent Set + // algorithm and apply it. + // + // Upon return, ordering contains the parameter block ordering that + // was used to order the program. 
+ static bool ReorderProgramForSchurTypeLinearSolver( + const ProblemImpl::ParameterMap& parameter_map, + ParameterBlockOrdering* ordering, + Program* program, + string* error); + + // CHOLMOD when doing the sparse cholesky factorization of the + // Jacobian matrix, reorders its columns to reduce the + // fill-in. Compute this permutation and re-order the parameter + // blocks. + // + static void ReorderProgramForSparseNormalCholesky(Program* program); + + // Create a TripletSparseMatrix which contains the zero-one + // structure corresponding to the block sparsity of the transpose of + // the Jacobian matrix. + // + // Caller owns the result. + static TripletSparseMatrix* CreateJacobianBlockSparsityTranspose( + const Program* program); }; } // namespace internal diff --git a/extern/libmv/third_party/ceres/internal/ceres/sparse_normal_cholesky_solver.cc b/extern/libmv/third_party/ceres/internal/ceres/sparse_normal_cholesky_solver.cc index aacba86e64a..bc1f98334ae 100644 --- a/extern/libmv/third_party/ceres/internal/ceres/sparse_normal_cholesky_solver.cc +++ b/extern/libmv/third_party/ceres/internal/ceres/sparse_normal_cholesky_solver.cc @@ -199,24 +199,23 @@ LinearSolver::Summary SparseNormalCholeskySolver::SolveImplUsingSuiteSparse( VectorRef(x, num_cols).setZero(); - scoped_ptr<cholmod_sparse> lhs(ss_.CreateSparseMatrixTransposeView(A)); - CHECK_NOTNULL(lhs.get()); - + cholmod_sparse lhs = ss_.CreateSparseMatrixTransposeView(A); cholmod_dense* rhs = ss_.CreateDenseVector(Atb.data(), num_cols, num_cols); event_logger.AddEvent("Setup"); if (factor_ == NULL) { - if (options_.use_block_amd) { - factor_ = ss_.BlockAnalyzeCholesky(lhs.get(), + if (options_.use_postordering) { + factor_ = ss_.BlockAnalyzeCholesky(&lhs, A->col_blocks(), A->row_blocks()); } else { - factor_ = ss_.AnalyzeCholesky(lhs.get()); + factor_ = ss_.AnalyzeCholeskyWithNaturalOrdering(&lhs); } } + event_logger.AddEvent("Analysis"); - cholmod_dense* sol = ss_.SolveCholesky(lhs.get(), factor_, rhs); + 
cholmod_dense* sol = ss_.SolveCholesky(&lhs, factor_, rhs); event_logger.AddEvent("Solve"); ss_.Free(rhs); diff --git a/extern/libmv/third_party/ceres/internal/ceres/suitesparse.cc b/extern/libmv/third_party/ceres/internal/ceres/suitesparse.cc index 87fae75287e..5138b522d09 100644 --- a/extern/libmv/third_party/ceres/internal/ceres/suitesparse.cc +++ b/extern/libmv/third_party/ceres/internal/ceres/suitesparse.cc @@ -87,23 +87,23 @@ cholmod_sparse* SuiteSparse::CreateSparseMatrixTranspose( return cholmod_triplet_to_sparse(&triplet, triplet.nnz, &cc_); } -cholmod_sparse* SuiteSparse::CreateSparseMatrixTransposeView( +cholmod_sparse SuiteSparse::CreateSparseMatrixTransposeView( CompressedRowSparseMatrix* A) { - cholmod_sparse* m = new cholmod_sparse_struct; - m->nrow = A->num_cols(); - m->ncol = A->num_rows(); - m->nzmax = A->num_nonzeros(); - - m->p = reinterpret_cast<void*>(A->mutable_rows()); - m->i = reinterpret_cast<void*>(A->mutable_cols()); - m->x = reinterpret_cast<void*>(A->mutable_values()); - - m->stype = 0; // Matrix is not symmetric. - m->itype = CHOLMOD_INT; - m->xtype = CHOLMOD_REAL; - m->dtype = CHOLMOD_DOUBLE; - m->sorted = 1; - m->packed = 1; + cholmod_sparse m; + m.nrow = A->num_cols(); + m.ncol = A->num_rows(); + m.nzmax = A->num_nonzeros(); + m.nz = NULL; + m.p = reinterpret_cast<void*>(A->mutable_rows()); + m.i = reinterpret_cast<void*>(A->mutable_cols()); + m.x = reinterpret_cast<void*>(A->mutable_values()); + m.z = NULL; + m.stype = 0; // Matrix is not symmetric. 
+ m.itype = CHOLMOD_INT; + m.xtype = CHOLMOD_REAL; + m.dtype = CHOLMOD_DOUBLE; + m.sorted = 1; + m.packed = 1; return m; } @@ -172,6 +172,23 @@ cholmod_factor* SuiteSparse::AnalyzeCholeskyWithUserOrdering( return factor; } +cholmod_factor* SuiteSparse::AnalyzeCholeskyWithNaturalOrdering(cholmod_sparse* A) { + cc_.nmethods = 1; + cc_.method[0].ordering = CHOLMOD_NATURAL; + cc_.postorder = 0; + + cholmod_factor* factor = cholmod_analyze(A, &cc_); + CHECK_EQ(cc_.status, CHOLMOD_OK) + << "Cholmod symbolic analysis failed " << cc_.status; + CHECK_NOTNULL(factor); + + if (VLOG_IS_ON(2)) { + cholmod_print_common(const_cast<char*>("Symbolic Analysis"), &cc_); + } + + return factor; +} + bool SuiteSparse::BlockAMDOrdering(const cholmod_sparse* A, const vector<int>& row_blocks, const vector<int>& col_blocks, @@ -380,6 +397,11 @@ cholmod_dense* SuiteSparse::SolveCholesky(cholmod_sparse* A, return NULL; } +void SuiteSparse::ApproximateMinimumDegreeOrdering(cholmod_sparse* matrix, + int* ordering) { + cholmod_amd(matrix, NULL, 0, ordering, &cc_); +} + } // namespace internal } // namespace ceres diff --git a/extern/libmv/third_party/ceres/internal/ceres/suitesparse.h b/extern/libmv/third_party/ceres/internal/ceres/suitesparse.h index 5a8ef63bd2b..a1a4f355d76 100644 --- a/extern/libmv/third_party/ceres/internal/ceres/suitesparse.h +++ b/extern/libmv/third_party/ceres/internal/ceres/suitesparse.h @@ -70,10 +70,8 @@ class SuiteSparse { // Create a cholmod_sparse wrapper around the contents of A. This is // a shallow object, which refers to the contents of A and does not - // use the SuiteSparse machinery to allocate memory, this object - // should be disposed off with a delete and not a call to Free as is - // the case for objects returned by CreateSparseMatrixTranspose. - cholmod_sparse* CreateSparseMatrixTransposeView(CompressedRowSparseMatrix* A); + // use the SuiteSparse machinery to allocate memory. 
+ cholmod_sparse CreateSparseMatrixTransposeView(CompressedRowSparseMatrix* A); // Given a vector x, build a cholmod_dense vector of size out_size // with the first in_size entries copied from x. If x is NULL, then @@ -135,6 +133,12 @@ class SuiteSparse { cholmod_factor* AnalyzeCholeskyWithUserOrdering(cholmod_sparse* A, const vector<int>& ordering); + // Perform a symbolic factorization of A without re-ordering A. No + // postordering of the elimination tree is performed. This ensures + // that the symbolic factor does not introduce an extra permutation + // on the matrix. See the documentation for CHOLMOD for more details. + cholmod_factor* AnalyzeCholeskyWithNaturalOrdering(cholmod_sparse* A); + // Use the symbolic factorization in L, to find the numerical // factorization for the matrix A or AA^T. Return true if // successful, false otherwise. L contains the numeric factorization @@ -203,6 +207,11 @@ class SuiteSparse { vector<int>* block_rows, vector<int>* block_cols); + // Find a fill reducing approximate minimum degree + // ordering. ordering is expected to be large enough to hold the + // ordering. + void ApproximateMinimumDegreeOrdering(cholmod_sparse* matrix, int* ordering); + void Free(cholmod_sparse* m) { cholmod_free_sparse(&m, &cc_); } void Free(cholmod_dense* m) { cholmod_free_dense(&m, &cc_); } void Free(cholmod_factor* m) { cholmod_free_factor(&m, &cc_); } diff --git a/extern/libmv/third_party/ceres/internal/ceres/visibility_based_preconditioner.cc b/extern/libmv/third_party/ceres/internal/ceres/visibility_based_preconditioner.cc index f5c46bfcc68..4b1e26af6d2 100644 --- a/extern/libmv/third_party/ceres/internal/ceres/visibility_based_preconditioner.cc +++ b/extern/libmv/third_party/ceres/internal/ceres/visibility_based_preconditioner.cc @@ -421,11 +421,7 @@ bool VisibilityBasedPreconditioner::Factorize() { // Symbolic factorization is computed if we don't already have one handy. 
if (factor_ == NULL) { - if (options_.use_block_amd) { - factor_ = ss_.BlockAnalyzeCholesky(lhs, block_size_, block_size_); - } else { - factor_ = ss_.AnalyzeCholesky(lhs); - } + factor_ = ss_.BlockAnalyzeCholesky(lhs, block_size_, block_size_); } bool status = ss_.Cholesky(lhs, factor_); |