diff options
author | Sergey Sharybin <sergey.vfx@gmail.com> | 2013-04-05 13:22:54 +0400 |
---|---|---|
committer | Sergey Sharybin <sergey.vfx@gmail.com> | 2013-04-05 13:22:54 +0400 |
commit | 43b61fb8bd6743072f53d7006d8bebe9ff06caf3 (patch) | |
tree | 54cfc05064194bee46eccf974d9cc22324d366eb /extern | |
parent | a00b72bc7503fd1aecdae41a7cb9338f2eb9b35b (diff) |
Bundle current master of ceres-solver
This brings up some speed improvements:
SPARSE_SCHUR is approx 1.3-1.5x times faster
ITERATIVE_SCHUR is approx 1.2x times faster
For Blender this means camera solving goes a bit
faster now. It will not have an effect on tracking
speed.
Diffstat (limited to 'extern')
14 files changed, 911 insertions, 403 deletions
diff --git a/extern/libmv/third_party/ceres/CMakeLists.txt b/extern/libmv/third_party/ceres/CMakeLists.txt index b71fe1d5cdb..e515280826b 100644 --- a/extern/libmv/third_party/ceres/CMakeLists.txt +++ b/extern/libmv/third_party/ceres/CMakeLists.txt @@ -141,6 +141,7 @@ set(SRC include/ceres/solver.h include/ceres/types.h internal/ceres/array_utils.h + internal/ceres/blas.h internal/ceres/block_evaluate_preparer.h internal/ceres/block_jacobian_writer.h internal/ceres/block_jacobi_preconditioner.h diff --git a/extern/libmv/third_party/ceres/ChangeLog b/extern/libmv/third_party/ceres/ChangeLog index 458a045d8be..46c7b6ac291 100644 --- a/extern/libmv/third_party/ceres/ChangeLog +++ b/extern/libmv/third_party/ceres/ChangeLog @@ -1,3 +1,185 @@ +commit 520d35ef22dbcb05e344451c03ae64304e524a06 +Author: Sameer Agarwal <sameeragarwal@google.com> +Date: Thu Apr 4 08:16:02 2013 -0700 + + Further BLAS improvements. + + 1. Switch to Eigen's implementation when all dimensions are fixed. + 2. Use lazyProduct for eigen matrix-vector product. This brings + eigen's performance on iterative_schur closer to what it used + to be before the last commit. There is however still an + improvement to be had by using the naive implementation when + the matrix and vector have dynamic dimensions. 
+ + BENCHMARK + HEAD CHANGE + + problem-16-22106-pre.txt + gcc-eigen sparse_schur 0.859 gcc-eigen sparse_schur 0.853 + clang-eigen sparse_schur 0.848 clang-eigen sparse_schur 0.850 + gcc-blas sparse_schur 0.956 gcc-blas sparse_schur 0.865 + clang-blas sparse_schur 0.954 clang-blas sparse_schur 0.858 + gcc-eigen iterative_schur 4.656 gcc-eigen iterative_schur 3.271 + clang-eigen iterative_schur 4.664 clang-eigen iterative_schur 3.307 + gcc-blas iterative_schur 2.598 gcc-blas iterative_schur 2.620 + clang-blas iterative_schur 2.554 clang-blas iterative_schur 2.567 + + problem-49-7776-pre.txt + gcc-eigen sparse_schur 0.477 gcc-eigen sparse_schur 0.472 + clang-eigen sparse_schur 0.475 clang-eigen sparse_schur 0.479 + gcc-blas sparse_schur 0.521 gcc-blas sparse_schur 0.469 + clang-blas sparse_schur 0.508 clang-blas sparse_schur 0.471 + gcc-eigen iterative_schur 3.172 gcc-eigen iterative_schur 2.088 + clang-eigen iterative_schur 3.161 clang-eigen iterative_schur 2.079 + gcc-blas iterative_schur 1.701 gcc-blas iterative_schur 1.720 + clang-blas iterative_schur 1.708 clang-blas iterative_schur 1.694 + + problem-245-198739-pre.txt + gcc-eigen sparse_schur 28.092 gcc-eigen sparse_schur 28.233 + clang-eigen sparse_schur 28.148 clang-eigen sparse_schur 28.400 + gcc-blas sparse_schur 30.919 gcc-blas sparse_schur 28.110 + clang-blas sparse_schur 31.001 clang-blas sparse_schur 28.407 + gcc-eigen iterative_schur 63.095 gcc-eigen iterative_schur 43.694 + clang-eigen iterative_schur 63.412 clang-eigen iterative_schur 43.473 + gcc-blas iterative_schur 33.353 gcc-blas iterative_schur 33.321 + clang-blas iterative_schur 33.276 clang-blas iterative_schur 33.278 + + problem-257-65132-pre.txt + gcc-eigen sparse_schur 3.687 gcc-eigen sparse_schur 3.629 + clang-eigen sparse_schur 3.669 clang-eigen sparse_schur 3.652 + gcc-blas sparse_schur 3.947 gcc-blas sparse_schur 3.673 + clang-blas sparse_schur 3.952 clang-blas sparse_schur 3.678 + gcc-eigen iterative_schur 121.512 gcc-eigen 
iterative_schur 76.833 + clang-eigen iterative_schur 123.547 clang-eigen iterative_schur 78.763 + gcc-blas iterative_schur 68.334 gcc-blas iterative_schur 68.612 + clang-blas iterative_schur 67.793 clang-blas iterative_schur 68.266 + + Notes: + + 1. Naive BLAS was a bit worse than eigen on fixed sized matrices. We did not see this + before because of the different inlining thresholds. Fixing this boosted eigen's + performance. Also the disparity between gcc and clang has gone away. + + 2. SPARSE_SCHUR performance remains the same, since it is only testing static sized + matrices. + + 3. ITERATIVE_SCHUR performance goes up substantially due to the lazyProduct change, + but even there, since most of the products are dynamic sized, the naive implementation + wins handily. + + Change-Id: Idc17f35b9c68aaebb1b2e131adf3af8374a85a4c + +commit 25ac54807eedf16fd6c34efc390901ee549a7d14 +Author: Sameer Agarwal <sameeragarwal@google.com> +Date: Wed Apr 3 18:51:27 2013 -0700 + + Speed up Jets. + + Change-Id: I101bac1b1a1cf72ca49ffcf843b73c0ef5a6dfcb + +commit 3d6eceb45cf27024865908f0c10a5c2b0f8719cf +Author: Sameer Agarwal <sameeragarwal@google.com> +Date: Tue Apr 2 21:45:48 2013 -0700 + + Replace more instances of Eigen GEMV with Ceres BLAS. + + With this ITERATIVE_SCHUR with JACOBI preconditioner went down from + 280 seconds to 150 seconds on problem-744-543562-pre.txt. + + Change-Id: I4f319c1108421e8d59f58654a4c0576ad65df609 + +commit 296fa9b1279ee1900c8ae32d70e97cd10fc0b46b +Author: Sameer Agarwal <sameeragarwal@google.com> +Date: Tue Apr 2 09:44:15 2013 -0700 + + Replace Eigen block operations with small GEMM and GEMV loops. + + 1. Add Matrix-Matrix and Matrix-Vector multiply functions. + 2. Replace Eigen usage in SchurEliminator with these custom + matrix operations. + 3. Save on some memory allocations in ChunkOuterProduct. + 4. Replace LDLT with LLT. + + As a result on problem-16-22106-pre.txt, the linear solver time + goes down from 1.2s to 0.64s. 
+ + Change-Id: I2daa667960e0a1e8834489965a30be31f37fd87f + +commit 222ca20e8facf706582fe696b7f41247391eac12 +Author: Sameer Agarwal <sameeragarwal@google.com> +Date: Mon Apr 1 09:11:07 2013 -0700 + + SuiteSparse cleanup. + + 1. Silence CHOLMOD's indefiniteness warnings. + 2. Add a comment about how the error handling in suitesparse.cc + needs to be improved. + 3. Move the analysis logging into suitesparse.cc and out of the + three callsites. + + Change-Id: Idd396b8ea4bf59fc1ffc7f9fcbbc7b38ed71643c + +commit b7ba93459b7f584eedb1a9f42f3d06bccabd33dc +Author: Petter Strandmark <petter.strandmark@gmail.com> +Date: Tue Feb 19 12:52:58 2013 +0100 + + Enable larger tuple sizes for Visual Studio 2012. + + Visual Studio 2012 does not have variadic templates and implements + tuples differently. By default, only sizes up to 5 are supported, + which conflicts with Gtest. + + Change-Id: Ieb8d59e4329863cbfa2729d8a76db0714c08d259 + +commit 564a83fcc690ac8383bf52a782c45757ae7fa2ad +Author: Sameer Agarwal <sameeragarwal@google.com> +Date: Tue Mar 26 11:11:43 2013 -0700 + + Lint cleanup from William Rucklidge. 
+ + Change-Id: I8d4a0aa3e264775d20e99a6b5265f3023de92560 + +commit cbe64827becbbaab5b435a71bf00353b4ddd026b +Author: Sameer Agarwal <sameeragarwal@google.com> +Date: Mon Mar 25 17:39:53 2013 -0700 + + Update documentation + + Change-Id: Iea3c4b5409e593b1fb070a491ba8a479db32ca58 + +commit 802639c89603c9541e624766349d1989a1f641c0 +Author: Pablo Speciale <pablo.speciale@gmail.com> +Date: Mon Mar 25 20:53:45 2013 -0700 + + ceresproblem label was incorrect + + Change-Id: I3e210375adba4fa50ff3c25398b20a65d241cb35 + +commit 6bcb8d9c304a3b218f8788018dfdfe368bb7d60c +Author: Pablo Speciale <pablo.speciale@gmail.com> +Date: Mon Mar 25 16:40:26 2013 -0700 + + Compiling against static or shared library + + Change-Id: I3fb35e9a49f90b8894f59dde49c90a7c2dd74b0a + +commit 619cfe692020c078275b68eac2167232fafdfffb +Author: Sameer Agarwal <sameeragarwal@google.com> +Date: Mon Mar 25 14:03:41 2013 -0700 + + Update version history + + Change-Id: I1d036efad1507efd19d8581f147b38170b1f0543 + +commit 6ae34757850a5fa8213e0d1a540d9d75d6840a08 +Author: Pablo Speciale <pablo.speciale@gmail.com> +Date: Sun Mar 24 22:30:52 2013 -0700 + + Added documentation regarding the use of Ceres with cmake (once installed) + Commets about the flag ``BUILD_DOCUMENTATION=ON`` + + Change-Id: I8814927e60af190c8043bfc36e77fe76bfe6f562 + commit f46de9e697eb5b8756084615e29ded48600a4d39 Author: Sergey Sharybin <sergey.vfx@gmail.com> Date: Thu Mar 21 15:31:35 2013 +0600 @@ -437,171 +619,3 @@ Date: Sun Feb 24 14:15:45 2013 -0800 Minor release script fixes. Change-Id: Ifd0a7f4f584c85d4d9574eca46094b372a8d7aff - -commit b53c9667f508c125b8aa833e7a063fa44ef8a98e -Author: Sergey Sharybin <sergey.vfx@gmail.com> -Date: Mon Feb 25 01:14:26 2013 +0600 - - Solve No Previous Prototype GCC warning - - In some cases there were missing includes of own - header files from implementation files. - - In other cases moved function which are only used - within single file into an anonymous namespace. 
- - Change-Id: I2c6b411bcfbc521e2a5f21265dc8e009a548b1c8 - -commit 267ccc45a3e875bf87832a8ad615be690b4926d3 -Author: Sergey Sharybin <sergey.vfx@gmail.com> -Date: Mon Feb 25 01:04:16 2013 +0600 - - Fix for MinGW build on Windows - - GG_LONGLONG and GG_ULONGLONG shall use LL and ULL suffixes, - since MinGW is actuall a GCC compiler. - - Solved by checking whether compilation happens with MinGW - or not using standard MinGW's __MINGW32__ and __MINGW64__ - definitions. - - Change-Id: I789b34f6342a56ba42f4b280f7242700022ab7a1 - -commit 509f68cfe3fd13b794c4e67ff38c761407c858cf -Author: Sameer Agarwal <sameeragarwal@google.com> -Date: Wed Feb 20 01:39:03 2013 -0800 - - Problem::Evaluate implementation. - - 1. Add Problem::Evaluate and tests. - 2. Remove Solver::Summary::initial/final_* - 3. Remove Solver::Options::return_* members. - 4. Various cpplint cleanups. - - Change-Id: I4266de53489896f72d9c6798c5efde6748d68a47 - -commit d4a0bf86d688d1b68e00ff302858de5a4e0d9727 -Author: Keir Mierle <mierle@gmail.com> -Date: Sun Feb 24 10:35:44 2013 -0800 - - Fix threading build on Windows. - - On Windows, including the "windows.h" header defines an enormous number of - symbols; some of which are macros with common names. In particular, "ERROR" and - "min" and "max" get defined. This causes clashes when user code references - these names in a context other than the intended use in windows.h. - - To deal with this, the Microsoft engineers added the ability to control the - definition of these symbols by adding extra defines. In particular, including - windows.h in the following way - - #define NOGDI - #define NOMINMAX - - will reduce the number of macros defined. This way they will not conflict with - other uses in Ceres. For example, numeric_limits<double>::max() is impossible - to call without defining NOMINMAX. 
- - Change-Id: I166f5d3bb6dc0e2e4b2ebf800fb19e49206f7874 - -commit beb4505311011130a7e54632137b0fbb5824cc9b -Author: Sameer Agarwal <sameeragarwal@google.com> -Date: Fri Feb 22 13:37:01 2013 -0800 - - Minor fixes - - Based on William Rucklidge's review, including - a nasty bug in parameter block removal. - - Change-Id: I3a692e589f600ff560ecae9fa85bb0b76063d403 - -commit 9a88bd7c4b40e2a1e0cd9b0dc09a3517c467e04e -Author: Sameer Agarwal <sameeragarwal@google.com> -Date: Tue Feb 19 13:09:12 2013 -0800 - - Minor bug fixes - - Change-Id: I94e4521adf76a6c77db954c4a8955168e9d37b55 - -commit 956ed7e8f2054623615e6ae3601055d613897f26 -Author: Sameer Agarwal <sameeragarwal@google.com> -Date: Tue Feb 19 07:06:15 2013 -0800 - - Various minor fixes. - - 1. Unused variable warnings and fixes. - 2. Minor documentation update. - - Change-Id: I815588a5806df1030a7c8750f4fb594c503f8998 - -commit 3e2c4ef9ad35e94198f4f3367b99fd91e26996a1 -Author: Keir Mierle <mierle@gmail.com> -Date: Sun Feb 17 12:37:55 2013 -0800 - - Add adapters for column/row-major matrices to rotation.h - - This patch introduces a matrix wrapper (MatrixAdapter) that allows to - transparently pass pointers to row-major or column-major matrices - to the conversion functions. - - Change-Id: I7f1683a8722088cffcc542f593ce7eb46fca109b - -commit 04938efe4bedec112083c5ceb227ba004f96bd01 -Author: Keir Mierle <mierle@gmail.com> -Date: Sun Feb 17 12:37:55 2013 -0800 - - Add support for removing parameter and residual blocks. - - This adds support for removing parameter and residual blocks. - There are two modes of operation: in the first, removals of - paremeter blocks are expensive, since each remove requires - scanning all residual blocks to find ones that depend on the - removed parameter. In the other, extra memory is sacrificed to - maintain a list of the residuals a parameter block depends on, - removing the need to scan. In both cases, removing residual blocks - is fast. 
- - As a caveat, any removals destroys the ordering of the parameters, - so the residuals or jacobian returned from Solver::Solve() is - meaningless. There is some debate on the best way to handle this; - the details remain for a future change. - - This also adds some overhead, even in the case that fast removals - are not requested: - - - 1 int32 to each residual, to track its position in the program. - - 1 pointer to each parameter, to store the dependent residuals. - - Change-Id: I71dcac8656679329a15ee7fc12c0df07030c12af - -commit fa21df8cd969bb257b87c9ef7c0147d8d5ea8725 -Author: Sameer Agarwal <sameeragarwal@google.com> -Date: Mon Feb 18 08:48:52 2013 -0800 - - Add script for building documentation. - - Update make_release - - Minor documentation fixes. - - Change-Id: I1248ec3f58be66b5929aee6f2aa392c15d53ed83 - -commit 290b975d1d4eba44205bbeb0fa6b3ce8a6fa4a0c -Author: Sameer Agarwal <sameeragarwal@google.com> -Date: Sun Feb 17 16:50:37 2013 -0800 - - Preconditioner refactoring. - - 1. Added a Preconditioner interface. - 2. SCHUR_JACOBI is now its own class and is independent of - SuiteSparse. - - Change-Id: Id912ab19cf3736e61d1b90ddaf5bfba33e877ec4 - -commit d010de543530001fa917501a13ba02879c8ea52f -Author: Sameer Agarwal <sameeragarwal@google.com> -Date: Fri Feb 15 14:26:56 2013 -0800 - - Solver::Summary::FullReport() supports line search now. 
- - Change-Id: Ib08d300198b85d9732cfb5785af4235ca4bd5226 diff --git a/extern/libmv/third_party/ceres/files.txt b/extern/libmv/third_party/ceres/files.txt index 8f4b7b97b50..0c60e5a055a 100644 --- a/extern/libmv/third_party/ceres/files.txt +++ b/extern/libmv/third_party/ceres/files.txt @@ -31,6 +31,7 @@ include/ceres/solver.h include/ceres/types.h internal/ceres/array_utils.cc internal/ceres/array_utils.h +internal/ceres/blas.h internal/ceres/block_evaluate_preparer.cc internal/ceres/block_evaluate_preparer.h internal/ceres/block_jacobian_writer.cc diff --git a/extern/libmv/third_party/ceres/include/ceres/jet.h b/extern/libmv/third_party/ceres/include/ceres/jet.h index 96e2256fd02..1238123a8fb 100644 --- a/extern/libmv/third_party/ceres/include/ceres/jet.h +++ b/extern/libmv/third_party/ceres/include/ceres/jet.h @@ -349,7 +349,11 @@ Jet<T, N> operator/(const Jet<T, N>& f, // // which holds because v*v = 0. h.a = f.a / g.a; - h.v = (f.v - f.a / g.a * g.v) / g.a; + const T g_a_inverse = 1.0 / g.a; + const T f_a_by_g_a = f.a * g_a_inverse; + for (int i = 0; i < N; ++i) { + h.v[i] = (f.v[i] - f_a_by_g_a * g.v[i]) * g_a_inverse; + } return h; } @@ -358,7 +362,8 @@ template<typename T, int N> inline Jet<T, N> operator/(T s, const Jet<T, N>& g) { Jet<T, N> h; h.a = s / g.a; - h.v = - s * g.v / (g.a * g.a); + const T minus_s_g_a_inverse2 = -s / (g.a * g.a); + h.v = g.v * minus_s_g_a_inverse2; return h; } @@ -366,8 +371,9 @@ Jet<T, N> operator/(T s, const Jet<T, N>& g) { template<typename T, int N> inline Jet<T, N> operator/(const Jet<T, N>& f, T s) { Jet<T, N> h; - h.a = f.a / s; - h.v = f.v / s; + const T s_inverse = 1.0 / s; + h.a = f.a * s_inverse; + h.v = f.v * s_inverse; return h; } @@ -425,7 +431,8 @@ template <typename T, int N> inline Jet<T, N> log(const Jet<T, N>& f) { Jet<T, N> g; g.a = log(f.a); - g.v = f.v / f.a; + const T a_inverse = T(1.0) / f.a; + g.v = f.v * a_inverse; return g; } @@ -443,7 +450,8 @@ template <typename T, int N> inline Jet<T, N> sqrt(const 
Jet<T, N>& f) { Jet<T, N> g; g.a = sqrt(f.a); - g.v = f.v / (T(2.0) * g.a); + const T two_a_inverse = 1.0 / (T(2.0) * g.a); + g.v = f.v * two_a_inverse; return g; } @@ -452,7 +460,7 @@ template <typename T, int N> inline Jet<T, N> cos(const Jet<T, N>& f) { Jet<T, N> g; g.a = cos(f.a); - T sin_a = sin(f.a); + const T sin_a = sin(f.a); g.v = - sin_a * f.v; return g; } @@ -462,7 +470,8 @@ template <typename T, int N> inline Jet<T, N> acos(const Jet<T, N>& f) { Jet<T, N> g; g.a = acos(f.a); - g.v = - T(1.0) / sqrt(T(1.0) - f.a * f.a) * f.v; + const T tmp = - T(1.0) / sqrt(T(1.0) - f.a * f.a); + g.v = tmp * f.v; return g; } @@ -471,7 +480,7 @@ template <typename T, int N> inline Jet<T, N> sin(const Jet<T, N>& f) { Jet<T, N> g; g.a = sin(f.a); - T cos_a = cos(f.a); + const T cos_a = cos(f.a); g.v = cos_a * f.v; return g; } @@ -481,7 +490,8 @@ template <typename T, int N> inline Jet<T, N> asin(const Jet<T, N>& f) { Jet<T, N> g; g.a = asin(f.a); - g.v = T(1.0) / sqrt(T(1.0) - f.a * f.a) * f.v; + const T tmp = T(1.0) / sqrt(T(1.0) - f.a * f.a); + g.v = tmp * f.v; return g; } diff --git a/extern/libmv/third_party/ceres/internal/ceres/blas.h b/extern/libmv/third_party/ceres/internal/ceres/blas.h new file mode 100644 index 00000000000..12667d09e55 --- /dev/null +++ b/extern/libmv/third_party/ceres/internal/ceres/blas.h @@ -0,0 +1,496 @@ +// Ceres Solver - A fast non-linear least squares minimizer +// Copyright 2013 Google Inc. All rights reserved. +// http://code.google.com/p/ceres-solver/ +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. 
+// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// * Neither the name of Google Inc. nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// +// Author: sameeragarwal@google.com (Sameer Agarwal) +// +// Simple blas functions for use in the Schur Eliminator. These are +// fairly basic implementations which already yield a significant +// speedup in the eliminator performance. + +#ifndef CERES_INTERNAL_BLAS_H_ +#define CERES_INTERNAL_BLAS_H_ + +#include "ceres/internal/eigen.h" +#include "glog/logging.h" + +namespace ceres { +namespace internal { + +// Remove the ".noalias()" annotation from the matrix matrix +// mutliplies to produce a correct build with the Android NDK, +// including versions 6, 7, 8, and 8b, when built with STLPort and the +// non-standalone toolchain (i.e. ndk-build). 
This appears to be a +// compiler bug; if the workaround is not in place, the line +// +// block.noalias() -= A * B; +// +// gets compiled to +// +// block.noalias() += A * B; +// +// which breaks schur elimination. Introducing a temporary by removing the +// .noalias() annotation causes the issue to disappear. Tracking this +// issue down was tricky, since the test suite doesn't run when built with +// the non-standalone toolchain. +// +// TODO(keir): Make a reproduction case for this and send it upstream. +#ifdef CERES_WORK_AROUND_ANDROID_NDK_COMPILER_BUG +#define CERES_MAYBE_NOALIAS +#else +#define CERES_MAYBE_NOALIAS .noalias() +#endif + +// For the matrix-matrix functions below, there are three functions +// for each functionality. Foo, FooNaive and FooEigen. Foo is the one +// to be called by the user. FooNaive is a basic loop based +// implementation and FooEigen uses Eigen's implementation. Foo +// chooses between FooNaive and FooEigen depending on how many of the +// template arguments are fixed at compile time. Currently, FooEigen +// is called if all matrix dimenions are compile time +// constants. FooNaive is called otherwise. This leads to the best +// performance currently. +// +// TODO(sameeragarwal): Benchmark and simplify the matrix-vector +// functions. + +// C op A * B; +// +// where op can be +=, -=, or =. +// +// The template parameters (kRowA, kColA, kRowB, kColB) allow +// specialization of the loop at compile time. If this information is +// not available, then Eigen::Dynamic should be used as the template +// argument. +// +// kOperation = 1 -> C += A * B +// kOperation = -1 -> C -= A * B +// kOperation = 0 -> C = A * B +// +// The function can write into matrices C which are larger than the +// matrix A * B. This is done by specifying the true size of C via +// row_stride_c and col_stride_c, and then indicating where A * B +// should be written into by start_row_c and start_col_c. 
+// +// Graphically if row_stride_c = 10, col_stride_c = 12, start_row_c = +// 4 and start_col_c = 5, then if A = 3x2 and B = 2x4, we get +// +// ------------ +// ------------ +// ------------ +// ------------ +// -----xxxx--- +// -----xxxx--- +// -----xxxx--- +// ------------ +// ------------ +// ------------ +// +template<int kRowA, int kColA, int kRowB, int kColB, int kOperation> +inline void MatrixMatrixMultiplyEigen(const double* A, + const int num_row_a, + const int num_col_a, + const double* B, + const int num_row_b, + const int num_col_b, + double* C, + const int start_row_c, + const int start_col_c, + const int row_stride_c, + const int col_stride_c) { + const typename EigenTypes<kRowA, kColA>::ConstMatrixRef Aref(A, num_row_a, num_col_a); + const typename EigenTypes<kRowB, kColB>::ConstMatrixRef Bref(B, num_row_b, num_col_b); + MatrixRef Cref(C, row_stride_c, col_stride_c); + Eigen::Block<MatrixRef, kRowA, kColB> block(Cref, + start_row_c, start_col_c, + num_row_a, num_col_b); + if (kOperation > 0) { + block CERES_MAYBE_NOALIAS += Aref * Bref; + } else if (kOperation < 0) { + block CERES_MAYBE_NOALIAS -= Aref * Bref; + } else { + block CERES_MAYBE_NOALIAS = Aref * Bref; + } +} + +template<int kRowA, int kColA, int kRowB, int kColB, int kOperation> +inline void MatrixMatrixMultiplyNaive(const double* A, + const int num_row_a, + const int num_col_a, + const double* B, + const int num_row_b, + const int num_col_b, + double* C, + const int start_row_c, + const int start_col_c, + const int row_stride_c, + const int col_stride_c) { + DCHECK_GT(num_row_a, 0); + DCHECK_GT(num_col_a, 0); + DCHECK_GT(num_row_b, 0); + DCHECK_GT(num_col_b, 0); + DCHECK_GE(start_row_c, 0); + DCHECK_GE(start_col_c, 0); + DCHECK_GT(row_stride_c, 0); + DCHECK_GT(col_stride_c, 0); + + DCHECK((kRowA == Eigen::Dynamic) || (kRowA == num_row_a)); + DCHECK((kColA == Eigen::Dynamic) || (kColA == num_col_a)); + DCHECK((kRowB == Eigen::Dynamic) || (kRowB == num_row_b)); + DCHECK((kColB == 
Eigen::Dynamic) || (kColB == num_col_b)); + + const int NUM_ROW_A = (kRowA != Eigen::Dynamic ? kRowA : num_row_a); + const int NUM_COL_A = (kColA != Eigen::Dynamic ? kColA : num_col_a); + const int NUM_ROW_B = (kColB != Eigen::Dynamic ? kRowB : num_row_b); + const int NUM_COL_B = (kColB != Eigen::Dynamic ? kColB : num_col_b); + DCHECK_EQ(NUM_COL_A, NUM_ROW_B); + + const int NUM_ROW_C = NUM_ROW_A; + const int NUM_COL_C = NUM_COL_B; + DCHECK_LT(start_row_c + NUM_ROW_C, row_stride_c); + DCHECK_LT(start_col_c + NUM_COL_C, col_stride_c); + + for (int row = 0; row < NUM_ROW_C; ++row) { + for (int col = 0; col < NUM_COL_C; ++col) { + double tmp = 0.0; + for (int k = 0; k < NUM_COL_A; ++k) { + tmp += A[row * NUM_COL_A + k] * B[k * NUM_COL_B + col]; + } + + const int index = (row + start_row_c) * col_stride_c + start_col_c + col; + if (kOperation > 0) { + C[index] += tmp; + } else if (kOperation < 0) { + C[index] -= tmp; + } else { + C[index] = tmp; + } + } + } +} + +template<int kRowA, int kColA, int kRowB, int kColB, int kOperation> +inline void MatrixMatrixMultiply(const double* A, + const int num_row_a, + const int num_col_a, + const double* B, + const int num_row_b, + const int num_col_b, + double* C, + const int start_row_c, + const int start_col_c, + const int row_stride_c, + const int col_stride_c) { +#ifdef CERES_NO_CUSTOM_BLAS + MatrixMatrixMultiplyEigen<kRowA, kColA, kRowB, kColB, kOperation>( + A, num_row_a, num_col_a, + B, num_row_b, num_col_b, + C, start_row_c, start_col_c, row_stride_c, col_stride_c); + return; + +#else + + if (kRowA != Eigen::Dynamic && kColA != Eigen::Dynamic && + kRowB != Eigen::Dynamic && kColB != Eigen::Dynamic) { + MatrixMatrixMultiplyEigen<kRowA, kColA, kRowB, kColB, kOperation>( + A, num_row_a, num_col_a, + B, num_row_b, num_col_b, + C, start_row_c, start_col_c, row_stride_c, col_stride_c); + } else { + MatrixMatrixMultiplyNaive<kRowA, kColA, kRowB, kColB, kOperation>( + A, num_row_a, num_col_a, + B, num_row_b, num_col_b, + C, 
start_row_c, start_col_c, row_stride_c, col_stride_c); + } + +#endif +} + + +// C op A' * B; +// +// where op can be +=, -=, or =. +// +// The template parameters (kRowA, kColA, kRowB, kColB) allow +// specialization of the loop at compile time. If this information is +// not available, then Eigen::Dynamic should be used as the template +// argument. +// +// kOperation = 1 -> C += A' * B +// kOperation = -1 -> C -= A' * B +// kOperation = 0 -> C = A' * B +// +// The function can write into matrices C which are larger than the +// matrix A' * B. This is done by specifying the true size of C via +// row_stride_c and col_stride_c, and then indicating where A * B +// should be written into by start_row_c and start_col_c. +// +// Graphically if row_stride_c = 10, col_stride_c = 12, start_row_c = +// 4 and start_col_c = 5, then if A = 2x3 and B = 2x4, we get +// +// ------------ +// ------------ +// ------------ +// ------------ +// -----xxxx--- +// -----xxxx--- +// -----xxxx--- +// ------------ +// ------------ +// ------------ +// +template<int kRowA, int kColA, int kRowB, int kColB, int kOperation> +inline void MatrixTransposeMatrixMultiplyEigen(const double* A, + const int num_row_a, + const int num_col_a, + const double* B, + const int num_row_b, + const int num_col_b, + double* C, + const int start_row_c, + const int start_col_c, + const int row_stride_c, + const int col_stride_c) { + const typename EigenTypes<kRowA, kColA>::ConstMatrixRef Aref(A, num_row_a, num_col_a); + const typename EigenTypes<kRowB, kColB>::ConstMatrixRef Bref(B, num_row_b, num_col_b); + MatrixRef Cref(C, row_stride_c, col_stride_c); + Eigen::Block<MatrixRef, kColA, kColB> block(Cref, + start_row_c, start_col_c, + num_col_a, num_col_b); + if (kOperation > 0) { + block CERES_MAYBE_NOALIAS += Aref.transpose() * Bref; + } else if (kOperation < 0) { + block CERES_MAYBE_NOALIAS -= Aref.transpose() * Bref; + } else { + block CERES_MAYBE_NOALIAS = Aref.transpose() * Bref; + } +} + +template<int 
kRowA, int kColA, int kRowB, int kColB, int kOperation> +inline void MatrixTransposeMatrixMultiplyNaive(const double* A, + const int num_row_a, + const int num_col_a, + const double* B, + const int num_row_b, + const int num_col_b, + double* C, + const int start_row_c, + const int start_col_c, + const int row_stride_c, + const int col_stride_c) { + DCHECK_GT(num_row_a, 0); + DCHECK_GT(num_col_a, 0); + DCHECK_GT(num_row_b, 0); + DCHECK_GT(num_col_b, 0); + DCHECK_GE(start_row_c, 0); + DCHECK_GE(start_col_c, 0); + DCHECK_GT(row_stride_c, 0); + DCHECK_GT(col_stride_c, 0); + + DCHECK((kRowA == Eigen::Dynamic) || (kRowA == num_row_a)); + DCHECK((kColA == Eigen::Dynamic) || (kColA == num_col_a)); + DCHECK((kRowB == Eigen::Dynamic) || (kRowB == num_row_b)); + DCHECK((kColB == Eigen::Dynamic) || (kColB == num_col_b)); + + const int NUM_ROW_A = (kRowA != Eigen::Dynamic ? kRowA : num_row_a); + const int NUM_COL_A = (kColA != Eigen::Dynamic ? kColA : num_col_a); + const int NUM_ROW_B = (kColB != Eigen::Dynamic ? kRowB : num_row_b); + const int NUM_COL_B = (kColB != Eigen::Dynamic ? 
kColB : num_col_b); + DCHECK_EQ(NUM_ROW_A, NUM_ROW_B); + + const int NUM_ROW_C = NUM_COL_A; + const int NUM_COL_C = NUM_COL_B; + DCHECK_LT(start_row_c + NUM_ROW_C, row_stride_c); + DCHECK_LT(start_col_c + NUM_COL_C, col_stride_c); + + for (int row = 0; row < NUM_ROW_C; ++row) { + for (int col = 0; col < NUM_COL_C; ++col) { + double tmp = 0.0; + for (int k = 0; k < NUM_ROW_A; ++k) { + tmp += A[k * NUM_COL_A + row] * B[k * NUM_COL_B + col]; + } + + const int index = (row + start_row_c) * col_stride_c + start_col_c + col; + if (kOperation > 0) { + C[index]+= tmp; + } else if (kOperation < 0) { + C[index]-= tmp; + } else { + C[index]= tmp; + } + } + } +} + +template<int kRowA, int kColA, int kRowB, int kColB, int kOperation> +inline void MatrixTransposeMatrixMultiply(const double* A, + const int num_row_a, + const int num_col_a, + const double* B, + const int num_row_b, + const int num_col_b, + double* C, + const int start_row_c, + const int start_col_c, + const int row_stride_c, + const int col_stride_c) { +#ifdef CERES_NO_CUSTOM_BLAS + MatrixTransposeMatrixMultiplyEigen<kRowA, kColA, kRowB, kColB, kOperation>( + A, num_row_a, num_col_a, + B, num_row_b, num_col_b, + C, start_row_c, start_col_c, row_stride_c, col_stride_c); + return; + +#else + + if (kRowA != Eigen::Dynamic && kColA != Eigen::Dynamic && + kRowB != Eigen::Dynamic && kColB != Eigen::Dynamic) { + MatrixTransposeMatrixMultiplyEigen<kRowA, kColA, kRowB, kColB, kOperation>( + A, num_row_a, num_col_a, + B, num_row_b, num_col_b, + C, start_row_c, start_col_c, row_stride_c, col_stride_c); + } else { + MatrixTransposeMatrixMultiplyNaive<kRowA, kColA, kRowB, kColB, kOperation>( + A, num_row_a, num_col_a, + B, num_row_b, num_col_b, + C, start_row_c, start_col_c, row_stride_c, col_stride_c); + } + +#endif +} + +template<int kRowA, int kColA, int kOperation> +inline void MatrixVectorMultiply(const double* A, + const int num_row_a, + const int num_col_a, + const double* b, + double* c) { +#ifdef CERES_NO_CUSTOM_BLAS 
+ const typename EigenTypes<kRowA, kColA>::ConstMatrixRef Aref(A, num_row_a, num_col_a); + const typename EigenTypes<kColA>::ConstVectorRef bref(b, num_col_a); + typename EigenTypes<kRowA>::VectorRef cref(c, num_row_a); + + // lazyProduct works better than .noalias() for matrix-vector + // products. + if (kOperation > 0) { + cref += Aref.lazyProduct(bref); + } else if (kOperation < 0) { + cref -= Aref.lazyProduct(bref); + } else { + cref -= Aref.lazyProduct(bref); + } +#else + + DCHECK_GT(num_row_a, 0); + DCHECK_GT(num_col_a, 0); + DCHECK((kRowA == Eigen::Dynamic) || (kRowA == num_row_a)); + DCHECK((kColA == Eigen::Dynamic) || (kColA == num_col_a)); + + const int NUM_ROW_A = (kRowA != Eigen::Dynamic ? kRowA : num_row_a); + const int NUM_COL_A = (kColA != Eigen::Dynamic ? kColA : num_col_a); + + for (int row = 0; row < NUM_ROW_A; ++row) { + double tmp = 0.0; + for (int col = 0; col < NUM_COL_A; ++col) { + tmp += A[row * NUM_COL_A + col] * b[col]; + } + + if (kOperation > 0) { + c[row] += tmp; + } else if (kOperation < 0) { + c[row] -= tmp; + } else { + c[row] = tmp; + } + } +#endif // CERES_NO_CUSTOM_BLAS +} + +// c op A' * b; +// +// where op can be +=, -=, or =. +// +// The template parameters (kRowA, kColA) allow specialization of the +// loop at compile time. If this information is not available, then +// Eigen::Dynamic should be used as the template argument. 
+// +// kOperation = 1 -> c += A' * b +// kOperation = -1 -> c -= A' * b +// kOperation = 0 -> c = A' * b +template<int kRowA, int kColA, int kOperation> +inline void MatrixTransposeVectorMultiply(const double* A, + const int num_row_a, + const int num_col_a, + const double* b, + double* c) { +#ifdef CERES_NO_CUSTOM_BLAS + const typename EigenTypes<kRowA, kColA>::ConstMatrixRef Aref(A, num_row_a, num_col_a); + const typename EigenTypes<kRowA>::ConstVectorRef bref(b, num_row_a); + typename EigenTypes<kColA>::VectorRef cref(c, num_col_a); + + // lazyProduct works better than .noalias() for matrix-vector + // products. + if (kOperation > 0) { + cref += Aref.transpose().lazyProduct(bref); + } else if (kOperation < 0) { + cref -= Aref.transpose().lazyProduct(bref); + } else { + cref -= Aref.transpose().lazyProduct(bref); + } +#else + + DCHECK_GT(num_row_a, 0); + DCHECK_GT(num_col_a, 0); + DCHECK((kRowA == Eigen::Dynamic) || (kRowA == num_row_a)); + DCHECK((kColA == Eigen::Dynamic) || (kColA == num_col_a)); + + const int NUM_ROW_A = (kRowA != Eigen::Dynamic ? kRowA : num_row_a); + const int NUM_COL_A = (kColA != Eigen::Dynamic ? 
kColA : num_col_a); + + for (int row = 0; row < NUM_COL_A; ++row) { + double tmp = 0.0; + for (int col = 0; col < NUM_ROW_A; ++col) { + tmp += A[col * NUM_COL_A + row] * b[col]; + } + + if (kOperation > 0) { + c[row] += tmp; + } else if (kOperation < 0) { + c[row] -= tmp; + } else { + c[row] = tmp; + } + } +#endif // CERES_NO_CUSTOM_BLAS +} + +#undef CERES_MAYBE_NOALIAS + +} // namespace internal +} // namespace ceres + +#endif // CERES_INTERNAL_BLAS_H_ diff --git a/extern/libmv/third_party/ceres/internal/ceres/block_sparse_matrix.cc b/extern/libmv/third_party/ceres/internal/ceres/block_sparse_matrix.cc index dbe5ec93ef0..ab6fcef1945 100644 --- a/extern/libmv/third_party/ceres/internal/ceres/block_sparse_matrix.cc +++ b/extern/libmv/third_party/ceres/internal/ceres/block_sparse_matrix.cc @@ -33,6 +33,7 @@ #include <cstddef> #include <algorithm> #include <vector> +#include "ceres/blas.h" #include "ceres/block_structure.h" #include "ceres/internal/eigen.h" #include "ceres/matrix_proto.h" @@ -117,16 +118,15 @@ void BlockSparseMatrix::RightMultiply(const double* x, double* y) const { for (int i = 0; i < block_structure_->rows.size(); ++i) { int row_block_pos = block_structure_->rows[i].block.position; int row_block_size = block_structure_->rows[i].block.size; - VectorRef yref(y + row_block_pos, row_block_size); const vector<Cell>& cells = block_structure_->rows[i].cells; for (int j = 0; j < cells.size(); ++j) { int col_block_id = cells[j].block_id; int col_block_size = block_structure_->cols[col_block_id].size; int col_block_pos = block_structure_->cols[col_block_id].position; - ConstVectorRef xref(x + col_block_pos, col_block_size); - MatrixRef m(values_.get() + cells[j].position, - row_block_size, col_block_size); - yref += m.lazyProduct(xref); + MatrixVectorMultiply<Eigen::Dynamic, Eigen::Dynamic, 1>( + values_.get() + cells[j].position, row_block_size, col_block_size, + x + col_block_pos, + y + row_block_pos); } } } @@ -138,16 +138,16 @@ void 
BlockSparseMatrix::LeftMultiply(const double* x, double* y) const { for (int i = 0; i < block_structure_->rows.size(); ++i) { int row_block_pos = block_structure_->rows[i].block.position; int row_block_size = block_structure_->rows[i].block.size; - const ConstVectorRef xref(x + row_block_pos, row_block_size); const vector<Cell>& cells = block_structure_->rows[i].cells; for (int j = 0; j < cells.size(); ++j) { int col_block_id = cells[j].block_id; int col_block_size = block_structure_->cols[col_block_id].size; int col_block_pos = block_structure_->cols[col_block_id].position; - VectorRef yref(y + col_block_pos, col_block_size); - MatrixRef m(values_.get() + cells[j].position, - row_block_size, col_block_size); - yref += m.transpose().lazyProduct(xref); + MatrixTransposeVectorMultiply<Eigen::Dynamic, Eigen::Dynamic, 1>( + values_.get() + cells[j].position, row_block_size, col_block_size, + x + row_block_pos, + y + col_block_pos); + } } } diff --git a/extern/libmv/third_party/ceres/internal/ceres/partitioned_matrix_view.cc b/extern/libmv/third_party/ceres/internal/ceres/partitioned_matrix_view.cc index 0722fc82c02..c488184ac93 100644 --- a/extern/libmv/third_party/ceres/internal/ceres/partitioned_matrix_view.cc +++ b/extern/libmv/third_party/ceres/internal/ceres/partitioned_matrix_view.cc @@ -35,6 +35,7 @@ #include <algorithm> #include <cstring> #include <vector> +#include "ceres/blas.h" #include "ceres/block_sparse_matrix.h" #include "ceres/block_structure.h" #include "ceres/internal/eigen.h" @@ -103,13 +104,10 @@ void PartitionedMatrixView::RightMultiplyE(const double* x, double* y) const { const int col_block_id = cell.block_id; const int col_block_pos = bs->cols[col_block_id].position; const int col_block_size = bs->cols[col_block_id].size; - - ConstVectorRef xref(x + col_block_pos, col_block_size); - VectorRef yref(y + row_block_pos, row_block_size); - ConstMatrixRef m(row_values + cell.position, - row_block_size, - col_block_size); - yref += m.lazyProduct(xref); 
+ MatrixVectorMultiply<Eigen::Dynamic, Eigen::Dynamic, 1>( + row_values + cell.position, row_block_size, col_block_size, + x + col_block_pos, + y + row_block_pos); } } @@ -124,20 +122,16 @@ void PartitionedMatrixView::RightMultiplyF(const double* x, double* y) const { for (int r = 0; r < bs->rows.size(); ++r) { const int row_block_pos = bs->rows[r].block.position; const int row_block_size = bs->rows[r].block.size; - VectorRef yref(y + row_block_pos, row_block_size); const vector<Cell>& cells = bs->rows[r].cells; for (int c = (r < num_row_blocks_e_) ? 1 : 0; c < cells.size(); ++c) { const double* row_values = matrix_.RowBlockValues(r); const int col_block_id = cells[c].block_id; const int col_block_pos = bs->cols[col_block_id].position; const int col_block_size = bs->cols[col_block_id].size; - - ConstVectorRef xref(x + col_block_pos - num_cols_e(), - col_block_size); - ConstMatrixRef m(row_values + cells[c].position, - row_block_size, - col_block_size); - yref += m.lazyProduct(xref); + MatrixVectorMultiply<Eigen::Dynamic, Eigen::Dynamic, 1>( + row_values + cells[c].position, row_block_size, col_block_size, + x + col_block_pos - num_cols_e(), + y + row_block_pos); } } } @@ -155,13 +149,10 @@ void PartitionedMatrixView::LeftMultiplyE(const double* x, double* y) const { const int col_block_id = cell.block_id; const int col_block_pos = bs->cols[col_block_id].position; const int col_block_size = bs->cols[col_block_id].size; - - ConstVectorRef xref(x + row_block_pos, row_block_size); - VectorRef yref(y + col_block_pos, col_block_size); - ConstMatrixRef m(row_values + cell.position, - row_block_size, - col_block_size); - yref += m.transpose().lazyProduct(xref); + MatrixTransposeVectorMultiply<Eigen::Dynamic, Eigen::Dynamic, 1>( + row_values + cell.position, row_block_size, col_block_size, + x + row_block_pos, + y + col_block_pos); } } @@ -176,19 +167,16 @@ void PartitionedMatrixView::LeftMultiplyF(const double* x, double* y) const { for (int r = 0; r < bs->rows.size(); 
++r) { const int row_block_pos = bs->rows[r].block.position; const int row_block_size = bs->rows[r].block.size; - ConstVectorRef xref(x + row_block_pos, row_block_size); const vector<Cell>& cells = bs->rows[r].cells; for (int c = (r < num_row_blocks_e_) ? 1 : 0; c < cells.size(); ++c) { const double* row_values = matrix_.RowBlockValues(r); const int col_block_id = cells[c].block_id; const int col_block_pos = bs->cols[col_block_id].position; const int col_block_size = bs->cols[col_block_id].size; - - VectorRef yref(y + col_block_pos - num_cols_e(), col_block_size); - ConstMatrixRef m(row_values + cells[c].position, - row_block_size, - col_block_size); - yref += m.transpose().lazyProduct(xref); + MatrixTransposeVectorMultiply<Eigen::Dynamic, Eigen::Dynamic, 1>( + row_values + cells[c].position, row_block_size, col_block_size, + x + row_block_pos, + y + col_block_pos - num_cols_e()); } } } @@ -267,15 +255,15 @@ void PartitionedMatrixView::UpdateBlockDiagonalEtE( const int row_block_size = bs->rows[r].block.size; const int block_id = cell.block_id; const int col_block_size = bs->cols[block_id].size; - ConstMatrixRef m(row_values + cell.position, - row_block_size, - col_block_size); - const int cell_position = block_diagonal_structure->rows[block_id].cells[0].position; - MatrixRef(block_diagonal->mutable_values() + cell_position, - col_block_size, col_block_size).noalias() += m.transpose() * m; + MatrixTransposeMatrixMultiply + <Eigen::Dynamic, Eigen::Dynamic, Eigen::Dynamic, Eigen::Dynamic, 1>( + row_values + cell.position, row_block_size, col_block_size, + row_values + cell.position, row_block_size, col_block_size, + block_diagonal->mutable_values() + cell_position, + 0, 0, col_block_size, col_block_size); } } @@ -298,15 +286,16 @@ void PartitionedMatrixView::UpdateBlockDiagonalFtF( for (int c = (r < num_row_blocks_e_) ? 
1 : 0; c < cells.size(); ++c) { const int col_block_id = cells[c].block_id; const int col_block_size = bs->cols[col_block_id].size; - ConstMatrixRef m(row_values + cells[c].position, - row_block_size, - col_block_size); const int diagonal_block_id = col_block_id - num_col_blocks_e_; const int cell_position = block_diagonal_structure->rows[diagonal_block_id].cells[0].position; - MatrixRef(block_diagonal->mutable_values() + cell_position, - col_block_size, col_block_size).noalias() += m.transpose() * m; + MatrixTransposeMatrixMultiply + <Eigen::Dynamic, Eigen::Dynamic, Eigen::Dynamic, Eigen::Dynamic, 1>( + row_values + cells[c].position, row_block_size, col_block_size, + row_values + cells[c].position, row_block_size, col_block_size, + block_diagonal->mutable_values() + cell_position, + 0, 0, col_block_size, col_block_size); } } } diff --git a/extern/libmv/third_party/ceres/internal/ceres/schur_complement_solver.cc b/extern/libmv/third_party/ceres/internal/ceres/schur_complement_solver.cc index e1be4e2a78a..875f4551044 100644 --- a/extern/libmv/third_party/ceres/internal/ceres/schur_complement_solver.cc +++ b/extern/libmv/third_party/ceres/internal/ceres/schur_complement_solver.cc @@ -291,14 +291,8 @@ bool SparseSchurComplementSolver::SolveReducedLinearSystemUsingSuiteSparse( } else { factor_ = ss_.AnalyzeCholesky(cholmod_lhs); } - - if (VLOG_IS_ON(2)) { - cholmod_print_common(const_cast<char*>("Symbolic Analysis"), ss_.mutable_cc()); - } } - CHECK_NOTNULL(factor_); - cholmod_dense* cholmod_solution = ss_.SolveCholesky(cholmod_lhs, factor_, cholmod_rhs); diff --git a/extern/libmv/third_party/ceres/internal/ceres/schur_eliminator.h b/extern/libmv/third_party/ceres/internal/ceres/schur_eliminator.h index 877621bb48c..e0c7fdafcb6 100644 --- a/extern/libmv/third_party/ceres/internal/ceres/schur_eliminator.h +++ b/extern/libmv/third_party/ceres/internal/ceres/schur_eliminator.h @@ -321,9 +321,25 @@ class SchurEliminator : public SchurEliminatorBase { // see the 
documentation of the Chunk object above. vector<Chunk> chunks_; + // TODO(sameeragarwal): The following two arrays contain per-thread + // storage. They should be refactored into a per thread struct. + // Buffer to store the products of the y and z blocks generated - // during the elimination phase. + // during the elimination phase. buffer_ is of size num_threads * + // buffer_size_. Each thread accesses the chunk + // + // [thread_id * buffer_size_ , (thread_id + 1) * buffer_size_] + // scoped_array<double> buffer_; + + // Buffer to store per thread matrix matrix products used by + // ChunkOuterProduct. Like buffer_ it is of size num_threads * + // buffer_size_. Each thread accesses the chunk + // + // [thread_id * buffer_size_ , (thread_id + 1) * buffer_size_] + // + scoped_array<double> chunk_outer_product_buffer_; + int buffer_size_; int num_threads_; int uneliminated_row_begins_; diff --git a/extern/libmv/third_party/ceres/internal/ceres/schur_eliminator_impl.h b/extern/libmv/third_party/ceres/internal/ceres/schur_eliminator_impl.h index 339c44bc41c..b46eab92c34 100644 --- a/extern/libmv/third_party/ceres/internal/ceres/schur_eliminator_impl.h +++ b/extern/libmv/third_party/ceres/internal/ceres/schur_eliminator_impl.h @@ -34,10 +34,6 @@ #ifndef CERES_INTERNAL_SCHUR_ELIMINATOR_IMPL_H_ #define CERES_INTERNAL_SCHUR_ELIMINATOR_IMPL_H_ -#ifdef CERES_USE_OPENMP -#include <omp.h> -#endif - // Eigen has an internal threshold switching between different matrix // multiplication algorithms. In particular for matrices larger than // EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD it uses a cache friendly @@ -46,19 +42,25 @@ // are thin and long, the default choice may not be optimal. This is // the case for us, as the default choice causes a 30% performance // regression when we moved from Eigen2 to Eigen3. 
+ #define EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD 10 +#ifdef CERES_USE_OPENMP +#include <omp.h> +#endif + #include <algorithm> #include <map> #include "Eigen/Dense" +#include "ceres/blas.h" #include "ceres/block_random_access_matrix.h" #include "ceres/block_sparse_matrix.h" #include "ceres/block_structure.h" +#include "ceres/internal/eigen.h" +#include "ceres/internal/scoped_ptr.h" #include "ceres/map_util.h" #include "ceres/schur_eliminator.h" #include "ceres/stl_util.h" -#include "ceres/internal/eigen.h" -#include "ceres/internal/scoped_ptr.h" #include "glog/logging.h" namespace ceres { @@ -149,13 +151,16 @@ Init(int num_eliminate_blocks, const CompressedRowBlockStructure* bs) { buffer_.reset(new double[buffer_size_ * num_threads_]); + // chunk_outer_product_buffer_ only needs to store e_block_size * + // f_block_size, which is always less than buffer_size_, so we just + // allocate buffer_size_ per thread. + chunk_outer_product_buffer_.reset(new double[buffer_size_ * num_threads_]); + STLDeleteElements(&rhs_locks_); rhs_locks_.resize(num_col_blocks - num_eliminate_blocks_); for (int i = 0; i < num_col_blocks - num_eliminate_blocks_; ++i) { rhs_locks_[i] = new Mutex; } - - VLOG(1) << "Eliminator threads: " << num_threads_; } template <int kRowBlockSize, int kEBlockSize, int kFBlockSize> @@ -261,7 +266,7 @@ Eliminate(const BlockSparseMatrixBase* A, typename EigenTypes<kEBlockSize, kEBlockSize>::Matrix inverse_ete = ete .template selfadjointView<Eigen::Upper>() - .ldlt() + .llt() .solve(Matrix::Identity(e_block_size, e_block_size)); // For the current chunk compute and update the rhs of the reduced @@ -294,8 +299,8 @@ BackSubstitute(const BlockSparseMatrixBase* A, const int e_block_id = bs->rows[chunk.start].cells.front().block_id; const int e_block_size = bs->cols[e_block_id].size; - typename EigenTypes<kEBlockSize>::VectorRef y_block( - y + bs->cols[e_block_id].position, e_block_size); + double* y_ptr = y + bs->cols[e_block_id].position; + typename 
EigenTypes<kEBlockSize>::VectorRef y_block(y_ptr, e_block_size); typename EigenTypes<kEBlockSize, kEBlockSize>::Matrix ete(e_block_size, e_block_size); @@ -312,40 +317,35 @@ BackSubstitute(const BlockSparseMatrixBase* A, const double* row_values = A->RowBlockValues(chunk.start + j); const Cell& e_cell = row.cells.front(); DCHECK_EQ(e_block_id, e_cell.block_id); - const typename EigenTypes<kRowBlockSize, kEBlockSize>::ConstMatrixRef - e_block(row_values + e_cell.position, - row.block.size, - e_block_size); - typename EigenTypes<kRowBlockSize>::Vector - sj = + typename EigenTypes<kRowBlockSize>::Vector sj = typename EigenTypes<kRowBlockSize>::ConstVectorRef - (b + bs->rows[chunk.start + j].block.position, - row.block.size); + (b + bs->rows[chunk.start + j].block.position, row.block.size); for (int c = 1; c < row.cells.size(); ++c) { const int f_block_id = row.cells[c].block_id; const int f_block_size = bs->cols[f_block_id].size; - const typename EigenTypes<kRowBlockSize, kFBlockSize>::ConstMatrixRef - f_block(row_values + row.cells[c].position, - row.block.size, f_block_size); const int r_block = f_block_id - num_eliminate_blocks_; - sj -= f_block * - typename EigenTypes<kFBlockSize>::ConstVectorRef - (z + lhs_row_layout_[r_block], f_block_size); + MatrixVectorMultiply<kRowBlockSize, kFBlockSize, -1>( + row_values + row.cells[c].position, row.block.size, f_block_size, + z + lhs_row_layout_[r_block], + sj.data()); } - y_block += e_block.transpose() * sj; - ete.template selfadjointView<Eigen::Upper>() - .rankUpdate(e_block.transpose(), 1.0); + MatrixTransposeVectorMultiply<kRowBlockSize, kEBlockSize, 1>( + row_values + e_cell.position, row.block.size, e_block_size, + sj.data(), + y_ptr); + + MatrixTransposeMatrixMultiply + <kRowBlockSize, kEBlockSize,kRowBlockSize, kEBlockSize, 1>( + row_values + e_cell.position, row.block.size, e_block_size, + row_values + e_cell.position, row.block.size, e_block_size, + ete.data(), 0, 0, e_block_size, e_block_size); } - y_block = - 
ete - .template selfadjointView<Eigen::Upper>() - .ldlt() - .solve(y_block); + ete.llt().solveInPlace(y_block); } } @@ -382,15 +382,12 @@ UpdateRhs(const Chunk& chunk, for (int c = 1; c < row.cells.size(); ++c) { const int block_id = row.cells[c].block_id; const int block_size = bs->cols[block_id].size; - const typename EigenTypes<kRowBlockSize, kFBlockSize>::ConstMatrixRef - b(row_values + row.cells[c].position, - row.block.size, block_size); - const int block = block_id - num_eliminate_blocks_; CeresMutexLock l(rhs_locks_[block]); - typename EigenTypes<kFBlockSize>::VectorRef - (rhs + lhs_row_layout_[block], block_size).noalias() - += b.transpose() * sj; + MatrixTransposeVectorMultiply<kRowBlockSize, kFBlockSize, 1>( + row_values + row.cells[c].position, + row.block.size, block_size, + sj.data(), rhs + lhs_row_layout_[block]); } b_pos += row.block.size; } @@ -446,34 +443,31 @@ ChunkDiagonalBlockAndGradient( // Extract the e_block, ETE += E_i' E_i const Cell& e_cell = row.cells.front(); - const typename EigenTypes<kRowBlockSize, kEBlockSize>::ConstMatrixRef - e_block(row_values + e_cell.position, - row.block.size, - e_block_size); - - ete->template selfadjointView<Eigen::Upper>() - .rankUpdate(e_block.transpose(), 1.0); + MatrixTransposeMatrixMultiply + <kRowBlockSize, kEBlockSize, kRowBlockSize, kEBlockSize, 1>( + row_values + e_cell.position, row.block.size, e_block_size, + row_values + e_cell.position, row.block.size, e_block_size, + ete->data(), 0, 0, e_block_size, e_block_size); // g += E_i' b_i - g->noalias() += e_block.transpose() * - typename EigenTypes<kRowBlockSize>::ConstVectorRef - (b + b_pos, row.block.size); + MatrixTransposeVectorMultiply<kRowBlockSize, kEBlockSize, 1>( + row_values + e_cell.position, row.block.size, e_block_size, + b + b_pos, + g->data()); + // buffer = E'F. This computation is done by iterating over the // f_blocks for each row in the chunk. 
for (int c = 1; c < row.cells.size(); ++c) { const int f_block_id = row.cells[c].block_id; const int f_block_size = bs->cols[f_block_id].size; - const typename EigenTypes<kRowBlockSize, kFBlockSize>::ConstMatrixRef - f_block(row_values + row.cells[c].position, - row.block.size, f_block_size); - double* buffer_ptr = buffer + FindOrDie(chunk.buffer_layout, f_block_id); - - typename EigenTypes<kEBlockSize, kFBlockSize>::MatrixRef - (buffer_ptr, e_block_size, f_block_size).noalias() - += e_block.transpose() * f_block; + MatrixTransposeMatrixMultiply + <kRowBlockSize, kEBlockSize, kRowBlockSize, kFBlockSize, 1>( + row_values + e_cell.position, row.block.size, e_block_size, + row_values + row.cells[c].position, row.block.size, f_block_size, + buffer_ptr, 0, 0, e_block_size, f_block_size); } b_pos += row.block.size; } @@ -497,15 +491,24 @@ ChunkOuterProduct(const CompressedRowBlockStructure* bs, // references to the left hand side. const int e_block_size = inverse_ete.rows(); BufferLayoutType::const_iterator it1 = buffer_layout.begin(); + +#ifdef CERES_USE_OPENMP + int thread_id = omp_get_thread_num(); +#else + int thread_id = 0; +#endif + double* b1_transpose_inverse_ete = + chunk_outer_product_buffer_.get() + thread_id * buffer_size_; + // S(i,j) -= bi' * ete^{-1} b_j for (; it1 != buffer_layout.end(); ++it1) { const int block1 = it1->first - num_eliminate_blocks_; const int block1_size = bs->cols[it1->first].size; - - const typename EigenTypes<kEBlockSize, kFBlockSize>::ConstMatrixRef - b1(buffer + it1->second, e_block_size, block1_size); - const typename EigenTypes<kFBlockSize, kEBlockSize>::Matrix - b1_transpose_inverse_ete = b1.transpose() * inverse_ete; + MatrixTransposeMatrixMultiply + <kEBlockSize, kFBlockSize, kEBlockSize, kEBlockSize, 0>( + buffer + it1->second, e_block_size, block1_size, + inverse_ete.data(), e_block_size, e_block_size, + b1_transpose_inverse_ete, 0, 0, block1_size, e_block_size); BufferLayoutType::const_iterator it2 = it1; for (; it2 != 
buffer_layout.end(); ++it2) { @@ -515,46 +518,15 @@ ChunkOuterProduct(const CompressedRowBlockStructure* bs, CellInfo* cell_info = lhs->GetCell(block1, block2, &r, &c, &row_stride, &col_stride); - if (cell_info == NULL) { - continue; + if (cell_info != NULL) { + const int block2_size = bs->cols[it2->first].size; + CeresMutexLock l(&cell_info->m); + MatrixMatrixMultiply + <kFBlockSize, kEBlockSize, kEBlockSize, kFBlockSize, -1>( + b1_transpose_inverse_ete, block1_size, e_block_size, + buffer + it2->second, e_block_size, block2_size, + cell_info->values, r, c, row_stride, col_stride); } - - const int block2_size = bs->cols[it2->first].size; - const typename EigenTypes<kEBlockSize, kFBlockSize>::ConstMatrixRef - b2(buffer + it2->second, e_block_size, block2_size); - - CeresMutexLock l(&cell_info->m); - MatrixRef m(cell_info->values, row_stride, col_stride); - - // We explicitly construct a block object here instead of using - // m.block(), as m.block() variant of the constructor does not - // allow mixing of template sizing and runtime sizing parameters - // like the Matrix class does. - Eigen::Block<MatrixRef, kFBlockSize, kFBlockSize> - block(m, r, c, block1_size, block2_size); -#ifdef CERES_WORK_AROUND_ANDROID_NDK_COMPILER_BUG - // Removing the ".noalias()" annotation on the following statement is - // necessary to produce a correct build with the Android NDK, including - // versions 6, 7, 8, and 8b, when built with STLPort and the - // non-standalone toolchain (i.e. ndk-build). This appears to be a - // compiler bug; if the workaround is not in place, the line - // - // block.noalias() -= b1_transpose_inverse_ete * b2; - // - // gets compiled to - // - // block.noalias() += b1_transpose_inverse_ete * b2; - // - // which breaks schur elimination. Introducing a temporary by removing the - // .noalias() annotation causes the issue to disappear. 
Tracking this - // issue down was tricky, since the test suite doesn't run when built with - // the non-standalone toolchain. - // - // TODO(keir): Make a reproduction case for this and send it upstream. - block -= b1_transpose_inverse_ete * b2; -#else - block.noalias() -= b1_transpose_inverse_ete * b2; -#endif // CERES_WORK_AROUND_ANDROID_NDK_COMPILER_BUG } } } @@ -624,10 +596,13 @@ NoEBlockRowOuterProduct(const BlockSparseMatrixBase* A, &row_stride, &col_stride); if (cell_info != NULL) { CeresMutexLock l(&cell_info->m); - MatrixRef m(cell_info->values, row_stride, col_stride); - m.block(r, c, block1_size, block1_size) - .selfadjointView<Eigen::Upper>() - .rankUpdate(b1.transpose(), 1.0); + // This multiply currently ignores the fact that this is a + // symmetric outer product. + MatrixTransposeMatrixMultiply + <Eigen::Dynamic, Eigen::Dynamic, Eigen::Dynamic, Eigen::Dynamic, 1>( + row_values + row.cells[i].position, row.block.size, block1_size, + row_values + row.cells[i].position, row.block.size, block1_size, + cell_info->values, r, c, row_stride, col_stride); } for (int j = i + 1; j < row.cells.size(); ++j) { @@ -638,17 +613,15 @@ NoEBlockRowOuterProduct(const BlockSparseMatrixBase* A, CellInfo* cell_info = lhs->GetCell(block1, block2, &r, &c, &row_stride, &col_stride); - if (cell_info == NULL) { - continue; + if (cell_info != NULL) { + const int block2_size = bs->cols[row.cells[j].block_id].size; + CeresMutexLock l(&cell_info->m); + MatrixTransposeMatrixMultiply + <Eigen::Dynamic, Eigen::Dynamic, Eigen::Dynamic, Eigen::Dynamic, 1>( + row_values + row.cells[i].position, row.block.size, block1_size, + row_values + row.cells[j].position, row.block.size, block2_size, + cell_info->values, r, c, row_stride, col_stride); } - - const int block2_size = bs->cols[row.cells[j].block_id].size; - CeresMutexLock l(&cell_info->m); - MatrixRef m(cell_info->values, row_stride, col_stride); - m.block(r, c, block1_size, block2_size).noalias() += - b1.transpose() * 
ConstMatrixRef(row_values + row.cells[j].position, - row.block.size, - block2_size); } } } @@ -670,25 +643,18 @@ EBlockRowOuterProduct(const BlockSparseMatrixBase* A, DCHECK_GE(block1, 0); const int block1_size = bs->cols[row.cells[i].block_id].size; - const typename EigenTypes<kRowBlockSize, kFBlockSize>::ConstMatrixRef - b1(row_values + row.cells[i].position, - row.block.size, block1_size); - { - int r, c, row_stride, col_stride; - CellInfo* cell_info = lhs->GetCell(block1, block1, - &r, &c, - &row_stride, &col_stride); - if (cell_info == NULL) { - continue; - } - + int r, c, row_stride, col_stride; + CellInfo* cell_info = lhs->GetCell(block1, block1, + &r, &c, + &row_stride, &col_stride); + if (cell_info != NULL) { CeresMutexLock l(&cell_info->m); - MatrixRef m(cell_info->values, row_stride, col_stride); - - Eigen::Block<MatrixRef, kFBlockSize, kFBlockSize> - block(m, r, c, block1_size, block1_size); - block.template selfadjointView<Eigen::Upper>() - .rankUpdate(b1.transpose(), 1.0); + // block += b1.transpose() * b1; + MatrixTransposeMatrixMultiply + <kRowBlockSize, kFBlockSize, kRowBlockSize, kFBlockSize, 1>( + row_values + row.cells[i].position, row.block.size, block1_size, + row_values + row.cells[i].position, row.block.size, block1_size, + cell_info->values, r, c, row_stride, col_stride); } for (int j = i + 1; j < row.cells.size(); ++j) { @@ -700,20 +666,14 @@ EBlockRowOuterProduct(const BlockSparseMatrixBase* A, CellInfo* cell_info = lhs->GetCell(block1, block2, &r, &c, &row_stride, &col_stride); - if (cell_info == NULL) { - continue; + if (cell_info != NULL) { + // block += b1.transpose() * b2; + MatrixTransposeMatrixMultiply + <kRowBlockSize, kFBlockSize, kRowBlockSize, kFBlockSize, 1>( + row_values + row.cells[i].position, row.block.size, block1_size, + row_values + row.cells[j].position, row.block.size, block2_size, + cell_info->values, r, c, row_stride, col_stride); } - - const typename EigenTypes<kRowBlockSize, kFBlockSize>::ConstMatrixRef - 
b2(row_values + row.cells[j].position, - row.block.size, - block2_size); - - CeresMutexLock l(&cell_info->m); - MatrixRef m(cell_info->values, row_stride, col_stride); - Eigen::Block<MatrixRef, kFBlockSize, kFBlockSize> - block(m, r, c, block1_size, block2_size); - block.noalias() += b1.transpose() * b2; } } } diff --git a/extern/libmv/third_party/ceres/internal/ceres/sparse_normal_cholesky_solver.cc b/extern/libmv/third_party/ceres/internal/ceres/sparse_normal_cholesky_solver.cc index 195cacb984c..aacba86e64a 100644 --- a/extern/libmv/third_party/ceres/internal/ceres/sparse_normal_cholesky_solver.cc +++ b/extern/libmv/third_party/ceres/internal/ceres/sparse_normal_cholesky_solver.cc @@ -213,13 +213,7 @@ LinearSolver::Summary SparseNormalCholeskySolver::SolveImplUsingSuiteSparse( } else { factor_ = ss_.AnalyzeCholesky(lhs.get()); } - - if (VLOG_IS_ON(2)) { - cholmod_print_common(const_cast<char*>("Symbolic Analysis"), ss_.mutable_cc()); - } } - - CHECK_NOTNULL(factor_); event_logger.AddEvent("Analysis"); cholmod_dense* sol = ss_.SolveCholesky(lhs.get(), factor_, rhs); diff --git a/extern/libmv/third_party/ceres/internal/ceres/suitesparse.cc b/extern/libmv/third_party/ceres/internal/ceres/suitesparse.cc index d200aeb82f3..87fae75287e 100644 --- a/extern/libmv/third_party/ceres/internal/ceres/suitesparse.cc +++ b/extern/libmv/third_party/ceres/internal/ceres/suitesparse.cc @@ -35,8 +35,18 @@ #include "cholmod.h" #include "ceres/compressed_row_sparse_matrix.h" #include "ceres/triplet_sparse_matrix.h" + namespace ceres { namespace internal { + +SuiteSparse::SuiteSparse() { + cholmod_start(&cc_); +} + +SuiteSparse::~SuiteSparse() { + cholmod_finish(&cc_); +} + cholmod_sparse* SuiteSparse::CreateSparseMatrix(TripletSparseMatrix* A) { cholmod_triplet triplet; @@ -117,10 +127,16 @@ cholmod_factor* SuiteSparse::AnalyzeCholesky(cholmod_sparse* A) { cc_.nmethods = 1; cc_.method[0].ordering = CHOLMOD_AMD; cc_.supernodal = CHOLMOD_AUTO; + cholmod_factor* factor = 
cholmod_analyze(A, &cc_); CHECK_EQ(cc_.status, CHOLMOD_OK) << "Cholmod symbolic analysis failed " << cc_.status; CHECK_NOTNULL(factor); + + if (VLOG_IS_ON(2)) { + cholmod_print_common(const_cast<char*>("Symbolic Analysis"), &cc_); + } + return factor; } @@ -139,13 +155,20 @@ cholmod_factor* SuiteSparse::AnalyzeCholeskyWithUserOrdering( cholmod_sparse* A, const vector<int>& ordering) { CHECK_EQ(ordering.size(), A->nrow); + cc_.nmethods = 1; cc_.method[0].ordering = CHOLMOD_GIVEN; + cholmod_factor* factor = cholmod_analyze_p(A, const_cast<int*>(&ordering[0]), NULL, 0, &cc_); CHECK_EQ(cc_.status, CHOLMOD_OK) << "Cholmod symbolic analysis failed " << cc_.status; CHECK_NOTNULL(factor); + + if (VLOG_IS_ON(2)) { + cholmod_print_common(const_cast<char*>("Symbolic Analysis"), &cc_); + } + return factor; } @@ -276,36 +299,52 @@ bool SuiteSparse::Cholesky(cholmod_sparse* A, cholmod_factor* L) { CHECK_NOTNULL(A); CHECK_NOTNULL(L); + // Save the current print level and silence CHOLMOD, otherwise + // CHOLMOD is prone to dumping stuff to stderr, which can be + // distracting when the error (matrix is indefinite) is not a fatal + // failure. + const int old_print_level = cc_.print; + cc_.print = 0; + cc_.quick_return_if_not_posdef = 1; int status = cholmod_factorize(A, L, &cc_); + cc_.print = old_print_level; + + // TODO(sameeragarwal): This switch statement is not consistent. It + // treats all kinds of CHOLMOD failures as warnings. Some of these + // like out of memory are definitely not warnings. The problem is + // that the return value Cholesky is two valued, but the state of + // the linear solver is really three valued. SUCCESS, + // NON_FATAL_FAILURE (e.g., indefinite matrix) and FATAL_FAILURE + // (e.g. out of memory). 
switch (cc_.status) { case CHOLMOD_NOT_INSTALLED: - LOG(WARNING) << "Cholmod failure: method not installed."; + LOG(WARNING) << "CHOLMOD failure: Method not installed."; return false; case CHOLMOD_OUT_OF_MEMORY: - LOG(WARNING) << "Cholmod failure: out of memory."; + LOG(WARNING) << "CHOLMOD failure: Out of memory."; return false; case CHOLMOD_TOO_LARGE: - LOG(WARNING) << "Cholmod failure: integer overflow occured."; + LOG(WARNING) << "CHOLMOD failure: Integer overflow occured."; return false; case CHOLMOD_INVALID: - LOG(WARNING) << "Cholmod failure: invalid input."; + LOG(WARNING) << "CHOLMOD failure: Invalid input."; return false; case CHOLMOD_NOT_POSDEF: // TODO(sameeragarwal): These two warnings require more // sophisticated handling going forward. For now we will be // strict and treat them as failures. - LOG(WARNING) << "Cholmod warning: matrix not positive definite."; + LOG(WARNING) << "CHOLMOD warning: Matrix not positive definite."; return false; case CHOLMOD_DSMALL: - LOG(WARNING) << "Cholmod warning: D for LDL' or diag(L) or " + LOG(WARNING) << "CHOLMOD warning: D for LDL' or diag(L) or " << "LL' has tiny absolute value."; return false; case CHOLMOD_OK: if (status != 0) { return true; } - LOG(WARNING) << "Cholmod failure: cholmod_factorize returned zero " + LOG(WARNING) << "CHOLMOD failure: cholmod_factorize returned zero " << "but cholmod_common::status is CHOLMOD_OK." << "Please report this to ceres-solver@googlegroups.com."; return false; diff --git a/extern/libmv/third_party/ceres/internal/ceres/suitesparse.h b/extern/libmv/third_party/ceres/internal/ceres/suitesparse.h index 3fe79080d5d..5a8ef63bd2b 100644 --- a/extern/libmv/third_party/ceres/internal/ceres/suitesparse.h +++ b/extern/libmv/third_party/ceres/internal/ceres/suitesparse.h @@ -56,8 +56,8 @@ class TripletSparseMatrix; // for all cholmod function calls. 
class SuiteSparse { public: - SuiteSparse() { cholmod_start(&cc_); } - ~SuiteSparse() { cholmod_finish(&cc_); } + SuiteSparse(); + ~SuiteSparse(); // Functions for building cholmod_sparse objects from sparse // matrices stored in triplet form. The matrix A is not diff --git a/extern/libmv/third_party/ceres/internal/ceres/visibility_based_preconditioner.cc b/extern/libmv/third_party/ceres/internal/ceres/visibility_based_preconditioner.cc index 1678d0072e5..f5c46bfcc68 100644 --- a/extern/libmv/third_party/ceres/internal/ceres/visibility_based_preconditioner.cc +++ b/extern/libmv/third_party/ceres/internal/ceres/visibility_based_preconditioner.cc @@ -426,14 +426,8 @@ bool VisibilityBasedPreconditioner::Factorize() { } else { factor_ = ss_.AnalyzeCholesky(lhs); } - - if (VLOG_IS_ON(2)) { - cholmod_print_common(const_cast<char*>("Symbolic Analysis"), ss_.mutable_cc()); - } } - CHECK_NOTNULL(factor_); - bool status = ss_.Cholesky(lhs, factor_); ss_.Free(lhs); return status; |