git.blender.org/blender.git
author     Sergey Sharybin <sergey.vfx@gmail.com>  2013-04-05 13:22:54 +0400
committer  Sergey Sharybin <sergey.vfx@gmail.com>  2013-04-05 13:22:54 +0400
commit     43b61fb8bd6743072f53d7006d8bebe9ff06caf3 (patch)
tree       54cfc05064194bee46eccf974d9cc22324d366eb /extern
parent     a00b72bc7503fd1aecdae41a7cb9338f2eb9b35b (diff)
Bundle current master of ceres-solver
This brings some speed improvements:

  SPARSE_SCHUR is approx 1.3-1.5x faster
  ITERATIVE_SCHUR is approx 1.2x faster

For Blender this means camera solving goes a bit faster now.
It does not affect tracking speed.
Diffstat (limited to 'extern')
-rw-r--r--  extern/libmv/third_party/ceres/CMakeLists.txt                                      1
-rw-r--r--  extern/libmv/third_party/ceres/ChangeLog                                          350
-rw-r--r--  extern/libmv/third_party/ceres/files.txt                                            1
-rw-r--r--  extern/libmv/third_party/ceres/include/ceres/jet.h                                 30
-rw-r--r--  extern/libmv/third_party/ceres/internal/ceres/blas.h                              496
-rw-r--r--  extern/libmv/third_party/ceres/internal/ceres/block_sparse_matrix.cc               20
-rw-r--r--  extern/libmv/third_party/ceres/internal/ceres/partitioned_matrix_view.cc           69
-rw-r--r--  extern/libmv/third_party/ceres/internal/ceres/schur_complement_solver.cc            6
-rw-r--r--  extern/libmv/third_party/ceres/internal/ceres/schur_eliminator.h                   18
-rw-r--r--  extern/libmv/third_party/ceres/internal/ceres/schur_eliminator_impl.h             254
-rw-r--r--  extern/libmv/third_party/ceres/internal/ceres/sparse_normal_cholesky_solver.cc      6
-rw-r--r--  extern/libmv/third_party/ceres/internal/ceres/suitesparse.cc                       53
-rw-r--r--  extern/libmv/third_party/ceres/internal/ceres/suitesparse.h                         4
-rw-r--r--  extern/libmv/third_party/ceres/internal/ceres/visibility_based_preconditioner.cc    6
14 files changed, 911 insertions, 403 deletions
diff --git a/extern/libmv/third_party/ceres/CMakeLists.txt b/extern/libmv/third_party/ceres/CMakeLists.txt
index b71fe1d5cdb..e515280826b 100644
--- a/extern/libmv/third_party/ceres/CMakeLists.txt
+++ b/extern/libmv/third_party/ceres/CMakeLists.txt
@@ -141,6 +141,7 @@ set(SRC
include/ceres/solver.h
include/ceres/types.h
internal/ceres/array_utils.h
+ internal/ceres/blas.h
internal/ceres/block_evaluate_preparer.h
internal/ceres/block_jacobian_writer.h
internal/ceres/block_jacobi_preconditioner.h
diff --git a/extern/libmv/third_party/ceres/ChangeLog b/extern/libmv/third_party/ceres/ChangeLog
index 458a045d8be..46c7b6ac291 100644
--- a/extern/libmv/third_party/ceres/ChangeLog
+++ b/extern/libmv/third_party/ceres/ChangeLog
@@ -1,3 +1,185 @@
+commit 520d35ef22dbcb05e344451c03ae64304e524a06
+Author: Sameer Agarwal <sameeragarwal@google.com>
+Date: Thu Apr 4 08:16:02 2013 -0700
+
+ Further BLAS improvements.
+
+ 1. Switch to Eigen's implementation when all dimensions are fixed.
+ 2. Use lazyProduct for eigen matrix-vector product. This brings
+ eigen's performance on iterative_schur closer to what it used
+ to be before the last commit. There is however still an
+ improvement to be had by using the naive implementation when
+ the matrix and vector have dynamic dimensions.
+
+ BENCHMARK
+ HEAD CHANGE
+
+ problem-16-22106-pre.txt
+ gcc-eigen sparse_schur 0.859 gcc-eigen sparse_schur 0.853
+ clang-eigen sparse_schur 0.848 clang-eigen sparse_schur 0.850
+ gcc-blas sparse_schur 0.956 gcc-blas sparse_schur 0.865
+ clang-blas sparse_schur 0.954 clang-blas sparse_schur 0.858
+ gcc-eigen iterative_schur 4.656 gcc-eigen iterative_schur 3.271
+ clang-eigen iterative_schur 4.664 clang-eigen iterative_schur 3.307
+ gcc-blas iterative_schur 2.598 gcc-blas iterative_schur 2.620
+ clang-blas iterative_schur 2.554 clang-blas iterative_schur 2.567
+
+ problem-49-7776-pre.txt
+ gcc-eigen sparse_schur 0.477 gcc-eigen sparse_schur 0.472
+ clang-eigen sparse_schur 0.475 clang-eigen sparse_schur 0.479
+ gcc-blas sparse_schur 0.521 gcc-blas sparse_schur 0.469
+ clang-blas sparse_schur 0.508 clang-blas sparse_schur 0.471
+ gcc-eigen iterative_schur 3.172 gcc-eigen iterative_schur 2.088
+ clang-eigen iterative_schur 3.161 clang-eigen iterative_schur 2.079
+ gcc-blas iterative_schur 1.701 gcc-blas iterative_schur 1.720
+ clang-blas iterative_schur 1.708 clang-blas iterative_schur 1.694
+
+ problem-245-198739-pre.txt
+ gcc-eigen sparse_schur 28.092 gcc-eigen sparse_schur 28.233
+ clang-eigen sparse_schur 28.148 clang-eigen sparse_schur 28.400
+ gcc-blas sparse_schur 30.919 gcc-blas sparse_schur 28.110
+ clang-blas sparse_schur 31.001 clang-blas sparse_schur 28.407
+ gcc-eigen iterative_schur 63.095 gcc-eigen iterative_schur 43.694
+ clang-eigen iterative_schur 63.412 clang-eigen iterative_schur 43.473
+ gcc-blas iterative_schur 33.353 gcc-blas iterative_schur 33.321
+ clang-blas iterative_schur 33.276 clang-blas iterative_schur 33.278
+
+ problem-257-65132-pre.txt
+ gcc-eigen sparse_schur 3.687 gcc-eigen sparse_schur 3.629
+ clang-eigen sparse_schur 3.669 clang-eigen sparse_schur 3.652
+ gcc-blas sparse_schur 3.947 gcc-blas sparse_schur 3.673
+ clang-blas sparse_schur 3.952 clang-blas sparse_schur 3.678
+ gcc-eigen iterative_schur 121.512 gcc-eigen iterative_schur 76.833
+ clang-eigen iterative_schur 123.547 clang-eigen iterative_schur 78.763
+ gcc-blas iterative_schur 68.334 gcc-blas iterative_schur 68.612
+ clang-blas iterative_schur 67.793 clang-blas iterative_schur 68.266
+
+ Notes:
+
+ 1. Naive BLAS was a bit worse than eigen on fixed sized matrices. We did not see this
+ before because of the different inlining thresholds. Fixing this boosted eigen's
+ performance. Also the disparity between gcc and clang has gone away.
+
+ 2. SPARSE_SCHUR performance remains the same, since it is only testing static sized
+ matrices.
+
+ 3. ITERATIVE_SCHUR performance goes up substantially due to the lazyProduct change,
+ but even there, since most of the products are dynamic sized, the naive implementation
+ wins handily.
+
+ Change-Id: Idc17f35b9c68aaebb1b2e131adf3af8374a85a4c
+
+commit 25ac54807eedf16fd6c34efc390901ee549a7d14
+Author: Sameer Agarwal <sameeragarwal@google.com>
+Date: Wed Apr 3 18:51:27 2013 -0700
+
+ Speed up Jets.
+
+ Change-Id: I101bac1b1a1cf72ca49ffcf843b73c0ef5a6dfcb
+
+commit 3d6eceb45cf27024865908f0c10a5c2b0f8719cf
+Author: Sameer Agarwal <sameeragarwal@google.com>
+Date: Tue Apr 2 21:45:48 2013 -0700
+
+ Replace more instances of Eigen GEMV with Ceres BLAS.
+
+ With this ITERATIVE_SCHUR with JACOBI preconditioner went down from
+ 280 seconds to 150 seconds on problem-744-543562-pre.txt.
+
+ Change-Id: I4f319c1108421e8d59f58654a4c0576ad65df609
+
+commit 296fa9b1279ee1900c8ae32d70e97cd10fc0b46b
+Author: Sameer Agarwal <sameeragarwal@google.com>
+Date: Tue Apr 2 09:44:15 2013 -0700
+
+ Replace Eigen block operations with small GEMM and GEMV loops.
+
+ 1. Add Matrix-Matrix and Matrix-Vector multiply functions.
+ 2. Replace Eigen usage in SchurEliminator with these custom
+ matrix operations.
+ 3. Save on some memory allocations in ChunkOuterProduct.
+ 4. Replace LDLT with LLT.
+
+ As a result on problem-16-22106-pre.txt, the linear solver time
+ goes down from 1.2s to 0.64s.
+
+ Change-Id: I2daa667960e0a1e8834489965a30be31f37fd87f
+
+commit 222ca20e8facf706582fe696b7f41247391eac12
+Author: Sameer Agarwal <sameeragarwal@google.com>
+Date: Mon Apr 1 09:11:07 2013 -0700
+
+ SuiteSparse cleanup.
+
+ 1. Silence CHOLMOD's indefiniteness warnings.
+ 2. Add a comment about how the error handling in suitesparse.cc
+ needs to be improved.
+ 3. Move the analysis logging into suitesparse.cc and out of the
+ three callsites.
+
+ Change-Id: Idd396b8ea4bf59fc1ffc7f9fcbbc7b38ed71643c
+
+commit b7ba93459b7f584eedb1a9f42f3d06bccabd33dc
+Author: Petter Strandmark <petter.strandmark@gmail.com>
+Date: Tue Feb 19 12:52:58 2013 +0100
+
+ Enable larger tuple sizes for Visual Studio 2012.
+
+ Visual Studio 2012 does not have variadic templates and implements
+ tuples differently. By default, only sizes up to 5 are supported,
+ which conflicts with Gtest.
+
+ Change-Id: Ieb8d59e4329863cbfa2729d8a76db0714c08d259
+
+commit 564a83fcc690ac8383bf52a782c45757ae7fa2ad
+Author: Sameer Agarwal <sameeragarwal@google.com>
+Date: Tue Mar 26 11:11:43 2013 -0700
+
+ Lint cleanup from William Rucklidge.
+
+ Change-Id: I8d4a0aa3e264775d20e99a6b5265f3023de92560
+
+commit cbe64827becbbaab5b435a71bf00353b4ddd026b
+Author: Sameer Agarwal <sameeragarwal@google.com>
+Date: Mon Mar 25 17:39:53 2013 -0700
+
+ Update documentation
+
+ Change-Id: Iea3c4b5409e593b1fb070a491ba8a479db32ca58
+
+commit 802639c89603c9541e624766349d1989a1f641c0
+Author: Pablo Speciale <pablo.speciale@gmail.com>
+Date: Mon Mar 25 20:53:45 2013 -0700
+
+ ceresproblem label was incorrect
+
+ Change-Id: I3e210375adba4fa50ff3c25398b20a65d241cb35
+
+commit 6bcb8d9c304a3b218f8788018dfdfe368bb7d60c
+Author: Pablo Speciale <pablo.speciale@gmail.com>
+Date: Mon Mar 25 16:40:26 2013 -0700
+
+ Compiling against static or shared library
+
+ Change-Id: I3fb35e9a49f90b8894f59dde49c90a7c2dd74b0a
+
+commit 619cfe692020c078275b68eac2167232fafdfffb
+Author: Sameer Agarwal <sameeragarwal@google.com>
+Date: Mon Mar 25 14:03:41 2013 -0700
+
+ Update version history
+
+ Change-Id: I1d036efad1507efd19d8581f147b38170b1f0543
+
+commit 6ae34757850a5fa8213e0d1a540d9d75d6840a08
+Author: Pablo Speciale <pablo.speciale@gmail.com>
+Date: Sun Mar 24 22:30:52 2013 -0700
+
+ Added documentation regarding the use of Ceres with cmake (once installed)
+ Comments about the flag ``BUILD_DOCUMENTATION=ON``
+
+ Change-Id: I8814927e60af190c8043bfc36e77fe76bfe6f562
+
commit f46de9e697eb5b8756084615e29ded48600a4d39
Author: Sergey Sharybin <sergey.vfx@gmail.com>
Date: Thu Mar 21 15:31:35 2013 +0600
@@ -437,171 +619,3 @@ Date: Sun Feb 24 14:15:45 2013 -0800
Minor release script fixes.
Change-Id: Ifd0a7f4f584c85d4d9574eca46094b372a8d7aff
-
-commit b53c9667f508c125b8aa833e7a063fa44ef8a98e
-Author: Sergey Sharybin <sergey.vfx@gmail.com>
-Date: Mon Feb 25 01:14:26 2013 +0600
-
- Solve No Previous Prototype GCC warning
-
- In some cases there were missing includes of own
- header files from implementation files.
-
- In other cases moved functions which are only used
- within a single file into an anonymous namespace.
-
- Change-Id: I2c6b411bcfbc521e2a5f21265dc8e009a548b1c8
-
-commit 267ccc45a3e875bf87832a8ad615be690b4926d3
-Author: Sergey Sharybin <sergey.vfx@gmail.com>
-Date: Mon Feb 25 01:04:16 2013 +0600
-
- Fix for MinGW build on Windows
-
- GG_LONGLONG and GG_ULONGLONG shall use LL and ULL suffixes,
- since MinGW is actually a GCC compiler.
-
- Solved by checking whether compilation happens with MinGW
- or not using standard MinGW's __MINGW32__ and __MINGW64__
- definitions.
-
- Change-Id: I789b34f6342a56ba42f4b280f7242700022ab7a1
-
-commit 509f68cfe3fd13b794c4e67ff38c761407c858cf
-Author: Sameer Agarwal <sameeragarwal@google.com>
-Date: Wed Feb 20 01:39:03 2013 -0800
-
- Problem::Evaluate implementation.
-
- 1. Add Problem::Evaluate and tests.
- 2. Remove Solver::Summary::initial/final_*
- 3. Remove Solver::Options::return_* members.
- 4. Various cpplint cleanups.
-
- Change-Id: I4266de53489896f72d9c6798c5efde6748d68a47
-
-commit d4a0bf86d688d1b68e00ff302858de5a4e0d9727
-Author: Keir Mierle <mierle@gmail.com>
-Date: Sun Feb 24 10:35:44 2013 -0800
-
- Fix threading build on Windows.
-
- On Windows, including the "windows.h" header defines an enormous number of
- symbols; some of which are macros with common names. In particular, "ERROR" and
- "min" and "max" get defined. This causes clashes when user code references
- these names in a context other than the intended use in windows.h.
-
- To deal with this, the Microsoft engineers added the ability to control the
- definition of these symbols by adding extra defines. In particular, including
- windows.h in the following way
-
- #define NOGDI
- #define NOMINMAX
-
- will reduce the number of macros defined. This way they will not conflict with
- other uses in Ceres. For example, numeric_limits<double>::max() is impossible
- to call without defining NOMINMAX.
-
- Change-Id: I166f5d3bb6dc0e2e4b2ebf800fb19e49206f7874
-
-commit beb4505311011130a7e54632137b0fbb5824cc9b
-Author: Sameer Agarwal <sameeragarwal@google.com>
-Date: Fri Feb 22 13:37:01 2013 -0800
-
- Minor fixes
-
- Based on William Rucklidge's review, including a fix
- for a nasty bug in parameter block removal.
-
- Change-Id: I3a692e589f600ff560ecae9fa85bb0b76063d403
-
-commit 9a88bd7c4b40e2a1e0cd9b0dc09a3517c467e04e
-Author: Sameer Agarwal <sameeragarwal@google.com>
-Date: Tue Feb 19 13:09:12 2013 -0800
-
- Minor bug fixes
-
- Change-Id: I94e4521adf76a6c77db954c4a8955168e9d37b55
-
-commit 956ed7e8f2054623615e6ae3601055d613897f26
-Author: Sameer Agarwal <sameeragarwal@google.com>
-Date: Tue Feb 19 07:06:15 2013 -0800
-
- Various minor fixes.
-
- 1. Unused variable warnings and fixes.
- 2. Minor documentation update.
-
- Change-Id: I815588a5806df1030a7c8750f4fb594c503f8998
-
-commit 3e2c4ef9ad35e94198f4f3367b99fd91e26996a1
-Author: Keir Mierle <mierle@gmail.com>
-Date: Sun Feb 17 12:37:55 2013 -0800
-
- Add adapters for column/row-major matrices to rotation.h
-
- This patch introduces a matrix wrapper (MatrixAdapter) that allows one
- to transparently pass pointers to row-major or column-major matrices
- to the conversion functions.
-
- Change-Id: I7f1683a8722088cffcc542f593ce7eb46fca109b
-
-commit 04938efe4bedec112083c5ceb227ba004f96bd01
-Author: Keir Mierle <mierle@gmail.com>
-Date: Sun Feb 17 12:37:55 2013 -0800
-
- Add support for removing parameter and residual blocks.
-
- This adds support for removing parameter and residual blocks.
- There are two modes of operation: in the first, removals of
- parameter blocks are expensive, since each removal requires
- scanning all residual blocks to find ones that depend on the
- removed parameter. In the other, extra memory is sacrificed to
- maintain a list of the residuals a parameter block depends on,
- removing the need to scan. In both cases, removing residual blocks
- is fast.
-
- As a caveat, any removal destroys the ordering of the parameters,
- so the residuals or jacobian returned from Solver::Solve() are
- meaningless. There is some debate on the best way to handle this;
- the details remain for a future change.
-
- This also adds some overhead, even in the case that fast removals
- are not requested:
-
- - 1 int32 to each residual, to track its position in the program.
- - 1 pointer to each parameter, to store the dependent residuals.
-
- Change-Id: I71dcac8656679329a15ee7fc12c0df07030c12af
-
-commit fa21df8cd969bb257b87c9ef7c0147d8d5ea8725
-Author: Sameer Agarwal <sameeragarwal@google.com>
-Date: Mon Feb 18 08:48:52 2013 -0800
-
- Add script for building documentation.
-
- Update make_release
-
- Minor documentation fixes.
-
- Change-Id: I1248ec3f58be66b5929aee6f2aa392c15d53ed83
-
-commit 290b975d1d4eba44205bbeb0fa6b3ce8a6fa4a0c
-Author: Sameer Agarwal <sameeragarwal@google.com>
-Date: Sun Feb 17 16:50:37 2013 -0800
-
- Preconditioner refactoring.
-
- 1. Added a Preconditioner interface.
- 2. SCHUR_JACOBI is now its own class and is independent of
- SuiteSparse.
-
- Change-Id: Id912ab19cf3736e61d1b90ddaf5bfba33e877ec4
-
-commit d010de543530001fa917501a13ba02879c8ea52f
-Author: Sameer Agarwal <sameeragarwal@google.com>
-Date: Fri Feb 15 14:26:56 2013 -0800
-
- Solver::Summary::FullReport() supports line search now.
-
- Change-Id: Ib08d300198b85d9732cfb5785af4235ca4bd5226
diff --git a/extern/libmv/third_party/ceres/files.txt b/extern/libmv/third_party/ceres/files.txt
index 8f4b7b97b50..0c60e5a055a 100644
--- a/extern/libmv/third_party/ceres/files.txt
+++ b/extern/libmv/third_party/ceres/files.txt
@@ -31,6 +31,7 @@ include/ceres/solver.h
include/ceres/types.h
internal/ceres/array_utils.cc
internal/ceres/array_utils.h
+internal/ceres/blas.h
internal/ceres/block_evaluate_preparer.cc
internal/ceres/block_evaluate_preparer.h
internal/ceres/block_jacobian_writer.cc
diff --git a/extern/libmv/third_party/ceres/include/ceres/jet.h b/extern/libmv/third_party/ceres/include/ceres/jet.h
index 96e2256fd02..1238123a8fb 100644
--- a/extern/libmv/third_party/ceres/include/ceres/jet.h
+++ b/extern/libmv/third_party/ceres/include/ceres/jet.h
@@ -349,7 +349,11 @@ Jet<T, N> operator/(const Jet<T, N>& f,
//
// which holds because v*v = 0.
h.a = f.a / g.a;
- h.v = (f.v - f.a / g.a * g.v) / g.a;
+ const T g_a_inverse = 1.0 / g.a;
+ const T f_a_by_g_a = f.a * g_a_inverse;
+ for (int i = 0; i < N; ++i) {
+ h.v[i] = (f.v[i] - f_a_by_g_a * g.v[i]) * g_a_inverse;
+ }
return h;
}
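
For reference, the rewritten loop computes the same quotient rule as the old vectorized expression. Writing a jet as a + v*eps with eps^2 = 0,

    f / g = (f.a + f.v eps) / (g.a + g.v eps)
          = f.a / g.a + ((f.v - (f.a / g.a) * g.v) / g.a) * eps,

with 1 / g.a and f.a / g.a hoisted into g_a_inverse and f_a_by_g_a, so each component costs a multiply instead of a divide.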
@@ -358,7 +362,8 @@ template<typename T, int N> inline
Jet<T, N> operator/(T s, const Jet<T, N>& g) {
Jet<T, N> h;
h.a = s / g.a;
- h.v = - s * g.v / (g.a * g.a);
+ const T minus_s_g_a_inverse2 = -s / (g.a * g.a);
+ h.v = g.v * minus_s_g_a_inverse2;
return h;
}
@@ -366,8 +371,9 @@ Jet<T, N> operator/(T s, const Jet<T, N>& g) {
template<typename T, int N> inline
Jet<T, N> operator/(const Jet<T, N>& f, T s) {
Jet<T, N> h;
- h.a = f.a / s;
- h.v = f.v / s;
+ const T s_inverse = 1.0 / s;
+ h.a = f.a * s_inverse;
+ h.v = f.v * s_inverse;
return h;
}
@@ -425,7 +431,8 @@ template <typename T, int N> inline
Jet<T, N> log(const Jet<T, N>& f) {
Jet<T, N> g;
g.a = log(f.a);
- g.v = f.v / f.a;
+ const T a_inverse = T(1.0) / f.a;
+ g.v = f.v * a_inverse;
return g;
}
@@ -443,7 +450,8 @@ template <typename T, int N> inline
Jet<T, N> sqrt(const Jet<T, N>& f) {
Jet<T, N> g;
g.a = sqrt(f.a);
- g.v = f.v / (T(2.0) * g.a);
+ const T two_a_inverse = 1.0 / (T(2.0) * g.a);
+ g.v = f.v * two_a_inverse;
return g;
}
@@ -452,7 +460,7 @@ template <typename T, int N> inline
Jet<T, N> cos(const Jet<T, N>& f) {
Jet<T, N> g;
g.a = cos(f.a);
- T sin_a = sin(f.a);
+ const T sin_a = sin(f.a);
g.v = - sin_a * f.v;
return g;
}
@@ -462,7 +470,8 @@ template <typename T, int N> inline
Jet<T, N> acos(const Jet<T, N>& f) {
Jet<T, N> g;
g.a = acos(f.a);
- g.v = - T(1.0) / sqrt(T(1.0) - f.a * f.a) * f.v;
+ const T tmp = - T(1.0) / sqrt(T(1.0) - f.a * f.a);
+ g.v = tmp * f.v;
return g;
}
@@ -471,7 +480,7 @@ template <typename T, int N> inline
Jet<T, N> sin(const Jet<T, N>& f) {
Jet<T, N> g;
g.a = sin(f.a);
- T cos_a = cos(f.a);
+ const T cos_a = cos(f.a);
g.v = cos_a * f.v;
return g;
}
@@ -481,7 +490,8 @@ template <typename T, int N> inline
Jet<T, N> asin(const Jet<T, N>& f) {
Jet<T, N> g;
g.a = asin(f.a);
- g.v = T(1.0) / sqrt(T(1.0) - f.a * f.a) * f.v;
+ const T tmp = T(1.0) / sqrt(T(1.0) - f.a * f.a);
+ g.v = tmp * f.v;
return g;
}
diff --git a/extern/libmv/third_party/ceres/internal/ceres/blas.h b/extern/libmv/third_party/ceres/internal/ceres/blas.h
new file mode 100644
index 00000000000..12667d09e55
--- /dev/null
+++ b/extern/libmv/third_party/ceres/internal/ceres/blas.h
@@ -0,0 +1,496 @@
+// Ceres Solver - A fast non-linear least squares minimizer
+// Copyright 2013 Google Inc. All rights reserved.
+// http://code.google.com/p/ceres-solver/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright notice,
+// this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above copyright notice,
+// this list of conditions and the following disclaimer in the documentation
+// and/or other materials provided with the distribution.
+// * Neither the name of Google Inc. nor the names of its contributors may be
+// used to endorse or promote products derived from this software without
+// specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
+//
+// Author: sameeragarwal@google.com (Sameer Agarwal)
+//
+// Simple blas functions for use in the Schur Eliminator. These are
+// fairly basic implementations which already yield a significant
+// speedup in the eliminator performance.
+
+#ifndef CERES_INTERNAL_BLAS_H_
+#define CERES_INTERNAL_BLAS_H_
+
+#include "ceres/internal/eigen.h"
+#include "glog/logging.h"
+
+namespace ceres {
+namespace internal {
+
+// Remove the ".noalias()" annotation from the matrix-matrix
+// multiplies to produce a correct build with the Android NDK,
+// including versions 6, 7, 8, and 8b, when built with STLPort and the
+// non-standalone toolchain (i.e. ndk-build). This appears to be a
+// compiler bug; if the workaround is not in place, the line
+//
+// block.noalias() -= A * B;
+//
+// gets compiled to
+//
+// block.noalias() += A * B;
+//
+// which breaks schur elimination. Introducing a temporary by removing the
+// .noalias() annotation causes the issue to disappear. Tracking this
+// issue down was tricky, since the test suite doesn't run when built with
+// the non-standalone toolchain.
+//
+// TODO(keir): Make a reproduction case for this and send it upstream.
+#ifdef CERES_WORK_AROUND_ANDROID_NDK_COMPILER_BUG
+#define CERES_MAYBE_NOALIAS
+#else
+#define CERES_MAYBE_NOALIAS .noalias()
+#endif
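+
+// For example, with the workaround enabled the statement
+//
+//   block CERES_MAYBE_NOALIAS -= Aref * Bref;
+//
+// preprocesses to "block -= Aref * Bref;" (letting Eigen introduce a
+// temporary), and otherwise to "block.noalias() -= Aref * Bref;".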
+
+// For the matrix-matrix functions below, there are three functions
+// for each functionality. Foo, FooNaive and FooEigen. Foo is the one
+// to be called by the user. FooNaive is a basic loop based
+// implementation and FooEigen uses Eigen's implementation. Foo
+// chooses between FooNaive and FooEigen depending on how many of the
+// template arguments are fixed at compile time. Currently, FooEigen
+// is called if all matrix dimensions are compile time
+// constants. FooNaive is called otherwise. This leads to the best
+// performance currently.
+//
+// TODO(sameeragarwal): Benchmark and simplify the matrix-vector
+// functions.
+
+// C op A * B;
+//
+// where op can be +=, -=, or =.
+//
+// The template parameters (kRowA, kColA, kRowB, kColB) allow
+// specialization of the loop at compile time. If this information is
+// not available, then Eigen::Dynamic should be used as the template
+// argument.
+//
+// kOperation = 1 -> C += A * B
+// kOperation = -1 -> C -= A * B
+// kOperation = 0 -> C = A * B
+//
+// The function can write into matrices C which are larger than the
+// matrix A * B. This is done by specifying the true size of C via
+// row_stride_c and col_stride_c, and then indicating where A * B
+// should be written into by start_row_c and start_col_c.
+//
+// Graphically if row_stride_c = 10, col_stride_c = 12, start_row_c =
+// 4 and start_col_c = 5, then if A = 3x2 and B = 2x4, we get
+//
+// ------------
+// ------------
+// ------------
+// ------------
+// -----xxxx---
+// -----xxxx---
+// -----xxxx---
+// ------------
+// ------------
+// ------------
+//
+template<int kRowA, int kColA, int kRowB, int kColB, int kOperation>
+inline void MatrixMatrixMultiplyEigen(const double* A,
+ const int num_row_a,
+ const int num_col_a,
+ const double* B,
+ const int num_row_b,
+ const int num_col_b,
+ double* C,
+ const int start_row_c,
+ const int start_col_c,
+ const int row_stride_c,
+ const int col_stride_c) {
+ const typename EigenTypes<kRowA, kColA>::ConstMatrixRef Aref(A, num_row_a, num_col_a);
+ const typename EigenTypes<kRowB, kColB>::ConstMatrixRef Bref(B, num_row_b, num_col_b);
+ MatrixRef Cref(C, row_stride_c, col_stride_c);
+ Eigen::Block<MatrixRef, kRowA, kColB> block(Cref,
+ start_row_c, start_col_c,
+ num_row_a, num_col_b);
+ if (kOperation > 0) {
+ block CERES_MAYBE_NOALIAS += Aref * Bref;
+ } else if (kOperation < 0) {
+ block CERES_MAYBE_NOALIAS -= Aref * Bref;
+ } else {
+ block CERES_MAYBE_NOALIAS = Aref * Bref;
+ }
+}
+
+template<int kRowA, int kColA, int kRowB, int kColB, int kOperation>
+inline void MatrixMatrixMultiplyNaive(const double* A,
+ const int num_row_a,
+ const int num_col_a,
+ const double* B,
+ const int num_row_b,
+ const int num_col_b,
+ double* C,
+ const int start_row_c,
+ const int start_col_c,
+ const int row_stride_c,
+ const int col_stride_c) {
+ DCHECK_GT(num_row_a, 0);
+ DCHECK_GT(num_col_a, 0);
+ DCHECK_GT(num_row_b, 0);
+ DCHECK_GT(num_col_b, 0);
+ DCHECK_GE(start_row_c, 0);
+ DCHECK_GE(start_col_c, 0);
+ DCHECK_GT(row_stride_c, 0);
+ DCHECK_GT(col_stride_c, 0);
+
+ DCHECK((kRowA == Eigen::Dynamic) || (kRowA == num_row_a));
+ DCHECK((kColA == Eigen::Dynamic) || (kColA == num_col_a));
+ DCHECK((kRowB == Eigen::Dynamic) || (kRowB == num_row_b));
+ DCHECK((kColB == Eigen::Dynamic) || (kColB == num_col_b));
+
+ const int NUM_ROW_A = (kRowA != Eigen::Dynamic ? kRowA : num_row_a);
+ const int NUM_COL_A = (kColA != Eigen::Dynamic ? kColA : num_col_a);
+ const int NUM_ROW_B = (kRowB != Eigen::Dynamic ? kRowB : num_row_b);
+ const int NUM_COL_B = (kColB != Eigen::Dynamic ? kColB : num_col_b);
+ DCHECK_EQ(NUM_COL_A, NUM_ROW_B);
+
+ const int NUM_ROW_C = NUM_ROW_A;
+ const int NUM_COL_C = NUM_COL_B;
+ DCHECK_LT(start_row_c + NUM_ROW_C, row_stride_c);
+ DCHECK_LT(start_col_c + NUM_COL_C, col_stride_c);
+
+ for (int row = 0; row < NUM_ROW_C; ++row) {
+ for (int col = 0; col < NUM_COL_C; ++col) {
+ double tmp = 0.0;
+ for (int k = 0; k < NUM_COL_A; ++k) {
+ tmp += A[row * NUM_COL_A + k] * B[k * NUM_COL_B + col];
+ }
+
+ const int index = (row + start_row_c) * col_stride_c + start_col_c + col;
+ if (kOperation > 0) {
+ C[index] += tmp;
+ } else if (kOperation < 0) {
+ C[index] -= tmp;
+ } else {
+ C[index] = tmp;
+ }
+ }
+ }
+}
+
+template<int kRowA, int kColA, int kRowB, int kColB, int kOperation>
+inline void MatrixMatrixMultiply(const double* A,
+ const int num_row_a,
+ const int num_col_a,
+ const double* B,
+ const int num_row_b,
+ const int num_col_b,
+ double* C,
+ const int start_row_c,
+ const int start_col_c,
+ const int row_stride_c,
+ const int col_stride_c) {
+#ifdef CERES_NO_CUSTOM_BLAS
+ MatrixMatrixMultiplyEigen<kRowA, kColA, kRowB, kColB, kOperation>(
+ A, num_row_a, num_col_a,
+ B, num_row_b, num_col_b,
+ C, start_row_c, start_col_c, row_stride_c, col_stride_c);
+ return;
+
+#else
+
+ if (kRowA != Eigen::Dynamic && kColA != Eigen::Dynamic &&
+ kRowB != Eigen::Dynamic && kColB != Eigen::Dynamic) {
+ MatrixMatrixMultiplyEigen<kRowA, kColA, kRowB, kColB, kOperation>(
+ A, num_row_a, num_col_a,
+ B, num_row_b, num_col_b,
+ C, start_row_c, start_col_c, row_stride_c, col_stride_c);
+ } else {
+ MatrixMatrixMultiplyNaive<kRowA, kColA, kRowB, kColB, kOperation>(
+ A, num_row_a, num_col_a,
+ B, num_row_b, num_col_b,
+ C, start_row_c, start_col_c, row_stride_c, col_stride_c);
+ }
+
+#endif
+}
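+
+// A minimal usage sketch (hypothetical values, matching the diagram
+// above): accumulate the 3x4 product of a 3x2 A and a 2x4 B into the
+// block of the row-major 10x12 matrix C starting at (4, 5). All
+// dimensions are compile-time constants, so the Eigen path is taken.
+//
+//   double A[6] = {1, 2, 3, 4, 5, 6};        // 3x2, row-major
+//   double B[8] = {1, 0, 0, 1, 0, 1, 1, 0};  // 2x4, row-major
+//   double C[120] = {};                      // 10x12, zeroed
+//   MatrixMatrixMultiply<3, 2, 2, 4, 1>(A, 3, 2,
+//                                       B, 2, 4,
+//                                       C, 4, 5, 10, 12);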
+
+
+// C op A' * B;
+//
+// where op can be +=, -=, or =.
+//
+// The template parameters (kRowA, kColA, kRowB, kColB) allow
+// specialization of the loop at compile time. If this information is
+// not available, then Eigen::Dynamic should be used as the template
+// argument.
+//
+// kOperation = 1 -> C += A' * B
+// kOperation = -1 -> C -= A' * B
+// kOperation = 0 -> C = A' * B
+//
+// The function can write into matrices C which are larger than the
+// matrix A' * B. This is done by specifying the true size of C via
+// row_stride_c and col_stride_c, and then indicating where A' * B
+// should be written into by start_row_c and start_col_c.
+//
+// Graphically if row_stride_c = 10, col_stride_c = 12, start_row_c =
+// 4 and start_col_c = 5, then if A = 2x3 and B = 2x4, we get
+//
+// ------------
+// ------------
+// ------------
+// ------------
+// -----xxxx---
+// -----xxxx---
+// -----xxxx---
+// ------------
+// ------------
+// ------------
+//
+template<int kRowA, int kColA, int kRowB, int kColB, int kOperation>
+inline void MatrixTransposeMatrixMultiplyEigen(const double* A,
+ const int num_row_a,
+ const int num_col_a,
+ const double* B,
+ const int num_row_b,
+ const int num_col_b,
+ double* C,
+ const int start_row_c,
+ const int start_col_c,
+ const int row_stride_c,
+ const int col_stride_c) {
+ const typename EigenTypes<kRowA, kColA>::ConstMatrixRef Aref(A, num_row_a, num_col_a);
+ const typename EigenTypes<kRowB, kColB>::ConstMatrixRef Bref(B, num_row_b, num_col_b);
+ MatrixRef Cref(C, row_stride_c, col_stride_c);
+ Eigen::Block<MatrixRef, kColA, kColB> block(Cref,
+ start_row_c, start_col_c,
+ num_col_a, num_col_b);
+ if (kOperation > 0) {
+ block CERES_MAYBE_NOALIAS += Aref.transpose() * Bref;
+ } else if (kOperation < 0) {
+ block CERES_MAYBE_NOALIAS -= Aref.transpose() * Bref;
+ } else {
+ block CERES_MAYBE_NOALIAS = Aref.transpose() * Bref;
+ }
+}
+
+template<int kRowA, int kColA, int kRowB, int kColB, int kOperation>
+inline void MatrixTransposeMatrixMultiplyNaive(const double* A,
+ const int num_row_a,
+ const int num_col_a,
+ const double* B,
+ const int num_row_b,
+ const int num_col_b,
+ double* C,
+ const int start_row_c,
+ const int start_col_c,
+ const int row_stride_c,
+ const int col_stride_c) {
+ DCHECK_GT(num_row_a, 0);
+ DCHECK_GT(num_col_a, 0);
+ DCHECK_GT(num_row_b, 0);
+ DCHECK_GT(num_col_b, 0);
+ DCHECK_GE(start_row_c, 0);
+ DCHECK_GE(start_col_c, 0);
+ DCHECK_GT(row_stride_c, 0);
+ DCHECK_GT(col_stride_c, 0);
+
+ DCHECK((kRowA == Eigen::Dynamic) || (kRowA == num_row_a));
+ DCHECK((kColA == Eigen::Dynamic) || (kColA == num_col_a));
+ DCHECK((kRowB == Eigen::Dynamic) || (kRowB == num_row_b));
+ DCHECK((kColB == Eigen::Dynamic) || (kColB == num_col_b));
+
+ const int NUM_ROW_A = (kRowA != Eigen::Dynamic ? kRowA : num_row_a);
+ const int NUM_COL_A = (kColA != Eigen::Dynamic ? kColA : num_col_a);
+ const int NUM_ROW_B = (kRowB != Eigen::Dynamic ? kRowB : num_row_b);
+ const int NUM_COL_B = (kColB != Eigen::Dynamic ? kColB : num_col_b);
+ DCHECK_EQ(NUM_ROW_A, NUM_ROW_B);
+
+ const int NUM_ROW_C = NUM_COL_A;
+ const int NUM_COL_C = NUM_COL_B;
+ DCHECK_LT(start_row_c + NUM_ROW_C, row_stride_c);
+ DCHECK_LT(start_col_c + NUM_COL_C, col_stride_c);
+
+ for (int row = 0; row < NUM_ROW_C; ++row) {
+ for (int col = 0; col < NUM_COL_C; ++col) {
+ double tmp = 0.0;
+ for (int k = 0; k < NUM_ROW_A; ++k) {
+ tmp += A[k * NUM_COL_A + row] * B[k * NUM_COL_B + col];
+ }
+
+ const int index = (row + start_row_c) * col_stride_c + start_col_c + col;
+ if (kOperation > 0) {
+ C[index] += tmp;
+ } else if (kOperation < 0) {
+ C[index] -= tmp;
+ } else {
+ C[index] = tmp;
+ }
+ }
+ }
+}
+
+template<int kRowA, int kColA, int kRowB, int kColB, int kOperation>
+inline void MatrixTransposeMatrixMultiply(const double* A,
+ const int num_row_a,
+ const int num_col_a,
+ const double* B,
+ const int num_row_b,
+ const int num_col_b,
+ double* C,
+ const int start_row_c,
+ const int start_col_c,
+ const int row_stride_c,
+ const int col_stride_c) {
+#ifdef CERES_NO_CUSTOM_BLAS
+ MatrixTransposeMatrixMultiplyEigen<kRowA, kColA, kRowB, kColB, kOperation>(
+ A, num_row_a, num_col_a,
+ B, num_row_b, num_col_b,
+ C, start_row_c, start_col_c, row_stride_c, col_stride_c);
+ return;
+
+#else
+
+ if (kRowA != Eigen::Dynamic && kColA != Eigen::Dynamic &&
+ kRowB != Eigen::Dynamic && kColB != Eigen::Dynamic) {
+ MatrixTransposeMatrixMultiplyEigen<kRowA, kColA, kRowB, kColB, kOperation>(
+ A, num_row_a, num_col_a,
+ B, num_row_b, num_col_b,
+ C, start_row_c, start_col_c, row_stride_c, col_stride_c);
+ } else {
+ MatrixTransposeMatrixMultiplyNaive<kRowA, kColA, kRowB, kColB, kOperation>(
+ A, num_row_a, num_col_a,
+ B, num_row_b, num_col_b,
+ C, start_row_c, start_col_c, row_stride_c, col_stride_c);
+ }
+
+#endif
+}
+
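+// c op A * b;
+//
+// where op can be +=, -=, or =.
+//
+// The template parameters (kRowA, kColA) allow specialization of the
+// loop at compile time. If this information is not available, then
+// Eigen::Dynamic should be used as the template argument.
+//
+// kOperation = 1 -> c += A * b
+// kOperation = -1 -> c -= A * b
+// kOperation = 0 -> c = A * b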
+template<int kRowA, int kColA, int kOperation>
+inline void MatrixVectorMultiply(const double* A,
+ const int num_row_a,
+ const int num_col_a,
+ const double* b,
+ double* c) {
+#ifdef CERES_NO_CUSTOM_BLAS
+ const typename EigenTypes<kRowA, kColA>::ConstMatrixRef Aref(A, num_row_a, num_col_a);
+ const typename EigenTypes<kColA>::ConstVectorRef bref(b, num_col_a);
+ typename EigenTypes<kRowA>::VectorRef cref(c, num_row_a);
+
+ // lazyProduct works better than .noalias() for matrix-vector
+ // products.
+ if (kOperation > 0) {
+ cref += Aref.lazyProduct(bref);
+ } else if (kOperation < 0) {
+ cref -= Aref.lazyProduct(bref);
+ } else {
+ cref = Aref.lazyProduct(bref);
+ }
+#else
+
+ DCHECK_GT(num_row_a, 0);
+ DCHECK_GT(num_col_a, 0);
+ DCHECK((kRowA == Eigen::Dynamic) || (kRowA == num_row_a));
+ DCHECK((kColA == Eigen::Dynamic) || (kColA == num_col_a));
+
+ const int NUM_ROW_A = (kRowA != Eigen::Dynamic ? kRowA : num_row_a);
+ const int NUM_COL_A = (kColA != Eigen::Dynamic ? kColA : num_col_a);
+
+ for (int row = 0; row < NUM_ROW_A; ++row) {
+ double tmp = 0.0;
+ for (int col = 0; col < NUM_COL_A; ++col) {
+ tmp += A[row * NUM_COL_A + col] * b[col];
+ }
+
+ if (kOperation > 0) {
+ c[row] += tmp;
+ } else if (kOperation < 0) {
+ c[row] -= tmp;
+ } else {
+ c[row] = tmp;
+ }
+ }
+#endif // CERES_NO_CUSTOM_BLAS
+}
+
+// c op A' * b;
+//
+// where op can be +=, -=, or =.
+//
+// The template parameters (kRowA, kColA) allow specialization of the
+// loop at compile time. If this information is not available, then
+// Eigen::Dynamic should be used as the template argument.
+//
+// kOperation = 1 -> c += A' * b
+// kOperation = -1 -> c -= A' * b
+// kOperation = 0 -> c = A' * b
+template<int kRowA, int kColA, int kOperation>
+inline void MatrixTransposeVectorMultiply(const double* A,
+ const int num_row_a,
+ const int num_col_a,
+ const double* b,
+ double* c) {
+#ifdef CERES_NO_CUSTOM_BLAS
+ const typename EigenTypes<kRowA, kColA>::ConstMatrixRef Aref(A, num_row_a, num_col_a);
+ const typename EigenTypes<kRowA>::ConstVectorRef bref(b, num_row_a);
+ typename EigenTypes<kColA>::VectorRef cref(c, num_col_a);
+
+ // lazyProduct works better than .noalias() for matrix-vector
+ // products.
+ if (kOperation > 0) {
+ cref += Aref.transpose().lazyProduct(bref);
+ } else if (kOperation < 0) {
+ cref -= Aref.transpose().lazyProduct(bref);
+ } else {
+ cref -= Aref.transpose().lazyProduct(bref);
+ }
+#else
+
+ DCHECK_GT(num_row_a, 0);
+ DCHECK_GT(num_col_a, 0);
+ DCHECK((kRowA == Eigen::Dynamic) || (kRowA == num_row_a));
+ DCHECK((kColA == Eigen::Dynamic) || (kColA == num_col_a));
+
+ const int NUM_ROW_A = (kRowA != Eigen::Dynamic ? kRowA : num_row_a);
+ const int NUM_COL_A = (kColA != Eigen::Dynamic ? kColA : num_col_a);
+
+ for (int row = 0; row < NUM_COL_A; ++row) {
+ double tmp = 0.0;
+ for (int col = 0; col < NUM_ROW_A; ++col) {
+ tmp += A[col * NUM_COL_A + row] * b[col];
+ }
+
+ if (kOperation > 0) {
+ c[row] += tmp;
+ } else if (kOperation < 0) {
+ c[row] -= tmp;
+ } else {
+ c[row] = tmp;
+ }
+ }
+#endif // CERES_NO_CUSTOM_BLAS
+}
+
+#undef CERES_MAYBE_NOALIAS
+
+} // namespace internal
+} // namespace ceres
+
+#endif // CERES_INTERNAL_BLAS_H_
diff --git a/extern/libmv/third_party/ceres/internal/ceres/block_sparse_matrix.cc b/extern/libmv/third_party/ceres/internal/ceres/block_sparse_matrix.cc
index dbe5ec93ef0..ab6fcef1945 100644
--- a/extern/libmv/third_party/ceres/internal/ceres/block_sparse_matrix.cc
+++ b/extern/libmv/third_party/ceres/internal/ceres/block_sparse_matrix.cc
@@ -33,6 +33,7 @@
#include <cstddef>
#include <algorithm>
#include <vector>
+#include "ceres/blas.h"
#include "ceres/block_structure.h"
#include "ceres/internal/eigen.h"
#include "ceres/matrix_proto.h"
@@ -117,16 +118,15 @@ void BlockSparseMatrix::RightMultiply(const double* x, double* y) const {
for (int i = 0; i < block_structure_->rows.size(); ++i) {
int row_block_pos = block_structure_->rows[i].block.position;
int row_block_size = block_structure_->rows[i].block.size;
- VectorRef yref(y + row_block_pos, row_block_size);
const vector<Cell>& cells = block_structure_->rows[i].cells;
for (int j = 0; j < cells.size(); ++j) {
int col_block_id = cells[j].block_id;
int col_block_size = block_structure_->cols[col_block_id].size;
int col_block_pos = block_structure_->cols[col_block_id].position;
- ConstVectorRef xref(x + col_block_pos, col_block_size);
- MatrixRef m(values_.get() + cells[j].position,
- row_block_size, col_block_size);
- yref += m.lazyProduct(xref);
+ MatrixVectorMultiply<Eigen::Dynamic, Eigen::Dynamic, 1>(
+ values_.get() + cells[j].position, row_block_size, col_block_size,
+ x + col_block_pos,
+ y + row_block_pos);
}
}
}
@@ -138,16 +138,16 @@ void BlockSparseMatrix::LeftMultiply(const double* x, double* y) const {
for (int i = 0; i < block_structure_->rows.size(); ++i) {
int row_block_pos = block_structure_->rows[i].block.position;
int row_block_size = block_structure_->rows[i].block.size;
- const ConstVectorRef xref(x + row_block_pos, row_block_size);
const vector<Cell>& cells = block_structure_->rows[i].cells;
for (int j = 0; j < cells.size(); ++j) {
int col_block_id = cells[j].block_id;
int col_block_size = block_structure_->cols[col_block_id].size;
int col_block_pos = block_structure_->cols[col_block_id].position;
- VectorRef yref(y + col_block_pos, col_block_size);
- MatrixRef m(values_.get() + cells[j].position,
- row_block_size, col_block_size);
- yref += m.transpose().lazyProduct(xref);
+ MatrixTransposeVectorMultiply<Eigen::Dynamic, Eigen::Dynamic, 1>(
+ values_.get() + cells[j].position, row_block_size, col_block_size,
+ x + row_block_pos,
+ y + col_block_pos);
+
}
}
}
diff --git a/extern/libmv/third_party/ceres/internal/ceres/partitioned_matrix_view.cc b/extern/libmv/third_party/ceres/internal/ceres/partitioned_matrix_view.cc
index 0722fc82c02..c488184ac93 100644
--- a/extern/libmv/third_party/ceres/internal/ceres/partitioned_matrix_view.cc
+++ b/extern/libmv/third_party/ceres/internal/ceres/partitioned_matrix_view.cc
@@ -35,6 +35,7 @@
#include <algorithm>
#include <cstring>
#include <vector>
+#include "ceres/blas.h"
#include "ceres/block_sparse_matrix.h"
#include "ceres/block_structure.h"
#include "ceres/internal/eigen.h"
@@ -103,13 +104,10 @@ void PartitionedMatrixView::RightMultiplyE(const double* x, double* y) const {
const int col_block_id = cell.block_id;
const int col_block_pos = bs->cols[col_block_id].position;
const int col_block_size = bs->cols[col_block_id].size;
-
- ConstVectorRef xref(x + col_block_pos, col_block_size);
- VectorRef yref(y + row_block_pos, row_block_size);
- ConstMatrixRef m(row_values + cell.position,
- row_block_size,
- col_block_size);
- yref += m.lazyProduct(xref);
+ MatrixVectorMultiply<Eigen::Dynamic, Eigen::Dynamic, 1>(
+ row_values + cell.position, row_block_size, col_block_size,
+ x + col_block_pos,
+ y + row_block_pos);
}
}
@@ -124,20 +122,16 @@ void PartitionedMatrixView::RightMultiplyF(const double* x, double* y) const {
for (int r = 0; r < bs->rows.size(); ++r) {
const int row_block_pos = bs->rows[r].block.position;
const int row_block_size = bs->rows[r].block.size;
- VectorRef yref(y + row_block_pos, row_block_size);
const vector<Cell>& cells = bs->rows[r].cells;
for (int c = (r < num_row_blocks_e_) ? 1 : 0; c < cells.size(); ++c) {
const double* row_values = matrix_.RowBlockValues(r);
const int col_block_id = cells[c].block_id;
const int col_block_pos = bs->cols[col_block_id].position;
const int col_block_size = bs->cols[col_block_id].size;
-
- ConstVectorRef xref(x + col_block_pos - num_cols_e(),
- col_block_size);
- ConstMatrixRef m(row_values + cells[c].position,
- row_block_size,
- col_block_size);
- yref += m.lazyProduct(xref);
+ MatrixVectorMultiply<Eigen::Dynamic, Eigen::Dynamic, 1>(
+ row_values + cells[c].position, row_block_size, col_block_size,
+ x + col_block_pos - num_cols_e(),
+ y + row_block_pos);
}
}
}
@@ -155,13 +149,10 @@ void PartitionedMatrixView::LeftMultiplyE(const double* x, double* y) const {
const int col_block_id = cell.block_id;
const int col_block_pos = bs->cols[col_block_id].position;
const int col_block_size = bs->cols[col_block_id].size;
-
- ConstVectorRef xref(x + row_block_pos, row_block_size);
- VectorRef yref(y + col_block_pos, col_block_size);
- ConstMatrixRef m(row_values + cell.position,
- row_block_size,
- col_block_size);
- yref += m.transpose().lazyProduct(xref);
+ MatrixTransposeVectorMultiply<Eigen::Dynamic, Eigen::Dynamic, 1>(
+ row_values + cell.position, row_block_size, col_block_size,
+ x + row_block_pos,
+ y + col_block_pos);
}
}
@@ -176,19 +167,16 @@ void PartitionedMatrixView::LeftMultiplyF(const double* x, double* y) const {
for (int r = 0; r < bs->rows.size(); ++r) {
const int row_block_pos = bs->rows[r].block.position;
const int row_block_size = bs->rows[r].block.size;
- ConstVectorRef xref(x + row_block_pos, row_block_size);
const vector<Cell>& cells = bs->rows[r].cells;
for (int c = (r < num_row_blocks_e_) ? 1 : 0; c < cells.size(); ++c) {
const double* row_values = matrix_.RowBlockValues(r);
const int col_block_id = cells[c].block_id;
const int col_block_pos = bs->cols[col_block_id].position;
const int col_block_size = bs->cols[col_block_id].size;
-
- VectorRef yref(y + col_block_pos - num_cols_e(), col_block_size);
- ConstMatrixRef m(row_values + cells[c].position,
- row_block_size,
- col_block_size);
- yref += m.transpose().lazyProduct(xref);
+ MatrixTransposeVectorMultiply<Eigen::Dynamic, Eigen::Dynamic, 1>(
+ row_values + cells[c].position, row_block_size, col_block_size,
+ x + row_block_pos,
+ y + col_block_pos - num_cols_e());
}
}
}
@@ -267,15 +255,15 @@ void PartitionedMatrixView::UpdateBlockDiagonalEtE(
const int row_block_size = bs->rows[r].block.size;
const int block_id = cell.block_id;
const int col_block_size = bs->cols[block_id].size;
- ConstMatrixRef m(row_values + cell.position,
- row_block_size,
- col_block_size);
-
const int cell_position =
block_diagonal_structure->rows[block_id].cells[0].position;
- MatrixRef(block_diagonal->mutable_values() + cell_position,
- col_block_size, col_block_size).noalias() += m.transpose() * m;
+ MatrixTransposeMatrixMultiply
+ <Eigen::Dynamic, Eigen::Dynamic, Eigen::Dynamic, Eigen::Dynamic, 1>(
+ row_values + cell.position, row_block_size, col_block_size,
+ row_values + cell.position, row_block_size, col_block_size,
+ block_diagonal->mutable_values() + cell_position,
+ 0, 0, col_block_size, col_block_size);
}
}
@@ -298,15 +286,16 @@ void PartitionedMatrixView::UpdateBlockDiagonalFtF(
for (int c = (r < num_row_blocks_e_) ? 1 : 0; c < cells.size(); ++c) {
const int col_block_id = cells[c].block_id;
const int col_block_size = bs->cols[col_block_id].size;
- ConstMatrixRef m(row_values + cells[c].position,
- row_block_size,
- col_block_size);
const int diagonal_block_id = col_block_id - num_col_blocks_e_;
const int cell_position =
block_diagonal_structure->rows[diagonal_block_id].cells[0].position;
- MatrixRef(block_diagonal->mutable_values() + cell_position,
- col_block_size, col_block_size).noalias() += m.transpose() * m;
+ MatrixTransposeMatrixMultiply
+ <Eigen::Dynamic, Eigen::Dynamic, Eigen::Dynamic, Eigen::Dynamic, 1>(
+ row_values + cells[c].position, row_block_size, col_block_size,
+ row_values + cells[c].position, row_block_size, col_block_size,
+ block_diagonal->mutable_values() + cell_position,
+ 0, 0, col_block_size, col_block_size);
}
}
}
diff --git a/extern/libmv/third_party/ceres/internal/ceres/schur_complement_solver.cc b/extern/libmv/third_party/ceres/internal/ceres/schur_complement_solver.cc
index e1be4e2a78a..875f4551044 100644
--- a/extern/libmv/third_party/ceres/internal/ceres/schur_complement_solver.cc
+++ b/extern/libmv/third_party/ceres/internal/ceres/schur_complement_solver.cc
@@ -291,14 +291,8 @@ bool SparseSchurComplementSolver::SolveReducedLinearSystemUsingSuiteSparse(
} else {
factor_ = ss_.AnalyzeCholesky(cholmod_lhs);
}
-
- if (VLOG_IS_ON(2)) {
- cholmod_print_common(const_cast<char*>("Symbolic Analysis"), ss_.mutable_cc());
- }
}
- CHECK_NOTNULL(factor_);
-
cholmod_dense* cholmod_solution =
ss_.SolveCholesky(cholmod_lhs, factor_, cholmod_rhs);
diff --git a/extern/libmv/third_party/ceres/internal/ceres/schur_eliminator.h b/extern/libmv/third_party/ceres/internal/ceres/schur_eliminator.h
index 877621bb48c..e0c7fdafcb6 100644
--- a/extern/libmv/third_party/ceres/internal/ceres/schur_eliminator.h
+++ b/extern/libmv/third_party/ceres/internal/ceres/schur_eliminator.h
@@ -321,9 +321,25 @@ class SchurEliminator : public SchurEliminatorBase {
// see the documentation of the Chunk object above.
vector<Chunk> chunks_;
+ // TODO(sameeragarwal): The following two arrays contain per-thread
+ // storage. They should be refactored into a per thread struct.
+
// Buffer to store the products of the y and z blocks generated
- // during the elimination phase.
+ // during the elimination phase. buffer_ is of size num_threads *
+ // buffer_size_. Each thread accesses the chunk
+ //
+ // [thread_id * buffer_size_ , (thread_id + 1) * buffer_size_]
+ //
scoped_array<double> buffer_;
+
+ // Buffer to store per thread matrix matrix products used by
+ // ChunkOuterProduct. Like buffer_ it is of size num_threads *
+ // buffer_size_. Each thread accesses the chunk
+ //
+ // [thread_id * buffer_size_ , (thread_id + 1) * buffer_size_]
+ //
+ scoped_array<double> chunk_outer_product_buffer_;
+
int buffer_size_;
int num_threads_;
int uneliminated_row_begins_;
diff --git a/extern/libmv/third_party/ceres/internal/ceres/schur_eliminator_impl.h b/extern/libmv/third_party/ceres/internal/ceres/schur_eliminator_impl.h
index 339c44bc41c..b46eab92c34 100644
--- a/extern/libmv/third_party/ceres/internal/ceres/schur_eliminator_impl.h
+++ b/extern/libmv/third_party/ceres/internal/ceres/schur_eliminator_impl.h
@@ -34,10 +34,6 @@
#ifndef CERES_INTERNAL_SCHUR_ELIMINATOR_IMPL_H_
#define CERES_INTERNAL_SCHUR_ELIMINATOR_IMPL_H_
-#ifdef CERES_USE_OPENMP
-#include <omp.h>
-#endif
-
// Eigen has an internal threshold switching between different matrix
// multiplication algorithms. In particular for matrices larger than
// EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD it uses a cache friendly
@@ -46,19 +42,25 @@
// are thin and long, the default choice may not be optimal. This is
// the case for us, as the default choice causes a 30% performance
// regression when we moved from Eigen2 to Eigen3.
+
#define EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD 10
+#ifdef CERES_USE_OPENMP
+#include <omp.h>
+#endif
+
#include <algorithm>
#include <map>
#include "Eigen/Dense"
+#include "ceres/blas.h"
#include "ceres/block_random_access_matrix.h"
#include "ceres/block_sparse_matrix.h"
#include "ceres/block_structure.h"
+#include "ceres/internal/eigen.h"
+#include "ceres/internal/scoped_ptr.h"
#include "ceres/map_util.h"
#include "ceres/schur_eliminator.h"
#include "ceres/stl_util.h"
-#include "ceres/internal/eigen.h"
-#include "ceres/internal/scoped_ptr.h"
#include "glog/logging.h"
namespace ceres {
@@ -149,13 +151,16 @@ Init(int num_eliminate_blocks, const CompressedRowBlockStructure* bs) {
buffer_.reset(new double[buffer_size_ * num_threads_]);
+ // chunk_outer_product_buffer_ only needs to store e_block_size *
+ // f_block_size, which is always less than buffer_size_, so we just
+ // allocate buffer_size_ per thread.
+ chunk_outer_product_buffer_.reset(new double[buffer_size_ * num_threads_]);
+
STLDeleteElements(&rhs_locks_);
rhs_locks_.resize(num_col_blocks - num_eliminate_blocks_);
for (int i = 0; i < num_col_blocks - num_eliminate_blocks_; ++i) {
rhs_locks_[i] = new Mutex;
}
-
- VLOG(1) << "Eliminator threads: " << num_threads_;
}
template <int kRowBlockSize, int kEBlockSize, int kFBlockSize>
@@ -261,7 +266,7 @@ Eliminate(const BlockSparseMatrixBase* A,
typename EigenTypes<kEBlockSize, kEBlockSize>::Matrix inverse_ete =
ete
.template selfadjointView<Eigen::Upper>()
- .ldlt()
+ .llt()
.solve(Matrix::Identity(e_block_size, e_block_size));
// For the current chunk compute and update the rhs of the reduced
@@ -294,8 +299,8 @@ BackSubstitute(const BlockSparseMatrixBase* A,
const int e_block_id = bs->rows[chunk.start].cells.front().block_id;
const int e_block_size = bs->cols[e_block_id].size;
- typename EigenTypes<kEBlockSize>::VectorRef y_block(
- y + bs->cols[e_block_id].position, e_block_size);
+ double* y_ptr = y + bs->cols[e_block_id].position;
+ typename EigenTypes<kEBlockSize>::VectorRef y_block(y_ptr, e_block_size);
typename EigenTypes<kEBlockSize, kEBlockSize>::Matrix
ete(e_block_size, e_block_size);
@@ -312,40 +317,35 @@ BackSubstitute(const BlockSparseMatrixBase* A,
const double* row_values = A->RowBlockValues(chunk.start + j);
const Cell& e_cell = row.cells.front();
DCHECK_EQ(e_block_id, e_cell.block_id);
- const typename EigenTypes<kRowBlockSize, kEBlockSize>::ConstMatrixRef
- e_block(row_values + e_cell.position,
- row.block.size,
- e_block_size);
- typename EigenTypes<kRowBlockSize>::Vector
- sj =
+ typename EigenTypes<kRowBlockSize>::Vector sj =
typename EigenTypes<kRowBlockSize>::ConstVectorRef
- (b + bs->rows[chunk.start + j].block.position,
- row.block.size);
+ (b + bs->rows[chunk.start + j].block.position, row.block.size);
for (int c = 1; c < row.cells.size(); ++c) {
const int f_block_id = row.cells[c].block_id;
const int f_block_size = bs->cols[f_block_id].size;
- const typename EigenTypes<kRowBlockSize, kFBlockSize>::ConstMatrixRef
- f_block(row_values + row.cells[c].position,
- row.block.size, f_block_size);
const int r_block = f_block_id - num_eliminate_blocks_;
- sj -= f_block *
- typename EigenTypes<kFBlockSize>::ConstVectorRef
- (z + lhs_row_layout_[r_block], f_block_size);
+ MatrixVectorMultiply<kRowBlockSize, kFBlockSize, -1>(
+ row_values + row.cells[c].position, row.block.size, f_block_size,
+ z + lhs_row_layout_[r_block],
+ sj.data());
}
- y_block += e_block.transpose() * sj;
- ete.template selfadjointView<Eigen::Upper>()
- .rankUpdate(e_block.transpose(), 1.0);
+ MatrixTransposeVectorMultiply<kRowBlockSize, kEBlockSize, 1>(
+ row_values + e_cell.position, row.block.size, e_block_size,
+ sj.data(),
+ y_ptr);
+
+ MatrixTransposeMatrixMultiply
+ <kRowBlockSize, kEBlockSize, kRowBlockSize, kEBlockSize, 1>(
+ row_values + e_cell.position, row.block.size, e_block_size,
+ row_values + e_cell.position, row.block.size, e_block_size,
+ ete.data(), 0, 0, e_block_size, e_block_size);
}
- y_block =
- ete
- .template selfadjointView<Eigen::Upper>()
- .ldlt()
- .solve(y_block);
+ ete.llt().solveInPlace(y_block);
}
}
@@ -382,15 +382,12 @@ UpdateRhs(const Chunk& chunk,
for (int c = 1; c < row.cells.size(); ++c) {
const int block_id = row.cells[c].block_id;
const int block_size = bs->cols[block_id].size;
- const typename EigenTypes<kRowBlockSize, kFBlockSize>::ConstMatrixRef
- b(row_values + row.cells[c].position,
- row.block.size, block_size);
-
const int block = block_id - num_eliminate_blocks_;
CeresMutexLock l(rhs_locks_[block]);
- typename EigenTypes<kFBlockSize>::VectorRef
- (rhs + lhs_row_layout_[block], block_size).noalias()
- += b.transpose() * sj;
+ MatrixTransposeVectorMultiply<kRowBlockSize, kFBlockSize, 1>(
+ row_values + row.cells[c].position,
+ row.block.size, block_size,
+ sj.data(), rhs + lhs_row_layout_[block]);
}
b_pos += row.block.size;
}
@@ -446,34 +443,31 @@ ChunkDiagonalBlockAndGradient(
// Extract the e_block, ETE += E_i' E_i
const Cell& e_cell = row.cells.front();
- const typename EigenTypes<kRowBlockSize, kEBlockSize>::ConstMatrixRef
- e_block(row_values + e_cell.position,
- row.block.size,
- e_block_size);
-
- ete->template selfadjointView<Eigen::Upper>()
- .rankUpdate(e_block.transpose(), 1.0);
+ MatrixTransposeMatrixMultiply
+ <kRowBlockSize, kEBlockSize, kRowBlockSize, kEBlockSize, 1>(
+ row_values + e_cell.position, row.block.size, e_block_size,
+ row_values + e_cell.position, row.block.size, e_block_size,
+ ete->data(), 0, 0, e_block_size, e_block_size);
// g += E_i' b_i
- g->noalias() += e_block.transpose() *
- typename EigenTypes<kRowBlockSize>::ConstVectorRef
- (b + b_pos, row.block.size);
+ MatrixTransposeVectorMultiply<kRowBlockSize, kEBlockSize, 1>(
+ row_values + e_cell.position, row.block.size, e_block_size,
+ b + b_pos,
+ g->data());
+
// buffer = E'F. This computation is done by iterating over the
// f_blocks for each row in the chunk.
for (int c = 1; c < row.cells.size(); ++c) {
const int f_block_id = row.cells[c].block_id;
const int f_block_size = bs->cols[f_block_id].size;
- const typename EigenTypes<kRowBlockSize, kFBlockSize>::ConstMatrixRef
- f_block(row_values + row.cells[c].position,
- row.block.size, f_block_size);
-
double* buffer_ptr =
buffer + FindOrDie(chunk.buffer_layout, f_block_id);
-
- typename EigenTypes<kEBlockSize, kFBlockSize>::MatrixRef
- (buffer_ptr, e_block_size, f_block_size).noalias()
- += e_block.transpose() * f_block;
+ MatrixTransposeMatrixMultiply
+ <kRowBlockSize, kEBlockSize, kRowBlockSize, kFBlockSize, 1>(
+ row_values + e_cell.position, row.block.size, e_block_size,
+ row_values + row.cells[c].position, row.block.size, f_block_size,
+ buffer_ptr, 0, 0, e_block_size, f_block_size);
}
b_pos += row.block.size;
}
@@ -497,15 +491,24 @@ ChunkOuterProduct(const CompressedRowBlockStructure* bs,
// references to the left hand side.
const int e_block_size = inverse_ete.rows();
BufferLayoutType::const_iterator it1 = buffer_layout.begin();
+
+#ifdef CERES_USE_OPENMP
+ int thread_id = omp_get_thread_num();
+#else
+ int thread_id = 0;
+#endif
+ double* b1_transpose_inverse_ete =
+ chunk_outer_product_buffer_.get() + thread_id * buffer_size_;
+
// S(i,j) -= bi' * ete^{-1} b_j
for (; it1 != buffer_layout.end(); ++it1) {
const int block1 = it1->first - num_eliminate_blocks_;
const int block1_size = bs->cols[it1->first].size;
-
- const typename EigenTypes<kEBlockSize, kFBlockSize>::ConstMatrixRef
- b1(buffer + it1->second, e_block_size, block1_size);
- const typename EigenTypes<kFBlockSize, kEBlockSize>::Matrix
- b1_transpose_inverse_ete = b1.transpose() * inverse_ete;
+ MatrixTransposeMatrixMultiply
+ <kEBlockSize, kFBlockSize, kEBlockSize, kEBlockSize, 0>(
+ buffer + it1->second, e_block_size, block1_size,
+ inverse_ete.data(), e_block_size, e_block_size,
+ b1_transpose_inverse_ete, 0, 0, block1_size, e_block_size);
BufferLayoutType::const_iterator it2 = it1;
for (; it2 != buffer_layout.end(); ++it2) {
@@ -515,46 +518,15 @@ ChunkOuterProduct(const CompressedRowBlockStructure* bs,
CellInfo* cell_info = lhs->GetCell(block1, block2,
&r, &c,
&row_stride, &col_stride);
- if (cell_info == NULL) {
- continue;
+ if (cell_info != NULL) {
+ const int block2_size = bs->cols[it2->first].size;
+ CeresMutexLock l(&cell_info->m);
+ MatrixMatrixMultiply
+ <kFBlockSize, kEBlockSize, kEBlockSize, kFBlockSize, -1>(
+ b1_transpose_inverse_ete, block1_size, e_block_size,
+ buffer + it2->second, e_block_size, block2_size,
+ cell_info->values, r, c, row_stride, col_stride);
}
-
- const int block2_size = bs->cols[it2->first].size;
- const typename EigenTypes<kEBlockSize, kFBlockSize>::ConstMatrixRef
- b2(buffer + it2->second, e_block_size, block2_size);
-
- CeresMutexLock l(&cell_info->m);
- MatrixRef m(cell_info->values, row_stride, col_stride);
-
- // We explicitly construct a block object here instead of using
- // m.block(), as m.block() variant of the constructor does not
- // allow mixing of template sizing and runtime sizing parameters
- // like the Matrix class does.
- Eigen::Block<MatrixRef, kFBlockSize, kFBlockSize>
- block(m, r, c, block1_size, block2_size);
-#ifdef CERES_WORK_AROUND_ANDROID_NDK_COMPILER_BUG
- // Removing the ".noalias()" annotation on the following statement is
- // necessary to produce a correct build with the Android NDK, including
- // versions 6, 7, 8, and 8b, when built with STLPort and the
- // non-standalone toolchain (i.e. ndk-build). This appears to be a
- // compiler bug; if the workaround is not in place, the line
- //
- // block.noalias() -= b1_transpose_inverse_ete * b2;
- //
- // gets compiled to
- //
- // block.noalias() += b1_transpose_inverse_ete * b2;
- //
- // which breaks schur elimination. Introducing a temporary by removing the
- // .noalias() annotation causes the issue to disappear. Tracking this
- // issue down was tricky, since the test suite doesn't run when built with
- // the non-standalone toolchain.
- //
- // TODO(keir): Make a reproduction case for this and send it upstream.
- block -= b1_transpose_inverse_ete * b2;
-#else
- block.noalias() -= b1_transpose_inverse_ete * b2;
-#endif // CERES_WORK_AROUND_ANDROID_NDK_COMPILER_BUG
}
}
}
@@ -624,10 +596,13 @@ NoEBlockRowOuterProduct(const BlockSparseMatrixBase* A,
&row_stride, &col_stride);
if (cell_info != NULL) {
CeresMutexLock l(&cell_info->m);
- MatrixRef m(cell_info->values, row_stride, col_stride);
- m.block(r, c, block1_size, block1_size)
- .selfadjointView<Eigen::Upper>()
- .rankUpdate(b1.transpose(), 1.0);
+ // This multiply currently ignores the fact that this is a
+ // symmetric outer product.
+ MatrixTransposeMatrixMultiply
+ <Eigen::Dynamic, Eigen::Dynamic, Eigen::Dynamic, Eigen::Dynamic, 1>(
+ row_values + row.cells[i].position, row.block.size, block1_size,
+ row_values + row.cells[i].position, row.block.size, block1_size,
+ cell_info->values, r, c, row_stride, col_stride);
}
for (int j = i + 1; j < row.cells.size(); ++j) {
@@ -638,17 +613,15 @@ NoEBlockRowOuterProduct(const BlockSparseMatrixBase* A,
CellInfo* cell_info = lhs->GetCell(block1, block2,
&r, &c,
&row_stride, &col_stride);
- if (cell_info == NULL) {
- continue;
+ if (cell_info != NULL) {
+ const int block2_size = bs->cols[row.cells[j].block_id].size;
+ CeresMutexLock l(&cell_info->m);
+ MatrixTransposeMatrixMultiply
+ <Eigen::Dynamic, Eigen::Dynamic, Eigen::Dynamic, Eigen::Dynamic, 1>(
+ row_values + row.cells[i].position, row.block.size, block1_size,
+ row_values + row.cells[j].position, row.block.size, block2_size,
+ cell_info->values, r, c, row_stride, col_stride);
}
-
- const int block2_size = bs->cols[row.cells[j].block_id].size;
- CeresMutexLock l(&cell_info->m);
- MatrixRef m(cell_info->values, row_stride, col_stride);
- m.block(r, c, block1_size, block2_size).noalias() +=
- b1.transpose() * ConstMatrixRef(row_values + row.cells[j].position,
- row.block.size,
- block2_size);
}
}
}
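As the added comment concedes, the diagonal cell here is a symmetric outer product b1' * b1, so the dynamic kernel computes both triangles where the removed rankUpdate touched only the upper one. A symmetry-aware variant would be straightforward; a hypothetical sketch, assuming the same row-major cell layout as above:

    // Hypothetical: accumulate only the upper triangle of b1' * b1 into the
    // cell at (r, c), mirroring the old selfadjointView<Eigen::Upper>()
    // rankUpdate(b1.transpose(), 1.0).
    void SymmetricRankUpdateSketch(const double* b1, int num_rows, int block_size,
                                   double* values, int r, int c, int col_stride) {
      for (int i = 0; i < block_size; ++i) {
        for (int j = i; j < block_size; ++j) {   // j >= i: upper triangle only
          double sum = 0.0;
          for (int k = 0; k < num_rows; ++k) {
            sum += b1[k * block_size + i] * b1[k * block_size + j];
          }
          values[(r + i) * col_stride + (c + j)] += sum;
        }
      }
    }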
@@ -670,25 +643,18 @@ EBlockRowOuterProduct(const BlockSparseMatrixBase* A,
DCHECK_GE(block1, 0);
const int block1_size = bs->cols[row.cells[i].block_id].size;
- const typename EigenTypes<kRowBlockSize, kFBlockSize>::ConstMatrixRef
- b1(row_values + row.cells[i].position,
- row.block.size, block1_size);
- {
- int r, c, row_stride, col_stride;
- CellInfo* cell_info = lhs->GetCell(block1, block1,
- &r, &c,
- &row_stride, &col_stride);
- if (cell_info == NULL) {
- continue;
- }
-
+ int r, c, row_stride, col_stride;
+ CellInfo* cell_info = lhs->GetCell(block1, block1,
+ &r, &c,
+ &row_stride, &col_stride);
+ if (cell_info != NULL) {
CeresMutexLock l(&cell_info->m);
- MatrixRef m(cell_info->values, row_stride, col_stride);
-
- Eigen::Block<MatrixRef, kFBlockSize, kFBlockSize>
- block(m, r, c, block1_size, block1_size);
- block.template selfadjointView<Eigen::Upper>()
- .rankUpdate(b1.transpose(), 1.0);
+ // block += b1.transpose() * b1;
+ MatrixTransposeMatrixMultiply
+ <kRowBlockSize, kFBlockSize, kRowBlockSize, kFBlockSize, 1>(
+ row_values + row.cells[i].position, row.block.size, block1_size,
+ row_values + row.cells[i].position, row.block.size, block1_size,
+ cell_info->values, r, c, row_stride, col_stride);
}
for (int j = i + 1; j < row.cells.size(); ++j) {
@@ -700,20 +666,14 @@ EBlockRowOuterProduct(const BlockSparseMatrixBase* A,
CellInfo* cell_info = lhs->GetCell(block1, block2,
&r, &c,
&row_stride, &col_stride);
- if (cell_info == NULL) {
- continue;
+ if (cell_info != NULL) {
+ // block += b1.transpose() * b2;
+ MatrixTransposeMatrixMultiply
+ <kRowBlockSize, kFBlockSize, kRowBlockSize, kFBlockSize, 1>(
+ row_values + row.cells[i].position, row.block.size, block1_size,
+ row_values + row.cells[j].position, row.block.size, block2_size,
+ cell_info->values, r, c, row_stride, col_stride);
}
-
- const typename EigenTypes<kRowBlockSize, kFBlockSize>::ConstMatrixRef
- b2(row_values + row.cells[j].position,
- row.block.size,
- block2_size);
-
- CeresMutexLock l(&cell_info->m);
- MatrixRef m(cell_info->values, row_stride, col_stride);
- Eigen::Block<MatrixRef, kFBlockSize, kFBlockSize>
- block(m, r, c, block1_size, block2_size);
- block.noalias() += b1.transpose() * b2;
}
}
}
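The off-diagonal rewrite in EBlockRowOuterProduct preserves the old semantics exactly: block.noalias() += b1.transpose() * b2 becomes a kOperation = 1 kernel call. A small equivalence check against plain Eigen, reusing the sketch from above (the dimensions are illustrative, not taken from a real problem):

    #include <Eigen/Dense>
    #include <iostream>

    int main() {
      using RowMajor =
          Eigen::Matrix<double, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>;
      RowMajor b1 = RowMajor::Random(4, 3);   // row.block.size x block1_size
      RowMajor b2 = RowMajor::Random(4, 2);   // row.block.size x block2_size

      RowMajor reference = RowMajor::Zero(3, 2);
      reference.noalias() += b1.transpose() * b2;   // the removed expression

      RowMajor cell = RowMajor::Zero(3, 2);
      MatrixTransposeMatrixMultiplySketch<Eigen::Dynamic, Eigen::Dynamic,
                                          Eigen::Dynamic, Eigen::Dynamic, 1>(
          b1.data(), 4, 3, b2.data(), 4, 2, cell.data(), 0, 0, 3, 2);

      std::cout << (reference - cell).norm() << "\n";   // expect ~0
      return 0;
    }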
diff --git a/extern/libmv/third_party/ceres/internal/ceres/sparse_normal_cholesky_solver.cc b/extern/libmv/third_party/ceres/internal/ceres/sparse_normal_cholesky_solver.cc
index 195cacb984c..aacba86e64a 100644
--- a/extern/libmv/third_party/ceres/internal/ceres/sparse_normal_cholesky_solver.cc
+++ b/extern/libmv/third_party/ceres/internal/ceres/sparse_normal_cholesky_solver.cc
@@ -213,13 +213,7 @@ LinearSolver::Summary SparseNormalCholeskySolver::SolveImplUsingSuiteSparse(
} else {
factor_ = ss_.AnalyzeCholesky(lhs.get());
}
-
- if (VLOG_IS_ON(2)) {
- cholmod_print_common(const_cast<char*>("Symbolic Analysis"), ss_.mutable_cc());
- }
}
-
- CHECK_NOTNULL(factor_);
event_logger.AddEvent("Analysis");
cholmod_dense* sol = ss_.SolveCholesky(lhs.get(), factor_, rhs);
diff --git a/extern/libmv/third_party/ceres/internal/ceres/suitesparse.cc b/extern/libmv/third_party/ceres/internal/ceres/suitesparse.cc
index d200aeb82f3..87fae75287e 100644
--- a/extern/libmv/third_party/ceres/internal/ceres/suitesparse.cc
+++ b/extern/libmv/third_party/ceres/internal/ceres/suitesparse.cc
@@ -35,8 +35,18 @@
#include "cholmod.h"
#include "ceres/compressed_row_sparse_matrix.h"
#include "ceres/triplet_sparse_matrix.h"
+
namespace ceres {
namespace internal {
+
+SuiteSparse::SuiteSparse() {
+ cholmod_start(&cc_);
+}
+
+SuiteSparse::~SuiteSparse() {
+ cholmod_finish(&cc_);
+}
+
cholmod_sparse* SuiteSparse::CreateSparseMatrix(TripletSparseMatrix* A) {
cholmod_triplet triplet;
@@ -117,10 +127,16 @@ cholmod_factor* SuiteSparse::AnalyzeCholesky(cholmod_sparse* A) {
cc_.nmethods = 1;
cc_.method[0].ordering = CHOLMOD_AMD;
cc_.supernodal = CHOLMOD_AUTO;
+
cholmod_factor* factor = cholmod_analyze(A, &cc_);
CHECK_EQ(cc_.status, CHOLMOD_OK)
<< "Cholmod symbolic analysis failed " << cc_.status;
CHECK_NOTNULL(factor);
+
+ if (VLOG_IS_ON(2)) {
+ cholmod_print_common(const_cast<char*>("Symbolic Analysis"), &cc_);
+ }
+
return factor;
}
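With this change the symbolic-analysis summary is printed from inside AnalyzeCholesky itself, guarded by VLOG_IS_ON(2). Under glog that guard is satisfied by raising the verbosity; one way to do it from code, assuming the standard glog flag is available (most applications would instead pass --v=2 on the command line or set GLOG_v=2 in the environment):

    #include <glog/logging.h>

    int main(int argc, char** argv) {
      FLAGS_v = 2;   // enable VLOG(2) / VLOG_IS_ON(2) everywhere
      google::InitGoogleLogging(argv[0]);
      // ... any AnalyzeCholesky() call now also emits the
      // "Symbolic Analysis" cholmod_print_common() report.
      return 0;
    }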
@@ -139,13 +155,20 @@ cholmod_factor* SuiteSparse::AnalyzeCholeskyWithUserOrdering(
cholmod_sparse* A,
const vector<int>& ordering) {
CHECK_EQ(ordering.size(), A->nrow);
+
cc_.nmethods = 1;
cc_.method[0].ordering = CHOLMOD_GIVEN;
+
cholmod_factor* factor =
cholmod_analyze_p(A, const_cast<int*>(&ordering[0]), NULL, 0, &cc_);
CHECK_EQ(cc_.status, CHOLMOD_OK)
<< "Cholmod symbolic analysis failed " << cc_.status;
CHECK_NOTNULL(factor);
+
+ if (VLOG_IS_ON(2)) {
+ cholmod_print_common(const_cast<char*>("Symbolic Analysis"), &cc_);
+ }
+
return factor;
}
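AnalyzeCholeskyWithUserOrdering hands the caller's fill-reducing permutation straight to cholmod_analyze_p via CHOLMOD_GIVEN, and the CHECK_EQ enforces that it has one entry per row of A. A sketch of a call site; the helper and its identity ordering are purely illustrative, since real callers derive the permutation from the problem structure:

    #include <vector>
    #include "ceres/suitesparse.h"

    // Hypothetical helper, not part of this change.
    cholmod_factor* AnalyzeWithIdentityOrdering(
        ceres::internal::SuiteSparse* ss, cholmod_sparse* A) {
      std::vector<int> ordering(A->nrow);
      for (size_t i = 0; i < ordering.size(); ++i) {
        ordering[i] = static_cast<int>(i);   // identity, for illustration only
      }
      return ss->AnalyzeCholeskyWithUserOrdering(A, ordering);
    }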
@@ -276,36 +299,52 @@ bool SuiteSparse::Cholesky(cholmod_sparse* A, cholmod_factor* L) {
CHECK_NOTNULL(A);
CHECK_NOTNULL(L);
+ // Save the current print level and silence CHOLMOD; otherwise
+ // CHOLMOD is prone to dumping messages to stderr, which can be
+ // distracting when the error (e.g., an indefinite matrix) is not a
+ // fatal failure.
+ const int old_print_level = cc_.print;
+ cc_.print = 0;
+
cc_.quick_return_if_not_posdef = 1;
int status = cholmod_factorize(A, L, &cc_);
+ cc_.print = old_print_level;
+
+ // TODO(sameeragarwal): This switch statement is not consistent. It
+ // treats all kinds of CHOLMOD failures as warnings. Some of these,
+ // like out of memory, are definitely not warnings. The problem is
+ // that the return value of Cholesky is two-valued, but the state of
+ // the linear solver is really three-valued: SUCCESS,
+ // NON_FATAL_FAILURE (e.g., an indefinite matrix) and FATAL_FAILURE
+ // (e.g., out of memory).
switch (cc_.status) {
case CHOLMOD_NOT_INSTALLED:
- LOG(WARNING) << "Cholmod failure: method not installed.";
+ LOG(WARNING) << "CHOLMOD failure: Method not installed.";
return false;
case CHOLMOD_OUT_OF_MEMORY:
- LOG(WARNING) << "Cholmod failure: out of memory.";
+ LOG(WARNING) << "CHOLMOD failure: Out of memory.";
return false;
case CHOLMOD_TOO_LARGE:
- LOG(WARNING) << "Cholmod failure: integer overflow occured.";
+ LOG(WARNING) << "CHOLMOD failure: Integer overflow occured.";
return false;
case CHOLMOD_INVALID:
- LOG(WARNING) << "Cholmod failure: invalid input.";
+ LOG(WARNING) << "CHOLMOD failure: Invalid input.";
return false;
case CHOLMOD_NOT_POSDEF:
// TODO(sameeragarwal): These two warnings require more
// sophisticated handling going forward. For now we will be
// strict and treat them as failures.
- LOG(WARNING) << "Cholmod warning: matrix not positive definite.";
+ LOG(WARNING) << "CHOLMOD warning: Matrix not positive definite.";
return false;
case CHOLMOD_DSMALL:
- LOG(WARNING) << "Cholmod warning: D for LDL' or diag(L) or "
+ LOG(WARNING) << "CHOLMOD warning: D for LDL' or diag(L) or "
<< "LL' has tiny absolute value.";
return false;
case CHOLMOD_OK:
if (status != 0) {
return true;
}
- LOG(WARNING) << "Cholmod failure: cholmod_factorize returned zero "
+ LOG(WARNING) << "CHOLMOD failure: cholmod_factorize returned zero "
<< "but cholmod_common::status is CHOLMOD_OK."
<< "Please report this to ceres-solver@googlegroups.com.";
return false;
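The TODO above is concrete enough to sketch. A three-valued result would let callers treat a recoverable failure differently from a fatal one; a hypothetical shape, mapping the CHOLMOD statuses handled by the switch (not part of this change):

    // Hypothetical three-valued result, per the TODO.
    enum FactorizationStatus {
      FACTORIZE_SUCCESS,
      FACTORIZE_NON_FATAL_FAILURE,   // e.g. CHOLMOD_NOT_POSDEF, CHOLMOD_DSMALL
      FACTORIZE_FATAL_FAILURE        // e.g. CHOLMOD_OUT_OF_MEMORY,
                                     //      CHOLMOD_TOO_LARGE, CHOLMOD_INVALID
    };

    FactorizationStatus ClassifyCholmodStatus(int cholmod_status) {
      switch (cholmod_status) {
        case CHOLMOD_OK:
          return FACTORIZE_SUCCESS;
        case CHOLMOD_NOT_POSDEF:
        case CHOLMOD_DSMALL:
          return FACTORIZE_NON_FATAL_FAILURE;
        default:   // out of memory, too large, invalid input, not installed
          return FACTORIZE_FATAL_FAILURE;
      }
    }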
diff --git a/extern/libmv/third_party/ceres/internal/ceres/suitesparse.h b/extern/libmv/third_party/ceres/internal/ceres/suitesparse.h
index 3fe79080d5d..5a8ef63bd2b 100644
--- a/extern/libmv/third_party/ceres/internal/ceres/suitesparse.h
+++ b/extern/libmv/third_party/ceres/internal/ceres/suitesparse.h
@@ -56,8 +56,8 @@ class TripletSparseMatrix;
// for all cholmod function calls.
class SuiteSparse {
public:
- SuiteSparse() { cholmod_start(&cc_); }
- ~SuiteSparse() { cholmod_finish(&cc_); }
+ SuiteSparse();
+ ~SuiteSparse();
// Functions for building cholmod_sparse objects from sparse
// matrices stored in triplet form. The matrix A is not
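With the constructor and destructor now defined out of line in suitesparse.cc, SuiteSparse remains an RAII wrapper around the cholmod_common workspace: cholmod_start runs on construction and cholmod_finish on destruction, so a stack-allocated instance cleans up automatically. A usage sketch built only from functions visible in this diff (factor cleanup elided):

    #include "ceres/suitesparse.h"

    void Example(ceres::internal::TripletSparseMatrix* A_triplet) {
      ceres::internal::SuiteSparse ss;               // cholmod_start(&cc_)
      cholmod_sparse* A = ss.CreateSparseMatrix(A_triplet);
      cholmod_factor* factor = ss.AnalyzeCholesky(A);
      // ... ss.Cholesky(A, factor), solves, etc. ...
      ss.Free(A);
    }                                                // cholmod_finish(&cc_)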
diff --git a/extern/libmv/third_party/ceres/internal/ceres/visibility_based_preconditioner.cc b/extern/libmv/third_party/ceres/internal/ceres/visibility_based_preconditioner.cc
index 1678d0072e5..f5c46bfcc68 100644
--- a/extern/libmv/third_party/ceres/internal/ceres/visibility_based_preconditioner.cc
+++ b/extern/libmv/third_party/ceres/internal/ceres/visibility_based_preconditioner.cc
@@ -426,14 +426,8 @@ bool VisibilityBasedPreconditioner::Factorize() {
} else {
factor_ = ss_.AnalyzeCholesky(lhs);
}
-
- if (VLOG_IS_ON(2)) {
- cholmod_print_common(const_cast<char*>("Symbolic Analysis"), ss_.mutable_cc());
- }
}
- CHECK_NOTNULL(factor_);
-
bool status = ss_.Cholesky(lhs, factor_);
ss_.Free(lhs);
return status;
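Factorize() now mirrors SparseNormalCholeskySolver: the caller-side VLOG dump and CHECK_NOTNULL are gone because AnalyzeCholesky performs both itself. The resulting analyze-once / factorize-per-call flow, condensed from the hunk above (error handling elided; names as in the diff):

    // Symbolic analysis is cached across calls; numeric factorization is not.
    if (factor_ == NULL) {
      factor_ = ss_.AnalyzeCholesky(lhs);        // also CHECKs and VLOGs now
    }
    bool status = ss_.Cholesky(lhs, factor_);    // numeric refactorization
    ss_.Free(lhs);
    return status;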