diff options
Diffstat (limited to 'extern/ceres/internal/ceres/accelerate_sparse.cc')
-rw-r--r-- | extern/ceres/internal/ceres/accelerate_sparse.cc | 289 |
1 files changed, 289 insertions, 0 deletions
diff --git a/extern/ceres/internal/ceres/accelerate_sparse.cc b/extern/ceres/internal/ceres/accelerate_sparse.cc new file mode 100644 index 00000000000..eb04e7113d7 --- /dev/null +++ b/extern/ceres/internal/ceres/accelerate_sparse.cc @@ -0,0 +1,289 @@ +// Ceres Solver - A fast non-linear least squares minimizer +// Copyright 2018 Google Inc. All rights reserved. +// http://ceres-solver.org/ +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// * Neither the name of Google Inc. nor the names of its contributors may be +// used to endorse or promote products derived from this software without +// specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// +// Author: alexs.mac@gmail.com (Alex Stewart) + +// This include must come before any #ifndef check on Ceres compile options. +#include "ceres/internal/port.h" + +#ifndef CERES_NO_ACCELERATE_SPARSE + +#include "ceres/accelerate_sparse.h" + +#include <algorithm> +#include <string> +#include <vector> + +#include "ceres/compressed_col_sparse_matrix_utils.h" +#include "ceres/compressed_row_sparse_matrix.h" +#include "ceres/triplet_sparse_matrix.h" +#include "glog/logging.h" + +#define CASESTR(x) case x: return #x + +namespace ceres { +namespace internal { + +namespace { +const char* SparseStatusToString(SparseStatus_t status) { + switch (status) { + CASESTR(SparseStatusOK); + CASESTR(SparseFactorizationFailed); + CASESTR(SparseMatrixIsSingular); + CASESTR(SparseInternalError); + CASESTR(SparseParameterError); + CASESTR(SparseStatusReleased); + default: + return "UKNOWN"; + } +} +} // namespace. + +// Resizes workspace as required to contain at least required_size bytes +// aligned to kAccelerateRequiredAlignment and returns a pointer to the +// aligned start. +void* ResizeForAccelerateAlignment(const size_t required_size, + std::vector<uint8_t> *workspace) { + // As per the Accelerate documentation, all workspace memory passed to the + // sparse solver functions must be 16-byte aligned. + constexpr int kAccelerateRequiredAlignment = 16; + // Although malloc() on macOS should always be 16-byte aligned, it is unclear + // if this holds for new(), or on other Apple OSs (phoneOS, watchOS etc). + // As such we assume it is not and use std::align() to create a (potentially + // offset) 16-byte aligned sub-buffer of the specified size within workspace. + workspace->resize(required_size + kAccelerateRequiredAlignment); + size_t size_from_aligned_start = workspace->size(); + void* aligned_solve_workspace_start = + reinterpret_cast<void*>(workspace->data()); + aligned_solve_workspace_start = + std::align(kAccelerateRequiredAlignment, + required_size, + aligned_solve_workspace_start, + size_from_aligned_start); + CHECK(aligned_solve_workspace_start != nullptr) + << "required_size: " << required_size + << ", workspace size: " << workspace->size(); + return aligned_solve_workspace_start; +} + +template<typename Scalar> +void AccelerateSparse<Scalar>::Solve(NumericFactorization* numeric_factor, + DenseVector* rhs_and_solution) { + // From SparseSolve() documentation in Solve.h + const int required_size = + numeric_factor->solveWorkspaceRequiredStatic + + numeric_factor->solveWorkspaceRequiredPerRHS; + SparseSolve(*numeric_factor, *rhs_and_solution, + ResizeForAccelerateAlignment(required_size, &solve_workspace_)); +} + +template<typename Scalar> +typename AccelerateSparse<Scalar>::ASSparseMatrix +AccelerateSparse<Scalar>::CreateSparseMatrixTransposeView( + CompressedRowSparseMatrix* A) { + // Accelerate uses CSC as its sparse storage format whereas Ceres uses CSR. + // As this method returns the transpose view we can flip rows/cols to map + // from CSR to CSC^T. + // + // Accelerate's columnStarts is a long*, not an int*. These types might be + // different (e.g. ARM on iOS) so always make a copy. + column_starts_.resize(A->num_rows() +1); // +1 for final column length. + std::copy_n(A->rows(), column_starts_.size(), &column_starts_[0]); + + ASSparseMatrix At; + At.structure.rowCount = A->num_cols(); + At.structure.columnCount = A->num_rows(); + At.structure.columnStarts = &column_starts_[0]; + At.structure.rowIndices = A->mutable_cols(); + At.structure.attributes.transpose = false; + At.structure.attributes.triangle = SparseUpperTriangle; + At.structure.attributes.kind = SparseSymmetric; + At.structure.attributes._reserved = 0; + At.structure.attributes._allocatedBySparse = 0; + At.structure.blockSize = 1; + if (std::is_same<Scalar, double>::value) { + At.data = reinterpret_cast<Scalar*>(A->mutable_values()); + } else { + values_ = + ConstVectorRef(A->values(), A->num_nonzeros()).template cast<Scalar>(); + At.data = values_.data(); + } + return At; +} + +template<typename Scalar> +typename AccelerateSparse<Scalar>::SymbolicFactorization +AccelerateSparse<Scalar>::AnalyzeCholesky(ASSparseMatrix* A) { + return SparseFactor(SparseFactorizationCholesky, A->structure); +} + +template<typename Scalar> +typename AccelerateSparse<Scalar>::NumericFactorization +AccelerateSparse<Scalar>::Cholesky(ASSparseMatrix* A, + SymbolicFactorization* symbolic_factor) { + return SparseFactor(*symbolic_factor, *A); +} + +template<typename Scalar> +void AccelerateSparse<Scalar>::Cholesky(ASSparseMatrix* A, + NumericFactorization* numeric_factor) { + // From SparseRefactor() documentation in Solve.h + const int required_size = std::is_same<Scalar, double>::value + ? numeric_factor->symbolicFactorization.workspaceSize_Double + : numeric_factor->symbolicFactorization.workspaceSize_Float; + return SparseRefactor(*A, numeric_factor, + ResizeForAccelerateAlignment(required_size, + &factorization_workspace_)); +} + +// Instantiate only for the specific template types required/supported s/t the +// definition can be in the .cc file. +template class AccelerateSparse<double>; +template class AccelerateSparse<float>; + +template<typename Scalar> +std::unique_ptr<SparseCholesky> +AppleAccelerateCholesky<Scalar>::Create(OrderingType ordering_type) { + return std::unique_ptr<SparseCholesky>( + new AppleAccelerateCholesky<Scalar>(ordering_type)); +} + +template<typename Scalar> +AppleAccelerateCholesky<Scalar>::AppleAccelerateCholesky( + const OrderingType ordering_type) + : ordering_type_(ordering_type) {} + +template<typename Scalar> +AppleAccelerateCholesky<Scalar>::~AppleAccelerateCholesky() { + FreeSymbolicFactorization(); + FreeNumericFactorization(); +} + +template<typename Scalar> +CompressedRowSparseMatrix::StorageType +AppleAccelerateCholesky<Scalar>::StorageType() const { + return CompressedRowSparseMatrix::LOWER_TRIANGULAR; +} + +template<typename Scalar> +LinearSolverTerminationType +AppleAccelerateCholesky<Scalar>::Factorize(CompressedRowSparseMatrix* lhs, + std::string* message) { + CHECK_EQ(lhs->storage_type(), StorageType()); + if (lhs == NULL) { + *message = "Failure: Input lhs is NULL."; + return LINEAR_SOLVER_FATAL_ERROR; + } + typename SparseTypesTrait<Scalar>::SparseMatrix as_lhs = + as_.CreateSparseMatrixTransposeView(lhs); + + if (!symbolic_factor_) { + symbolic_factor_.reset( + new typename SparseTypesTrait<Scalar>::SymbolicFactorization( + as_.AnalyzeCholesky(&as_lhs))); + if (symbolic_factor_->status != SparseStatusOK) { + *message = StringPrintf( + "Apple Accelerate Failure : Symbolic factorisation failed: %s", + SparseStatusToString(symbolic_factor_->status)); + FreeSymbolicFactorization(); + return LINEAR_SOLVER_FATAL_ERROR; + } + } + + if (!numeric_factor_) { + numeric_factor_.reset( + new typename SparseTypesTrait<Scalar>::NumericFactorization( + as_.Cholesky(&as_lhs, symbolic_factor_.get()))); + } else { + // Recycle memory from previous numeric factorization. + as_.Cholesky(&as_lhs, numeric_factor_.get()); + } + if (numeric_factor_->status != SparseStatusOK) { + *message = StringPrintf( + "Apple Accelerate Failure : Numeric factorisation failed: %s", + SparseStatusToString(numeric_factor_->status)); + FreeNumericFactorization(); + return LINEAR_SOLVER_FAILURE; + } + + return LINEAR_SOLVER_SUCCESS; +} + +template<typename Scalar> +LinearSolverTerminationType +AppleAccelerateCholesky<Scalar>::Solve(const double* rhs, + double* solution, + std::string* message) { + CHECK_EQ(numeric_factor_->status, SparseStatusOK) + << "Solve called without a call to Factorize first (" + << SparseStatusToString(numeric_factor_->status) << ")."; + const int num_cols = numeric_factor_->symbolicFactorization.columnCount; + + typename SparseTypesTrait<Scalar>::DenseVector as_rhs_and_solution; + as_rhs_and_solution.count = num_cols; + if (std::is_same<Scalar, double>::value) { + as_rhs_and_solution.data = reinterpret_cast<Scalar*>(solution); + std::copy_n(rhs, num_cols, solution); + } else { + scalar_rhs_and_solution_ = + ConstVectorRef(rhs, num_cols).template cast<Scalar>(); + as_rhs_and_solution.data = scalar_rhs_and_solution_.data(); + } + as_.Solve(numeric_factor_.get(), &as_rhs_and_solution); + if (!std::is_same<Scalar, double>::value) { + VectorRef(solution, num_cols) = + scalar_rhs_and_solution_.template cast<double>(); + } + return LINEAR_SOLVER_SUCCESS; +} + +template<typename Scalar> +void AppleAccelerateCholesky<Scalar>::FreeSymbolicFactorization() { + if (symbolic_factor_) { + SparseCleanup(*symbolic_factor_); + symbolic_factor_.reset(); + } +} + +template<typename Scalar> +void AppleAccelerateCholesky<Scalar>::FreeNumericFactorization() { + if (numeric_factor_) { + SparseCleanup(*numeric_factor_); + numeric_factor_.reset(); + } +} + +// Instantiate only for the specific template types required/supported s/t the +// definition can be in the .cc file. +template class AppleAccelerateCholesky<double>; +template class AppleAccelerateCholesky<float>; + +} +} + +#endif // CERES_NO_ACCELERATE_SPARSE |