// Ceres Solver - A fast non-linear least squares minimizer
// Copyright 2018 Google Inc. All rights reserved.
// http://ceres-solver.org/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright notice,
//   this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright notice,
//   this list of conditions and the following disclaimer in the documentation
//   and/or other materials provided with the distribution.
// * Neither the name of Google Inc. nor the names of its contributors may be
//   used to endorse or promote products derived from this software without
//   specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.
//
// Author: alexs.mac@gmail.com (Alex Stewart)

// This include must come before any #ifndef check on Ceres compile options.
#include "ceres/internal/port.h"

#ifndef CERES_NO_ACCELERATE_SPARSE

#include "ceres/accelerate_sparse.h"

#include <algorithm>
#include <string>
#include <vector>

#include "ceres/compressed_col_sparse_matrix_utils.h"
#include "ceres/compressed_row_sparse_matrix.h"
#include "ceres/triplet_sparse_matrix.h"
#include "glog/logging.h"

#define CASESTR(x) case x: return #x
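// For example, CASESTR(SparseStatusOK) expands to:
//   case SparseStatusOK: return "SparseStatusOK";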

namespace ceres {
namespace internal {

namespace {
const char* SparseStatusToString(SparseStatus_t status) {
  switch (status) {
    CASESTR(SparseStatusOK);
    CASESTR(SparseFactorizationFailed);
    CASESTR(SparseMatrixIsSingular);
    CASESTR(SparseInternalError);
    CASESTR(SparseParameterError);
    CASESTR(SparseStatusReleased);
    default:
      return "UKNOWN";
  }
}
}  // namespace

// Resizes workspace as required to contain at least required_size bytes
// aligned to kAccelerateRequiredAlignment and returns a pointer to the
// aligned start.
void* ResizeForAccelerateAlignment(const size_t required_size,
                                   std::vector<uint8_t>* workspace) {
  // As per the Accelerate documentation, all workspace memory passed to the
  // sparse solver functions must be 16-byte aligned.
  constexpr int kAccelerateRequiredAlignment = 16;
  // Although malloc() on macOS should always be 16-byte aligned, it is unclear
  // whether this holds for operator new, or on other Apple OSs (iOS, watchOS,
  // etc.).  As such we assume it does not, and use std::align() to create a
  // (potentially offset) 16-byte aligned sub-buffer of the specified size
  // within workspace.
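  // For example (hypothetical addresses): if required_size is 64 and
  // workspace->data() happens to return 0x1008, the buffer is resized to 80
  // bytes and std::align() advances the start pointer by 8 bytes to 0x1010,
  // leaving 72 >= 64 bytes of 16-byte aligned workspace.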
  workspace->resize(required_size + kAccelerateRequiredAlignment);
  size_t size_from_aligned_start = workspace->size();
  void* aligned_solve_workspace_start =
      reinterpret_cast<void*>(workspace->data());
  aligned_solve_workspace_start =
      std::align(kAccelerateRequiredAlignment,
                 required_size,
                 aligned_solve_workspace_start,
                 size_from_aligned_start);
  CHECK(aligned_solve_workspace_start != nullptr)
      << "required_size: " << required_size
      << ", workspace size: " << workspace->size();
  return aligned_solve_workspace_start;
}

template<typename Scalar>
void AccelerateSparse<Scalar>::Solve(NumericFactorization* numeric_factor,
                                     DenseVector* rhs_and_solution) {
  // From the SparseSolve() documentation in Solve.h: the required workspace is
  // the static requirement plus one per-RHS requirement for the single
  // right-hand side solved here.
  const int required_size =
      numeric_factor->solveWorkspaceRequiredStatic +
      numeric_factor->solveWorkspaceRequiredPerRHS;
  SparseSolve(*numeric_factor, *rhs_and_solution,
              ResizeForAccelerateAlignment(required_size, &solve_workspace_));
}

template<typename Scalar>
typename AccelerateSparse<Scalar>::ASSparseMatrix
AccelerateSparse<Scalar>::CreateSparseMatrixTransposeView(
    CompressedRowSparseMatrix* A) {
  // Accelerate uses CSC as its sparse storage format whereas Ceres uses CSR.
  // As this method returns the transpose view we can flip rows/cols to map
  // from CSR to CSC^T.
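  //
  // For example, a symmetric 3x3 matrix whose stored lower-triangular entries
  // are (0,0), (1,0), (1,1), (2,2) has CSR arrays:
  //   rows() = [0, 1, 3, 4], cols() = [0, 0, 1, 2].
  // Reused verbatim as columnStarts/rowIndices, these arrays describe the CSC
  // storage of the transpose, i.e. the upper-triangular entries
  // (0,0), (0,1), (1,1), (2,2).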
  //
  // Accelerate's columnStarts is a long*, not an int*.  These types might be
  // different (e.g. ARM on iOS) so always make a copy.
  column_starts_.resize(A->num_rows() + 1);  // +1 for final column length.
  std::copy_n(A->rows(), column_starts_.size(), &column_starts_[0]);

  ASSparseMatrix At;
  At.structure.rowCount = A->num_cols();
  At.structure.columnCount = A->num_rows();
  At.structure.columnStarts = &column_starts_[0];
  At.structure.rowIndices = A->mutable_cols();
  At.structure.attributes.transpose = false;
  At.structure.attributes.triangle = SparseUpperTriangle;
  At.structure.attributes.kind = SparseSymmetric;
  At.structure.attributes._reserved = 0;
  At.structure.attributes._allocatedBySparse = 0;
  At.structure.blockSize = 1;
  if (std::is_same<Scalar, double>::value) {
    At.data = reinterpret_cast<Scalar*>(A->mutable_values());
  } else {
    values_ =
        ConstVectorRef(A->values(), A->num_nonzeros()).template cast<Scalar>();
    At.data = values_.data();
  }
  return At;
}

template<typename Scalar>
typename AccelerateSparse<Scalar>::SymbolicFactorization
AccelerateSparse<Scalar>::AnalyzeCholesky(ASSparseMatrix* A) {
  return SparseFactor(SparseFactorizationCholesky, A->structure);
}

template<typename Scalar>
typename AccelerateSparse<Scalar>::NumericFactorization
AccelerateSparse<Scalar>::Cholesky(ASSparseMatrix* A,
                                   SymbolicFactorization* symbolic_factor) {
  return SparseFactor(*symbolic_factor, *A);
}

template<typename Scalar>
void AccelerateSparse<Scalar>::Cholesky(ASSparseMatrix* A,
                                        NumericFactorization* numeric_factor) {
  // From SparseRefactor() documentation in Solve.h
  const int required_size = std::is_same<Scalar, double>::value
      ? numeric_factor->symbolicFactorization.workspaceSize_Double
      : numeric_factor->symbolicFactorization.workspaceSize_Float;
  return SparseRefactor(*A, numeric_factor,
                        ResizeForAccelerateAlignment(required_size,
                                                     &factorization_workspace_));
}

// Instantiate only for the specific template types required/supported, so that
// the definition can live in the .cc file.
template class AccelerateSparse<double>;
template class AccelerateSparse<float>;

template<typename Scalar>
std::unique_ptr<SparseCholesky>
AppleAccelerateCholesky<Scalar>::Create(OrderingType ordering_type) {
  return std::unique_ptr<SparseCholesky>(
      new AppleAccelerateCholesky<Scalar>(ordering_type));
}

template<typename Scalar>
AppleAccelerateCholesky<Scalar>::AppleAccelerateCholesky(
    const OrderingType ordering_type)
    : ordering_type_(ordering_type) {}

template<typename Scalar>
AppleAccelerateCholesky<Scalar>::~AppleAccelerateCholesky() {
  FreeSymbolicFactorization();
  FreeNumericFactorization();
}

template<typename Scalar>
CompressedRowSparseMatrix::StorageType
AppleAccelerateCholesky<Scalar>::StorageType() const {
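  // Ceres supplies the lower-triangular half of the symmetric lhs in CSR form;
  // CreateSparseMatrixTransposeView() then presents it to Accelerate as the
  // upper-triangular half in CSC (hence the SparseUpperTriangle attribute set
  // there).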
  return CompressedRowSparseMatrix::LOWER_TRIANGULAR;
}

template<typename Scalar>
LinearSolverTerminationType
AppleAccelerateCholesky<Scalar>::Factorize(CompressedRowSparseMatrix* lhs,
                                           std::string* message) {
  if (lhs == nullptr) {
    *message = "Failure: Input lhs is NULL.";
    return LINEAR_SOLVER_FATAL_ERROR;
  }
  CHECK_EQ(lhs->storage_type(), StorageType());
  typename SparseTypesTrait<Scalar>::SparseMatrix as_lhs =
      as_.CreateSparseMatrixTransposeView(lhs);

  if (!symbolic_factor_) {
    symbolic_factor_.reset(
        new typename SparseTypesTrait<Scalar>::SymbolicFactorization(
            as_.AnalyzeCholesky(&as_lhs)));
    if (symbolic_factor_->status != SparseStatusOK) {
      *message = StringPrintf(
          "Apple Accelerate failure: symbolic factorization failed: %s",
          SparseStatusToString(symbolic_factor_->status));
      FreeSymbolicFactorization();
      return LINEAR_SOLVER_FATAL_ERROR;
    }
  }

  if (!numeric_factor_) {
    numeric_factor_.reset(
        new typename SparseTypesTrait<Scalar>::NumericFactorization(
            as_.Cholesky(&as_lhs, symbolic_factor_.get())));
  } else {
    // Recycle memory from previous numeric factorization.
    as_.Cholesky(&as_lhs, numeric_factor_.get());
  }
  if (numeric_factor_->status != SparseStatusOK) {
    *message = StringPrintf(
        "Apple Accelerate failure: numeric factorization failed: %s",
        SparseStatusToString(numeric_factor_->status));
    FreeNumericFactorization();
    return LINEAR_SOLVER_FAILURE;
  }

  return LINEAR_SOLVER_SUCCESS;
}

template<typename Scalar>
LinearSolverTerminationType
AppleAccelerateCholesky<Scalar>::Solve(const double* rhs,
                                       double* solution,
                                       std::string* message) {
  CHECK(numeric_factor_ != nullptr)
      << "Solve called without a call to Factorize first.";
  CHECK_EQ(numeric_factor_->status, SparseStatusOK)
      << "Solve called without a successful call to Factorize first ("
      << SparseStatusToString(numeric_factor_->status) << ").";
  const int num_cols = numeric_factor_->symbolicFactorization.columnCount;

  typename SparseTypesTrait<Scalar>::DenseVector as_rhs_and_solution;
  as_rhs_and_solution.count = num_cols;
  if (std::is_same<Scalar, double>::value) {
    as_rhs_and_solution.data = reinterpret_cast<Scalar*>(solution);
    std::copy_n(rhs, num_cols, solution);
  } else {
    scalar_rhs_and_solution_ =
        ConstVectorRef(rhs, num_cols).template cast<Scalar>();
    as_rhs_and_solution.data = scalar_rhs_and_solution_.data();
  }
  as_.Solve(numeric_factor_.get(), &as_rhs_and_solution);
  if (!std::is_same<Scalar, double>::value) {
    VectorRef(solution, num_cols) =
        scalar_rhs_and_solution_.template cast<double>();
  }
  return LINEAR_SOLVER_SUCCESS;
}

template<typename Scalar>
void AppleAccelerateCholesky<Scalar>::FreeSymbolicFactorization() {
  if (symbolic_factor_) {
    SparseCleanup(*symbolic_factor_);
    symbolic_factor_.reset();
  }
}

template<typename Scalar>
void AppleAccelerateCholesky<Scalar>::FreeNumericFactorization() {
  if (numeric_factor_) {
    SparseCleanup(*numeric_factor_);
    numeric_factor_.reset();
  }
}

// Instantiate only for the specific template types required/supported, so that
// the definition can live in the .cc file.
template class AppleAccelerateCholesky<double>;
template class AppleAccelerateCholesky<float>;
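
// A minimal usage sketch (in comment form), assuming a CompressedRowSparseMatrix
// `lhs` with LOWER_TRIANGULAR storage, rhs/solution buffers of matching size,
// and an OrderingType value (AMD is assumed to exist in this build):
//
//   std::unique_ptr<SparseCholesky> cholesky =
//       AppleAccelerateCholesky<double>::Create(AMD);
//   std::string message;
//   if (cholesky->Factorize(lhs, &message) == LINEAR_SOLVER_SUCCESS &&
//       cholesky->Solve(rhs, solution, &message) == LINEAR_SOLVER_SUCCESS) {
//     // `solution` now holds the solution of lhs * x = rhs.
//   }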

}  // namespace internal
}  // namespace ceres

#endif  // CERES_NO_ACCELERATE_SPARSE