// // Helper matrix class, RCMatrix.h // Required for PD optimizations (guiding) // Thanks to Ryoichi Ando, and Robert Bridson // #ifndef RCMATRIX3_H #define RCMATRIX3_H #include #include #include #include // index type #define int_index long long // link to omp & tbb for now #if OPENMP == 1 || TBB == 1 # define MANTA_ENABLE_PARALLEL 1 // allow the preconditioner to be computed in parallel? (can lead to slightly non-deterministic // results) # define MANTA_ENABLE_PARALLEL_PC 0 #else # define MANTA_ENABLE_PARALLEL 0 # define MANTA_ENABLE_PARALLEL_PC 0 #endif #if MANTA_ENABLE_PARALLEL == 1 # include # include static const int manta_num_threads = std::thread::hardware_concurrency(); // For clang # define parallel_for(total_size) \ { \ int_index parallel_array_size = (total_size); \ std::vector threads(manta_num_threads); \ for (int thread_number = 0; thread_number < manta_num_threads; thread_number++) { \ threads[thread_number] = std::thread([&](int_index parallel_array_size, int_index thread_number ) { \ for( int_index parallel_index=thread_number; parallel_index < parallel_array_size; parallel_index += manta_num_threads ) { # define parallel_end \ } \ },parallel_array_size,thread_number); \ } \ for (auto &thread : threads) \ thread.join(); \ } # define parallel_block \ { \ std::vector threads; \ { # define do_parallel threads.push_back( std::thread([&]() { # define do_end \ } ) ); # define block_end \ } \ for (auto &thread : threads) { \ thread.join(); \ } \ } #else # define parallel_for(size) \ { \ int thread_number = 0; \ int_index parallel_index = 0; \ for (int_index parallel_index = 0; parallel_index < (int_index)size; parallel_index++) { # define parallel_end \ } \ thread_number = parallel_index = 0; \ } # define parallel_block # define do_parallel # define do_end # define block_end #endif #include "vectorbase.h" namespace Manta { static const unsigned default_expected_none_zeros = 7; template struct RCMatrix { struct RowEntry { std::vector index; std::vector value; }; RCMatrix() : n(0), expected_none_zeros(default_expected_none_zeros) { } RCMatrix(N size, N expected_none_zeros = default_expected_none_zeros) : n(0), expected_none_zeros(expected_none_zeros) { resize(size); } RCMatrix(const RCMatrix &m) : n(0), expected_none_zeros(default_expected_none_zeros) { init(m); } RCMatrix &operator=(const RCMatrix &m) { expected_none_zeros = m.expected_none_zeros; init(m); return *this; } RCMatrix &operator=(RCMatrix &&m) { matrix = m.matrix; offsets = m.offsets; expected_none_zeros = m.expected_none_zeros; n = m.n; m.n = 0; m.matrix.clear(); m.offsets.clear(); return *this; } RCMatrix(RCMatrix &&m) : n(m.n), expected_none_zeros(m.expected_none_zeros), matrix(m.matrix), offsets(m.offsets) { m.n = 0; m.matrix.clear(); m.offsets.clear(); } void init(const RCMatrix &m) { expected_none_zeros = m.expected_none_zeros; resize(m.n); parallel_for(n) { N i = parallel_index; if (m.matrix[i]) { alloc_row(i); matrix[i]->index = m.matrix[i]->index; matrix[i]->value = m.matrix[i]->value; } else { dealloc_row(i); } } parallel_end } ~RCMatrix() { clear(); } void clear() { for (N i = 0; i < n; i++) { dealloc_row(i); matrix[i] = nullptr; if (offsets.size()) offsets[i] = 0; } }; bool empty(N i) const { return matrix[i] == nullptr; } N row_nonzero_size(N i) const { return matrix[i] == nullptr ? 0 : matrix[i]->index.size(); } void resize(N size, N expected_none_zeros = 0) { if (!expected_none_zeros) { expected_none_zeros = this->expected_none_zeros; } if (n > size) { // Shrinking for (N i = size ? size - 1 : 0; i < n; i++) dealloc_row(i); matrix.resize(size); } else if (n < size) { // Expanding matrix.resize(size); for (N i = n; i < size; i++) { matrix[i] = nullptr; if (offsets.size()) offsets[i] = 0; } } n = size; } void alloc_row(N i) { assert(i < n); if (!matrix[i]) { matrix[i] = new RowEntry; matrix[i]->index.reserve(expected_none_zeros); matrix[i]->value.reserve(expected_none_zeros); if (offsets.size()) offsets[i] = 0; } } void dealloc_row(N i) { assert(i < n); if (matrix[i]) { if (offsets.empty() || !offsets[i]) delete matrix[i]; matrix[i] = nullptr; if (offsets.size()) offsets[i] = 0; } } T operator()(N i, N j) const { assert(i < n); for (Iterator it = row_begin(i); it; ++it) { if (it.index() == j) return it.value(); } return T(0.0); } void add_to_element_checked(N i, N j, T val) { if ((i < 0) || (j < 0) || (i >= n) || (j >= n)) return; add_to_element(i, j, val); } void add_to_element(N i, N j, T increment_value) { if (std::abs(increment_value) > VECTOR_EPSILON) { assert(i < n); assert(offsets.empty() || offsets[i] == 0); alloc_row(i); std::vector &index = matrix[i]->index; std::vector &value = matrix[i]->value; for (N k = 0; k < (N)index.size(); ++k) { if (index[k] == j) { value[k] += increment_value; return; } else if (index[k] > j) { index.insert(index.begin() + k, j); value.insert(value.begin() + k, increment_value); return; } } index.push_back(j); value.push_back(increment_value); } } void set_element(N i, N j, T v) { if (std::abs(v) > VECTOR_EPSILON) { assert(i < n); assert(offsets.empty() || offsets[i] == 0); alloc_row(i); std::vector &index = matrix[i]->index; std::vector &value = matrix[i]->value; for (N k = 0; k < (N)index.size(); ++k) { if (index[k] == j) { value[k] = v; return; } else if (index[k] > j) { index.insert(index.begin() + k, j); value.insert(value.begin() + k, v); return; } } index.push_back(j); value.push_back(v); } } // Make sure that j is the biggest column in the row, no duplication allowed void fix_element(N i, N j, T v) { if (std::abs(v) > VECTOR_EPSILON) { assert(i < n); assert(offsets.empty() || offsets[i] == 0); alloc_row(i); std::vector &index = matrix[i]->index; std::vector &value = matrix[i]->value; index.push_back(j); value.push_back(v); } } int_index trim_zero_entries(double e = VECTOR_EPSILON) { std::vector deleted_entries(n, 0); parallel_for(n) { N i = parallel_index; if (matrix[i]) { std::vector &index = matrix[i]->index; std::vector &value = matrix[i]->value; N head = 0; N k = 0; for (k = 0; k < index.size(); ++k) { if (std::abs(value[k]) > e) { index[head] = index[k]; value[head] = value[k]; ++head; } } if (head != k) { index.erase(index.begin() + head, index.end()); value.erase(value.begin() + head, value.end()); deleted_entries[i] += k - head; } if (!offsets.size() && !head) { remove_row(i); } } } parallel_end // int_index sum_deleted(0); for (int_index i = 0; i < n; i++) sum_deleted += deleted_entries[i]; return sum_deleted; } void remove_reference(N i) { if (offsets.size() && offsets[i] && matrix[i]) { RowEntry *save = matrix[i]; matrix[i] = new RowEntry; *matrix[i] = *save; for (N &index : matrix[i]->index) index += offsets[i]; offsets[i] = 0; } } void remove_row(N i) { dealloc_row(i); } bool is_symmetric(double e = VECTOR_EPSILON) const { std::vector flags(n, true); parallel_for(n) { N i = parallel_index; bool flag = true; for (Iterator it = row_begin(i); it; ++it) { N index = it.index(); T value = it.value(); if (std::abs(value) > e) { bool found_entry = false; for (Iterator it_i = row_begin(index); it_i; ++it_i) { if (it_i.index() == i) { found_entry = true; if (std::abs(value - it_i.value()) > e) { flag = false; break; } } } if (!found_entry) flag = false; if (!flag) break; } } flags[i] = flag; } parallel_end for (N i = 0; i < matrix.size(); ++i) { if (!flags[i]) return false; } return true; } void expand() { if (offsets.empty()) return; for (N i = 1; i < n; i++) { if (offsets[i]) { RowEntry *ref = matrix[i]; matrix[i] = new RowEntry; *matrix[i] = *ref; for (N j = 0; j < (N)matrix[i]->index.size(); j++) { matrix[i]->index[j] += offsets[i]; } } } offsets.resize(0); } N column(N i) const { return empty(i) ? 0 : row_begin(i, row_nonzero_size(i) - 1).index(); } N getColumnSize() const { N max_column(0); auto column = [&](N i) { N max_column(0); for (Iterator it = row_begin(i); it; ++it) max_column = std::max(max_column, it.index()); return max_column + 1; }; for (N i = 0; i < n; i++) max_column = std::max(max_column, column(i)); return max_column; } N getNonzeroSize() const { N nonzeros(0); for (N i = 0; i < n; ++i) { nonzeros += row_nonzero_size(i); } return nonzeros; } class Iterator : std::iterator { public: Iterator(const RowEntry *rowEntry, N k, N offset) : rowEntry(rowEntry), k(k), offset(offset) { } operator bool() const { return rowEntry != nullptr && k < (N)rowEntry->index.size(); } Iterator &operator++() { ++k; return *this; } T value() const { return rowEntry->value[k]; } N index() const { return rowEntry->index[k] + offset; } N index_raw() const { return rowEntry->index[k]; } N size() const { return rowEntry == nullptr ? 0 : rowEntry->index.size(); } protected: const RowEntry *rowEntry; N k, offset; }; Iterator row_begin(N n, N k = 0) const { return Iterator(matrix[n], k, offsets.size() ? offsets[n] : 0); } class DynamicIterator : public Iterator { public: DynamicIterator(RowEntry *rowEntry, N k, N offset) : rowEntry(rowEntry), Iterator(rowEntry, k, offset) { } void setValue(T value) { rowEntry->value[Iterator::k] = value; } void setIndex(N index) { rowEntry->index[Iterator::k] = index; } protected: RowEntry *rowEntry; }; DynamicIterator dynamic_row_begin(N n, N k = 0) { N offset = offsets.size() ? offsets[n] : 0; if (offset) { printf("---- Warning ----\n"); printf("Dynamic iterator is not allowed for referenced rows.\n"); printf("You should be very careful otherwise this causes some bugs.\n"); printf( "We encourage you that you convert this row into a raw format, then loop over it...\n"); printf("-----------------\n"); exit(0); } return DynamicIterator(matrix[n], k, offset); } RCMatrix transpose(N rowsize = 0, unsigned expected_none_zeros = default_expected_none_zeros) const { if (!rowsize) rowsize = getColumnSize(); RCMatrix result(rowsize, expected_none_zeros); for (N i = 0; i < n; i++) { for (Iterator it = row_begin(i); it; ++it) result.fix_element(it.index(), i, it.value()); } return result; } RCMatrix getKtK() const { RCMatrix m = transpose(); RCMatrix result(n, expected_none_zeros); // Run in parallel parallel_for(result.n) { N i = parallel_index; for (Iterator it_A = m.row_begin(i); it_A; ++it_A) { N j = it_A.index(); assert(j < n); T a = it_A.value(); if (std::abs(a) > VECTOR_EPSILON) { for (Iterator it_B = row_begin(j); it_B; ++it_B) { // result.add_to_element(i,it_B.index(),it_B.value()*a); double value = it_B.value() * a; if (std::abs(value) > VECTOR_EPSILON) result.add_to_element(i, it_B.index(), value); } } } } parallel_end return result; } RCMatrix operator*(const RCMatrix &m) const { RCMatrix result(n, expected_none_zeros); // Run in parallel parallel_for(result.n) { N i = parallel_index; for (Iterator it_A = row_begin(i); it_A; ++it_A) { N j = it_A.index(); assert(j < m.n); T a = it_A.value(); if (std::abs(a) > VECTOR_EPSILON) { for (Iterator it_B = m.row_begin(j); it_B; ++it_B) { // result.add_to_element(i,it_B.index(),it_B.value()*a); double value = it_B.value() * a; if (std::abs(value) > VECTOR_EPSILON) result.add_to_element(i, it_B.index(), value); } } } } parallel_end return result; } RCMatrix sqrt() const { RCMatrix result(n, expected_none_zeros); // Run in parallel parallel_for(result.n) { N i = parallel_index; for (Iterator it_A = row_begin(i); it_A; ++it_A) { N j = it_A.index(); result.set_element(i, j, std::sqrt(it_A.value())); } } parallel_end return result; } RCMatrix operator*(const double k) const { RCMatrix result(n, expected_none_zeros); // Run in parallel parallel_for(result.n) { N i = parallel_index; for (Iterator it_A = row_begin(i); it_A; ++it_A) { N j = it_A.index(); result.add_to_element(i, j, it_A.value() * k); } } parallel_end return result; } RCMatrix applyKernel(const RCMatrix &kernel, const int nx, const int ny) const { RCMatrix result(n, expected_none_zeros); // find center position of kernel (half of kernel size) int kCols = kernel.n, kRows = kernel.n, rows = nx, cols = ny; int kCenterX = kCols / 2; int kCenterY = kRows / 2; // Run in parallel parallel_for(result.n) { N i = parallel_index; if (i >= rows) break; for (Iterator it_A = row_begin(i); it_A; ++it_A) { N j = it_A.index(); if (j >= cols) break; for (int m = 0; m < kRows; ++m) { // kernel rows int mm = kRows - 1 - m; // row index of flipped kernel for (int n = 0; n < kCols; ++n) { // kernel columns int nn = kCols - 1 - n; // column index of flipped kernel // index of input signal, used for checking boundary int ii = i + (m - kCenterY); int jj = j + (n - kCenterX); // ignore input samples which are out of bound if (ii >= 0 && ii < rows && jj >= 0 && jj < cols) result.add_to_element(i, j, (*this)(ii, jj) * kernel(mm, nn)); } } } } parallel_end return result; } RCMatrix applyHorizontalKernel(const RCMatrix &kernel, const int nx, const int ny) const { RCMatrix result(n, expected_none_zeros); // find center position of kernel (half of kernel size) int kCols = kernel.n, kRows = 1, rows = nx, cols = ny; int kCenterX = kCols / 2; int kCenterY = kRows / 2; // Run in parallel parallel_for(result.n) { N i = parallel_index; if (i >= rows) break; for (Iterator it_A = row_begin(i); it_A; ++it_A) { N j = it_A.index(); if (j >= cols) break; for (int m = 0; m < kRows; ++m) { // kernel rows int mm = kRows - 1 - m; // row index of flipped kernel for (int n = 0; n < kCols; ++n) { // kernel columns int nn = kCols - 1 - n; // column index of flipped kernel // index of input signal, used for checking boundary int ii = i + (m - kCenterY); int jj = j + (n - kCenterX); // ignore input samples which are out of bound if (ii >= 0 && ii < rows && jj >= 0 && jj < cols) result.add_to_element(i, j, (*this)(ii, jj) * kernel(mm, nn)); } } } } parallel_end return result; } RCMatrix applyVerticalKernel(const RCMatrix &kernel, const int nx, const int ny) const { RCMatrix result(n, expected_none_zeros); // find center position of kernel (half of kernel size) int kCols = 1, kRows = kernel.n, rows = nx, cols = ny; int kCenterX = kCols / 2; int kCenterY = kRows / 2; // Run in parallel parallel_for(result.n) { N i = parallel_index; if (i >= rows) break; for (Iterator it_A = row_begin(i); it_A; ++it_A) { N j = it_A.index(); if (j >= cols) break; for (int m = 0; m < kRows; ++m) { // kernel rows int mm = kRows - 1 - m; // row index of flipped kernel for (int n = 0; n < kCols; ++n) { // kernel columns int nn = kCols - 1 - n; // column index of flipped kernel // index of input signal, used for checking boundary int ii = i + (m - kCenterY); int jj = j + (n - kCenterX); // ignore input samples which are out of bound if (ii >= 0 && ii < rows && jj >= 0 && jj < cols) result.add_to_element(i, j, (*this)(ii, jj) * kernel(mm, nn)); } } } } parallel_end return result; } RCMatrix applySeparableKernel(const RCMatrix &kernelH, const RCMatrix &kernelV, const int nx, const int ny) const { return applyHorizontalKernel(kernelH, nx, ny).applyVerticalKernel(kernelV, nx, ny); } RCMatrix applySeparableKernelTwice(const RCMatrix &kernelH, const RCMatrix &kernelV, const int nx, const int ny) const { return applySeparableKernel(kernelH, kernelV, nx, ny) .applySeparableKernel(kernelH, kernelV, nx, ny); } std::vector operator*(const std::vector &rhs) const { std::vector result(n, 0.0); multiply(rhs, result); return result; } void multiply(const std::vector &rhs, std::vector &result) const { result.resize(n); for (N i = 0; i < n; i++) { T new_value = 0.0; for (Iterator it = row_begin(i); it; ++it) { N j_index = it.index(); assert(j_index < rhs.size()); new_value += rhs[j_index] * it.value(); } result[i] = new_value; } } RCMatrix operator+(const RCMatrix &m) const { RCMatrix A(*this); return std::move(A.add(m)); } RCMatrix &add(const RCMatrix &m) { if (m.n > n) resize(m.n); parallel_for(m.n) { N i = parallel_index; for (Iterator it = m.row_begin(i); it; ++it) { add_to_element(i, it.index(), it.value()); } } parallel_end return *this; } RCMatrix operator-(const RCMatrix &m) const { RCMatrix A(*this); return std::move(A.sub(m)); } RCMatrix &sub(const RCMatrix &m) { if (m.n > n) resize(m.n); parallel_for(m.n) { N i = parallel_index; for (Iterator it = m.row_begin(i); it; ++it) { add_to_element(i, it.index(), -it.value()); } } parallel_end return *this; } RCMatrix &replace(const RCMatrix &m, int rowInd, int colInd) { if (m.n > n) resize(m.n); parallel_for(m.n) { N i = parallel_index; for (Iterator it = m.row_begin(i); it; ++it) { set_element(i + rowInd, it.index() + colInd, it.value()); } } parallel_end return *this; } Real max_residual(const std::vector &lhs, const std::vector &rhs) const { std::vector r = operator*(lhs); Real max_residual = 0.0; for (N i = 0; i < rhs.size(); i++) { if (!empty(i)) max_residual = std::max(max_residual, std::abs(r[i] - rhs[i])); } return max_residual; } std::vector residual_vector(const std::vector &lhs, const std::vector &rhs) const { std::vector result = operator*(lhs); assert(result.size() == rhs.size()); for (N i = 0; i < result.size(); i++) { result[i] = std::abs(result[i] - rhs[i]); } return result; } T norm() const { T result(0.0); for (N i = 0; i < n; ++i) { for (Iterator it = row_begin(i); it; ++it) { result = std::max(result, std::abs(it.value())); } } return result; } T norm_L2_sqr() const { T result(0.0); for (N i = 0; i < n; ++i) { for (Iterator it = row_begin(i); it; ++it) { result += (it.value()) * (it.value()); } } return result; } void write_matlab(std::ostream &output, unsigned int rows, unsigned int columns, const char *variable_name) { output << variable_name << "=sparse(["; for (N i = 0; i < n; ++i) { if (matrix[i]) { const std::vector &index = matrix[i]->index; for (N j = 0; j < (N)index.size(); ++j) { output << i + 1 << " "; } } } output << "],...\n ["; for (N i = 0; i < n; ++i) { if (matrix[i]) { const std::vector &index = matrix[i]->index; for (N j = 0; j < (N)index.size(); ++j) { output << index[j] + (offsets.empty() ? 0 : offsets[i]) + 1 << " "; } } } output << "],...\n ["; for (N i = 0; i < n; ++i) { if (matrix[i]) { const std::vector &value = matrix[i]->value; for (N j = 0; j < value.size(); ++j) { output << value[j] << " "; } } } output << "], " << rows << ", " << columns << ");" << std::endl; }; void export_matlab(std::string filename, std::string name) { // Export this matrix std::ofstream file; file.open(filename.c_str()); write_matlab(file, n, getColumnSize(), name.c_str()); file.close(); } void print_readable(std::string name, bool printNonZero = true) { std::cout << name << " \n"; for (int i = 0; i < n; ++i) { for (int j = 0; j < n; ++j) { if (printNonZero) { if ((*this)(i, j) == 0) { std::cout << " ."; continue; } } else { if ((*this)(i, j) == 0) { continue; } } if ((*this)(i, j) >= 0) std::cout << " "; std::cout << " " << (*this)(i, j); } std::cout << " \n"; } } /// N n; N expected_none_zeros; std::vector matrix; std::vector offsets; }; template static inline RCMatrix operator*(const std::vector &diagonal, const RCMatrix &A) { RCMatrix result(A); parallel_for(result.n) { N row(parallel_index); for (auto it = result.dynamic_row_begin(row); it; ++it) { it.setValue(it.value() * diagonal[row]); } } parallel_end return std::move(result); } template struct RCFixedMatrix { std::vector rowstart; std::vector index; std::vector value; N n; N max_rowlength; // RCFixedMatrix() : n(0), max_rowlength(0) { } RCFixedMatrix(const RCMatrix &matrix) { n = matrix.n; rowstart.resize(n + 1); rowstart[0] = 0; max_rowlength = 0; for (N i = 0; i < n; i++) { if (!matrix.empty(i)) { rowstart[i + 1] = rowstart[i] + matrix.row_nonzero_size(i); max_rowlength = std::max(max_rowlength, rowstart[i + 1] - rowstart[i]); } else { rowstart[i + 1] = rowstart[i]; } } value.resize(rowstart[n]); index.resize(rowstart[n]); N j = 0; for (N i = 0; i < n; i++) { for (typename RCMatrix::Iterator it = matrix.row_begin(i); it; ++it) { value[j] = it.value(); index[j] = it.index(); ++j; } } } class Iterator : std::iterator { public: Iterator(N start, N end, const std::vector &index, const std::vector &value) : index_array(index), value_array(value), k(start), start(start), end(end) { } operator bool() const { return k < end; } Iterator &operator++() { ++k; return *this; } T value() const { return value_array[k]; } N index() const { return index_array[k]; } N size() const { return end - start; } private: const std::vector &index_array; const std::vector &value_array; N k, start, end; }; Iterator row_begin(N n) const { return Iterator(rowstart[n], rowstart[n + 1], index, value); } std::vector operator*(const std::vector &rhs) const { std::vector result(n, 0.0); multiply(rhs, result); return result; } void multiply(const std::vector &rhs, std::vector &result) const { result.resize(n); parallel_for(n) { N i = parallel_index; T new_value = 0.0; for (Iterator it = row_begin(i); it; ++it) { N j_index = it.index(); assert(j_index < rhs.size()); new_value += rhs[j_index] * it.value(); } result[i] = new_value; } parallel_end } RCMatrix operator*(const RCFixedMatrix &m) const { RCMatrix result(n, max_rowlength); // Run in parallel parallel_for(result.n) { N i = parallel_index; for (Iterator it_A = row_begin(i); it_A; ++it_A) { N j = it_A.index(); assert(j < m.n); T a = it_A.value(); if (std::abs(a) > VECTOR_EPSILON) { for (Iterator it_B = m.row_begin(j); it_B; ++it_B) { result.add_to_element(i, it_B.index(), it_B.value() * a); } } } } parallel_end return result; } RCMatrix toRCMatrix() const { RCMatrix result(n, 0); parallel_for(n) { N i = parallel_index; N size = rowstart[i + 1] - rowstart[i]; result.matrix[i] = new typename RCMatrix::RowEntry; result.matrix[i]->index.resize(size); result.matrix[i]->value.resize(size); for (N j = 0; j < size; j++) { result.matrix[i]->index[j] = index[rowstart[i] + j]; result.matrix[i]->value[j] = value[rowstart[i] + j]; } } parallel_end return result; } }; typedef RCMatrix Matrix; typedef RCFixedMatrix FixedMatrix; } #undef parallel_for #undef parallel_end #undef parallel_block #undef do_parallel #undef do_end #undef block_end #endif