diff options
Diffstat (limited to 'extern/mantaflow/helper/util/rcmatrix.h')
-rw-r--r-- | extern/mantaflow/helper/util/rcmatrix.h | 1112 |
1 files changed, 1112 insertions, 0 deletions
diff --git a/extern/mantaflow/helper/util/rcmatrix.h b/extern/mantaflow/helper/util/rcmatrix.h new file mode 100644 index 00000000000..39951cece2e --- /dev/null +++ b/extern/mantaflow/helper/util/rcmatrix.h @@ -0,0 +1,1112 @@ +// +// Helper matrix class, RCMatrix.h +// Required for PD optimizations (guiding) +// Thanks to Ryoichi Ando, and Robert Bridson +// + +#ifndef RCMATRIX3_H +#define RCMATRIX3_H + +#include <iterator> +#include <cassert> +#include <vector> +#include <fstream> + +// index type +#define int_index long long + +// link to omp & tbb for now +#if OPENMP == 1 || TBB == 1 +# define MANTA_ENABLE_PARALLEL 0 +// allow the preconditioner to be computed in parallel? (can lead to slightly non-deterministic +// results) +# define MANTA_ENABLE_PARALLEL_PC 0 +// use c++11 code? +# define MANTA_USE_CPP11 1 +#else +# define MANTA_ENABLE_PARALLEL 0 +# define MANTA_ENABLE_PARALLEL_PC 0 +# define MANTA_USE_CPP11 0 +#endif + +#if MANTA_ENABLE_PARALLEL == 1 +# include <thread> +# include <algorithm> + +static const int manta_num_threads = std::thread::hardware_concurrency(); + +// For clang +# define parallel_for(total_size) \ + { \ + int_index parallel_array_size = (total_size); \ + std::vector<std::thread> threads(manta_num_threads); \ + for (int thread_number = 0; thread_number < manta_num_threads; thread_number++) { \ + threads[thread_number] = std::thread([&](int_index parallel_array_size, int_index thread_number ) { \ + for( int_index parallel_index=thread_number; parallel_index < parallel_array_size; parallel_index += manta_num_threads ) { + +# define parallel_end \ + } \ + },parallel_array_size,thread_number); \ + } \ + for (auto &thread : threads) \ + thread.join(); \ + } + +# define parallel_block \ + { \ + std::vector<std::thread> threads; \ + { + +# define do_parallel threads.push_back( std::thread([&]() { +# define do_end \ + } ) ); + +# define block_end \ + } \ + for (auto &thread : threads) { \ + thread.join(); \ + } \ + } + +#else + +# define parallel_for(size) \ + { \ + int thread_number = 0; \ + int_index parallel_index = 0; \ + for (int_index parallel_index = 0; parallel_index < (int_index)size; parallel_index++) { +# define parallel_end \ + } \ + thread_number = parallel_index = 0; \ + } + +# define parallel_block +# define do_parallel +# define do_end +# define block_end + +#endif + +#include "vectorbase.h" + +namespace Manta { + +static const unsigned default_expected_none_zeros = 7; + +template<class N, class T> struct RCMatrix { + struct RowEntry { + std::vector<N> index; + std::vector<T> value; + }; + RCMatrix() : n(0), expected_none_zeros(default_expected_none_zeros) + { + } + RCMatrix(N size, N expected_none_zeros = default_expected_none_zeros) + : n(0), expected_none_zeros(expected_none_zeros) + { + resize(size); + } + RCMatrix(const RCMatrix &m) : n(0), expected_none_zeros(default_expected_none_zeros) + { + init(m); + } + RCMatrix &operator=(const RCMatrix &m) + { + expected_none_zeros = m.expected_none_zeros; + init(m); + return *this; + } + RCMatrix &operator=(RCMatrix &&m) + { + matrix = m.matrix; + offsets = m.offsets; + expected_none_zeros = m.expected_none_zeros; + n = m.n; + m.n = 0; + m.matrix.clear(); + m.offsets.clear(); + return *this; + } + RCMatrix(RCMatrix &&m) + : n(m.n), expected_none_zeros(m.expected_none_zeros), matrix(m.matrix), offsets(m.offsets) + { + m.n = 0; + m.matrix.clear(); + m.offsets.clear(); + } + void init(const RCMatrix &m) + { + expected_none_zeros = m.expected_none_zeros; + resize(m.n); + parallel_for(n) + { + N i = parallel_index; + if (m.matrix[i]) { + alloc_row(i); + matrix[i]->index = m.matrix[i]->index; + matrix[i]->value = m.matrix[i]->value; + } + else { + dealloc_row(i); + } + } + parallel_end + } + ~RCMatrix() + { + clear(); + } + void clear() + { + for (N i = 0; i < n; i++) { + dealloc_row(i); + matrix[i] = NULL; + if (offsets.size()) + offsets[i] = 0; + } + }; + bool empty(N i) const + { + return matrix[i] == NULL; + } + N row_nonzero_size(N i) const + { + return matrix[i] == NULL ? 0 : matrix[i]->index.size(); + } + void resize(N size, N expected_none_zeros = 0) + { + if (!expected_none_zeros) { + expected_none_zeros = this->expected_none_zeros; + } + if (n > size) { + // Shrinking + for (N i = size ? size - 1 : 0; i < n; i++) + dealloc_row(i); + matrix.resize(size); + } + else if (n < size) { + // Expanding + matrix.resize(size); + for (N i = n; i < size; i++) { + matrix[i] = NULL; + if (offsets.size()) + offsets[i] = 0; + } + } + n = size; + } + void alloc_row(N i) + { + assert(i < n); + if (!matrix[i]) { + matrix[i] = new RowEntry; + matrix[i]->index.reserve(expected_none_zeros); + matrix[i]->value.reserve(expected_none_zeros); + if (offsets.size()) + offsets[i] = 0; + } + } + void dealloc_row(N i) + { + assert(i < n); + if (matrix[i]) { + if (offsets.empty() || !offsets[i]) + delete matrix[i]; + matrix[i] = NULL; + if (offsets.size()) + offsets[i] = 0; + } + } + T operator()(N i, N j) const + { + assert(i < n); + for (Iterator it = row_begin(i); it; ++it) { + if (it.index() == j) + return it.value(); + } + return T(0.0); + } + void add_to_element_checked(N i, N j, T val) + { + if ((i < 0) || (j < 0) || (i >= n) || (j >= n)) + return; + add_to_element(i, j, val); + } + void add_to_element(N i, N j, T increment_value) + { + if (std::abs(increment_value) > VECTOR_EPSILON) { + assert(i < n); + assert(offsets.empty() || offsets[i] == 0); + alloc_row(i); + std::vector<N> &index = matrix[i]->index; + std::vector<T> &value = matrix[i]->value; + for (N k = 0; k < (N)index.size(); ++k) { + if (index[k] == j) { + value[k] += increment_value; + return; + } + else if (index[k] > j) { + index.insert(index.begin() + k, j); + value.insert(value.begin() + k, increment_value); + return; + } + } + index.push_back(j); + value.push_back(increment_value); + } + } + + void set_element(N i, N j, T v) + { + if (std::abs(v) > VECTOR_EPSILON) { + assert(i < n); + assert(offsets.empty() || offsets[i] == 0); + alloc_row(i); + std::vector<N> &index = matrix[i]->index; + std::vector<T> &value = matrix[i]->value; + for (N k = 0; k < (N)index.size(); ++k) { + if (index[k] == j) { + value[k] = v; + return; + } + else if (index[k] > j) { + index.insert(index.begin() + k, j); + value.insert(value.begin() + k, v); + return; + } + } + index.push_back(j); + value.push_back(v); + } + } + + // Make sure that j is the biggest column in the row, no duplication allowed + void fix_element(N i, N j, T v) + { + if (std::abs(v) > VECTOR_EPSILON) { + assert(i < n); + assert(offsets.empty() || offsets[i] == 0); + alloc_row(i); + std::vector<N> &index = matrix[i]->index; + std::vector<T> &value = matrix[i]->value; + index.push_back(j); + value.push_back(v); + } + } + int_index trim_zero_entries(double e = VECTOR_EPSILON) + { + std::vector<int_index> deleted_entries(n, 0); + parallel_for(n) + { + N i = parallel_index; + if (matrix[i]) { + std::vector<N> &index = matrix[i]->index; + std::vector<T> &value = matrix[i]->value; + N head = 0; + N k = 0; + for (k = 0; k < index.size(); ++k) { + if (std::abs(value[k]) > e) { + index[head] = index[k]; + value[head] = value[k]; + ++head; + } + } + if (head != k) { + index.erase(index.begin() + head, index.end()); + value.erase(value.begin() + head, value.end()); + deleted_entries[i] += k - head; + } + if (!offsets.size() && !head) { + remove_row(i); + } + } + } + parallel_end + // + int_index sum_deleted(0); + for (int_index i = 0; i < n; i++) + sum_deleted += deleted_entries[i]; + return sum_deleted; + } + void remove_reference(N i) + { + if (offsets.size() && offsets[i] && matrix[i]) { + RowEntry *save = matrix[i]; + matrix[i] = new RowEntry; + *matrix[i] = *save; + for (N &index : matrix[i]->index) + index += offsets[i]; + offsets[i] = 0; + } + } + void remove_row(N i) + { + dealloc_row(i); + } + bool is_symmetric(double e = VECTOR_EPSILON) const + { + std::vector<bool> flags(n, true); + parallel_for(n) + { + N i = parallel_index; + bool flag = true; + for (Iterator it = row_begin(i); it; ++it) { + N index = it.index(); + T value = it.value(); + if (std::abs(value) > e) { + bool found_entry = false; + for (Iterator it_i = row_begin(index); it_i; ++it_i) { + if (it_i.index() == i) { + found_entry = true; + if (std::abs(value - it_i.value()) > e) { + flag = false; + break; + } + } + } + if (!found_entry) + flag = false; + if (!flag) + break; + } + } + flags[i] = flag; + } + parallel_end for (N i = 0; i < matrix.size(); ++i) + { + if (!flags[i]) + return false; + } + return true; + } + + void expand() + { + if (offsets.empty()) + return; + for (N i = 1; i < n; i++) { + if (offsets[i]) { + RowEntry *ref = matrix[i]; + matrix[i] = new RowEntry; + *matrix[i] = *ref; + for (N j = 0; j < (N)matrix[i]->index.size(); j++) { + matrix[i]->index[j] += offsets[i]; + } + } + } + offsets.resize(0); + } + + N column(N i) const + { + return empty(i) ? 0 : row_begin(i, row_nonzero_size(i) - 1).index(); + } + N getColumnSize() const + { + N max_column(0); + auto column = [&](N i) { + N max_column(0); + for (Iterator it = row_begin(i); it; ++it) + max_column = std::max(max_column, it.index()); + return max_column + 1; + }; + for (N i = 0; i < n; i++) + max_column = std::max(max_column, column(i)); + return max_column; + } + N getNonzeroSize() const + { + N nonzeros(0); + for (N i = 0; i < n; ++i) { + nonzeros += row_nonzero_size(i); + } + return nonzeros; + } + class Iterator : std::iterator<std::input_iterator_tag, T> { + public: + Iterator(const RowEntry *rowEntry, N k, N offset) : rowEntry(rowEntry), k(k), offset(offset) + { + } + operator bool() const + { + return rowEntry != NULL && k < (N)rowEntry->index.size(); + } + Iterator &operator++() + { + ++k; + return *this; + } + T value() const + { + return rowEntry->value[k]; + } + N index() const + { + return rowEntry->index[k] + offset; + } + N index_raw() const + { + return rowEntry->index[k]; + } + N size() const + { + return rowEntry == NULL ? 0 : rowEntry->index.size(); + } + + protected: + const RowEntry *rowEntry; + N k, offset; + }; + Iterator row_begin(N n, N k = 0) const + { + return Iterator(matrix[n], k, offsets.size() ? offsets[n] : 0); + } + class DynamicIterator : public Iterator { + public: + DynamicIterator(RowEntry *rowEntry, N k, N offset) + : rowEntry(rowEntry), Iterator(rowEntry, k, offset) + { + } + void setValue(T value) + { + rowEntry->value[Iterator::k] = value; + } + void setIndex(N index) + { + rowEntry->index[Iterator::k] = index; + } + + protected: + RowEntry *rowEntry; + }; + DynamicIterator dynamic_row_begin(N n, N k = 0) + { + N offset = offsets.size() ? offsets[n] : 0; + if (offset) { + printf("---- Warning ----\n"); + printf("Dynamic iterator is not allowed for referenced rows.\n"); + printf("You should be very careful otherwise this causes some bugs.\n"); + printf( + "We encourage you that you convert this row into a raw format, then loop over it...\n"); + printf("-----------------\n"); + exit(0); + } + return DynamicIterator(matrix[n], k, offset); + } + RCMatrix transpose(N rowsize = 0, + unsigned expected_none_zeros = default_expected_none_zeros) const + { + if (!rowsize) + rowsize = getColumnSize(); + RCMatrix result(rowsize, expected_none_zeros); + for (N i = 0; i < n; i++) { + for (Iterator it = row_begin(i); it; ++it) + result.fix_element(it.index(), i, it.value()); + } +#if MANTA_USE_CPP11 == 1 + return std::move(result); +#else + return result; +#endif + } + + RCMatrix getKtK() const + { + RCMatrix m = transpose(); + RCMatrix result(n, expected_none_zeros); + // Run in parallel + parallel_for(result.n) + { + N i = parallel_index; + for (Iterator it_A = m.row_begin(i); it_A; ++it_A) { + N j = it_A.index(); + assert(j < n); + T a = it_A.value(); + if (std::abs(a) > VECTOR_EPSILON) { + for (Iterator it_B = row_begin(j); it_B; ++it_B) { + // result.add_to_element(i,it_B.index(),it_B.value()*a); + double value = it_B.value() * a; + if (std::abs(value) > VECTOR_EPSILON) + result.add_to_element(i, it_B.index(), value); + } + } + } + } + parallel_end +#if MANTA_USE_CPP11 == 1 + return std::move(result); +#else + return result; +#endif + } + + RCMatrix operator*(const RCMatrix &m) const + { + RCMatrix result(n, expected_none_zeros); + // Run in parallel + parallel_for(result.n) + { + N i = parallel_index; + for (Iterator it_A = row_begin(i); it_A; ++it_A) { + N j = it_A.index(); + assert(j < m.n); + T a = it_A.value(); + if (std::abs(a) > VECTOR_EPSILON) { + for (Iterator it_B = m.row_begin(j); it_B; ++it_B) { + // result.add_to_element(i,it_B.index(),it_B.value()*a); + double value = it_B.value() * a; + if (std::abs(value) > VECTOR_EPSILON) + result.add_to_element(i, it_B.index(), value); + } + } + } + } + parallel_end +#if MANTA_USE_CPP11 == 1 + return std::move(result); +#else + return result; +#endif + } + + RCMatrix sqrt() const + { + RCMatrix result(n, expected_none_zeros); + // Run in parallel + parallel_for(result.n) + { + N i = parallel_index; + for (Iterator it_A = row_begin(i); it_A; ++it_A) { + N j = it_A.index(); + result.set_element(i, j, std::sqrt(it_A.value())); + } + } + parallel_end +#if MANTA_USE_CPP11 == 1 + return std::move(result); +#else + return result; +#endif + } + + RCMatrix operator*(const double k) const + { + RCMatrix result(n, expected_none_zeros); + // Run in parallel + parallel_for(result.n) + { + N i = parallel_index; + for (Iterator it_A = row_begin(i); it_A; ++it_A) { + N j = it_A.index(); + result.add_to_element(i, j, it_A.value() * k); + } + } + parallel_end +#if MANTA_USE_CPP11 == 1 + return std::move(result); +#else + return result; +#endif + } + + RCMatrix applyKernel(const RCMatrix &kernel, const int nx, const int ny) const + { + RCMatrix result(n, expected_none_zeros); + // find center position of kernel (half of kernel size) + int kCols = kernel.n, kRows = kernel.n, rows = nx, cols = ny; + int kCenterX = kCols / 2; + int kCenterY = kRows / 2; + // Run in parallel + parallel_for(result.n) + { + N i = parallel_index; + if (i >= rows) + break; + for (Iterator it_A = row_begin(i); it_A; ++it_A) { + N j = it_A.index(); + if (j >= cols) + break; + for (int m = 0; m < kRows; ++m) { // kernel rows + int mm = kRows - 1 - m; // row index of flipped kernel + for (int n = 0; n < kCols; ++n) { // kernel columns + int nn = kCols - 1 - n; // column index of flipped kernel + // index of input signal, used for checking boundary + int ii = i + (m - kCenterY); + int jj = j + (n - kCenterX); + // ignore input samples which are out of bound + if (ii >= 0 && ii < rows && jj >= 0 && jj < cols) + result.add_to_element(i, j, (*this)(ii, jj) * kernel(mm, nn)); + } + } + } + } + parallel_end +#if MANTA_USE_CPP11 == 1 + return std::move(result); +#else + return result; +#endif + } + + RCMatrix applyHorizontalKernel(const RCMatrix &kernel, const int nx, const int ny) const + { + RCMatrix result(n, expected_none_zeros); + // find center position of kernel (half of kernel size) + int kCols = kernel.n, kRows = 1, rows = nx, cols = ny; + int kCenterX = kCols / 2; + int kCenterY = kRows / 2; + // Run in parallel + parallel_for(result.n) + { + N i = parallel_index; + if (i >= rows) + break; + for (Iterator it_A = row_begin(i); it_A; ++it_A) { + N j = it_A.index(); + if (j >= cols) + break; + for (int m = 0; m < kRows; ++m) { // kernel rows + int mm = kRows - 1 - m; // row index of flipped kernel + for (int n = 0; n < kCols; ++n) { // kernel columns + int nn = kCols - 1 - n; // column index of flipped kernel + // index of input signal, used for checking boundary + int ii = i + (m - kCenterY); + int jj = j + (n - kCenterX); + // ignore input samples which are out of bound + if (ii >= 0 && ii < rows && jj >= 0 && jj < cols) + result.add_to_element(i, j, (*this)(ii, jj) * kernel(mm, nn)); + } + } + } + } + parallel_end +#if MANTA_USE_CPP11 == 1 + return std::move(result); +#else + return result; +#endif + } + + RCMatrix applyVerticalKernel(const RCMatrix &kernel, const int nx, const int ny) const + { + RCMatrix result(n, expected_none_zeros); + // find center position of kernel (half of kernel size) + int kCols = 1, kRows = kernel.n, rows = nx, cols = ny; + int kCenterX = kCols / 2; + int kCenterY = kRows / 2; + // Run in parallel + parallel_for(result.n) + { + N i = parallel_index; + if (i >= rows) + break; + for (Iterator it_A = row_begin(i); it_A; ++it_A) { + N j = it_A.index(); + if (j >= cols) + break; + for (int m = 0; m < kRows; ++m) { // kernel rows + int mm = kRows - 1 - m; // row index of flipped kernel + for (int n = 0; n < kCols; ++n) { // kernel columns + int nn = kCols - 1 - n; // column index of flipped kernel + // index of input signal, used for checking boundary + int ii = i + (m - kCenterY); + int jj = j + (n - kCenterX); + // ignore input samples which are out of bound + if (ii >= 0 && ii < rows && jj >= 0 && jj < cols) + result.add_to_element(i, j, (*this)(ii, jj) * kernel(mm, nn)); + } + } + } + } + parallel_end +#if MANTA_USE_CPP11 == 1 + return std::move(result); +#else + return result; +#endif + } + + RCMatrix applySeparableKernel(const RCMatrix &kernelH, + const RCMatrix &kernelV, + const int nx, + const int ny) const + { + return applyHorizontalKernel(kernelH, nx, ny).applyVerticalKernel(kernelV, nx, ny); + } + + RCMatrix applySeparableKernelTwice(const RCMatrix &kernelH, + const RCMatrix &kernelV, + const int nx, + const int ny) const + { + return applySeparableKernel(kernelH, kernelV, nx, ny) + .applySeparableKernel(kernelH, kernelV, nx, ny); + } + + std::vector<T> operator*(const std::vector<T> &rhs) const + { + std::vector<T> result(n, 0.0); + multiply(rhs, result); +#if MANTA_USE_CPP11 == 1 + return std::move(result); +#else + return result; +#endif + } + void multiply(const std::vector<T> &rhs, std::vector<T> &result) const + { + result.resize(n); + for (N i = 0; i < n; i++) { + T new_value = 0.0; + for (Iterator it = row_begin(i); it; ++it) { + N j_index = it.index(); + assert(j_index < rhs.size()); + new_value += rhs[j_index] * it.value(); + } + result[i] = new_value; + } + } + RCMatrix operator+(const RCMatrix &m) const + { + RCMatrix A(*this); + return std::move(A.add(m)); + } + RCMatrix &add(const RCMatrix &m) + { + if (m.n > n) + resize(m.n); + parallel_for(m.n) + { + N i = parallel_index; + for (Iterator it = m.row_begin(i); it; ++it) { + add_to_element(i, it.index(), it.value()); + } + } + parallel_end return *this; + } + RCMatrix operator-(const RCMatrix &m) const + { + RCMatrix A(*this); + return std::move(A.sub(m)); + } + RCMatrix &sub(const RCMatrix &m) + { + if (m.n > n) + resize(m.n); + parallel_for(m.n) + { + N i = parallel_index; + for (Iterator it = m.row_begin(i); it; ++it) { + add_to_element(i, it.index(), -it.value()); + } + } + parallel_end return *this; + } + RCMatrix &replace(const RCMatrix &m, int rowInd, int colInd) + { + if (m.n > n) + resize(m.n); + parallel_for(m.n) + { + N i = parallel_index; + for (Iterator it = m.row_begin(i); it; ++it) { + set_element(i + rowInd, it.index() + colInd, it.value()); + } + } + parallel_end return *this; + } + Real max_residual(const std::vector<T> &lhs, const std::vector<T> &rhs) const + { + std::vector<T> r = operator*(lhs); + Real max_residual = 0.0; + for (N i = 0; i < rhs.size(); i++) { + if (!empty(i)) + max_residual = std::max(max_residual, std::abs(r[i] - rhs[i])); + } + return max_residual; + } + std::vector<T> residual_vector(const std::vector<T> &lhs, const std::vector<T> &rhs) const + { + std::vector<T> result = operator*(lhs); + assert(result.size() == rhs.size()); + for (N i = 0; i < result.size(); i++) { + result[i] = std::abs(result[i] - rhs[i]); + } +#if MANTA_USE_CPP11 == 1 + return std::move(result); +#else + return result; +#endif + } + T norm() const + { + T result(0.0); + for (N i = 0; i < n; ++i) { + for (Iterator it = row_begin(i); it; ++it) { + result = std::max(result, std::abs(it.value())); + } + } + return result; + } + + T norm_L2_sqr() const + { + T result(0.0); + for (N i = 0; i < n; ++i) { + for (Iterator it = row_begin(i); it; ++it) { + result += (it.value()) * (it.value()); + } + } + return result; + } + + void write_matlab(std::ostream &output, + unsigned int rows, + unsigned int columns, + const char *variable_name) + { + output << variable_name << "=sparse(["; + for (N i = 0; i < n; ++i) { + if (matrix[i]) { + const std::vector<N> &index = matrix[i]->index; + for (N j = 0; j < (N)index.size(); ++j) { + output << i + 1 << " "; + } + } + } + output << "],...\n ["; + for (N i = 0; i < n; ++i) { + if (matrix[i]) { + const std::vector<N> &index = matrix[i]->index; + for (N j = 0; j < (N)index.size(); ++j) { + output << index[j] + (offsets.empty() ? 0 : offsets[i]) + 1 << " "; + } + } + } + output << "],...\n ["; + for (N i = 0; i < n; ++i) { + if (matrix[i]) { + const std::vector<T> &value = matrix[i]->value; + for (N j = 0; j < value.size(); ++j) { + output << value[j] << " "; + } + } + } + output << "], " << rows << ", " << columns << ");" << std::endl; + }; + void export_matlab(std::string filename, std::string name) + { + // Export this matrix + std::ofstream file; + file.open(filename.c_str()); + write_matlab(file, n, getColumnSize(), name.c_str()); + file.close(); + } + void print_readable(std::string name, bool printNonZero = true) + { + std::cout << name << " \n"; + for (int i = 0; i < n; ++i) { + for (int j = 0; j < n; ++j) { + if (printNonZero) { + if ((*this)(i, j) == 0) { + std::cout << " ."; + continue; + } + } + else { + if ((*this)(i, j) == 0) { + continue; + } + } + + if ((*this)(i, j) >= 0) + std::cout << " "; + std::cout << " " << (*this)(i, j); + } + std::cout << " \n"; + } + } + /// + N n; + N expected_none_zeros; + std::vector<RowEntry *> matrix; + std::vector<int> offsets; +}; + +template<class N, class T> +static inline RCMatrix<N, T> operator*(const std::vector<T> &diagonal, const RCMatrix<N, T> &A) +{ + RCMatrix<N, T> result(A); + parallel_for(result.n) + { + N row(parallel_index); + for (auto it = result.dynamic_row_begin(row); it; ++it) { + it.setValue(it.value() * diagonal[row]); + } + } + parallel_end return std::move(result); +} + +template<class N, class T> struct RCFixedMatrix { + std::vector<N> rowstart; + std::vector<N> index; + std::vector<T> value; + N n; + N max_rowlength; + // + RCFixedMatrix() : n(0), max_rowlength(0) + { + } + RCFixedMatrix(const RCMatrix<N, T> &matrix) + { + n = matrix.n; + rowstart.resize(n + 1); + rowstart[0] = 0; + max_rowlength = 0; + for (N i = 0; i < n; i++) { + if (!matrix.empty(i)) { + rowstart[i + 1] = rowstart[i] + matrix.row_nonzero_size(i); + max_rowlength = std::max(max_rowlength, rowstart[i + 1] - rowstart[i]); + } + else { + rowstart[i + 1] = rowstart[i]; + } + } + value.resize(rowstart[n]); + index.resize(rowstart[n]); + N j = 0; + for (N i = 0; i < n; i++) { + for (typename RCMatrix<N, T>::Iterator it = matrix.row_begin(i); it; ++it) { + value[j] = it.value(); + index[j] = it.index(); + ++j; + } + } + } + class Iterator : std::iterator<std::input_iterator_tag, T> { + public: + Iterator(N start, N end, const std::vector<N> &index, const std::vector<T> &value) + : index_array(index), value_array(value), k(start), start(start), end(end) + { + } + operator bool() const + { + return k < end; + } + Iterator &operator++() + { + ++k; + return *this; + } + T value() const + { + return value_array[k]; + } + N index() const + { + return index_array[k]; + } + N size() const + { + return end - start; + } + + private: + const std::vector<N> &index_array; + const std::vector<T> &value_array; + N k, start, end; + }; + Iterator row_begin(N n) const + { + return Iterator(rowstart[n], rowstart[n + 1], index, value); + } + std::vector<T> operator*(const std::vector<T> &rhs) const + { + std::vector<T> result(n, 0.0); + multiply(rhs, result); +#if MANTA_USE_CPP11 == 1 + return std::move(result); +#else + return result; +#endif + } + void multiply(const std::vector<T> &rhs, std::vector<T> &result) const + { + result.resize(n); + parallel_for(n) + { + N i = parallel_index; + T new_value = 0.0; + for (Iterator it = row_begin(i); it; ++it) { + N j_index = it.index(); + assert(j_index < rhs.size()); + new_value += rhs[j_index] * it.value(); + } + result[i] = new_value; + } + parallel_end + } + RCMatrix<N, T> operator*(const RCFixedMatrix &m) const + { + RCMatrix<N, T> result(n, max_rowlength); + // Run in parallel + parallel_for(result.n) + { + N i = parallel_index; + for (Iterator it_A = row_begin(i); it_A; ++it_A) { + N j = it_A.index(); + assert(j < m.n); + T a = it_A.value(); + if (std::abs(a) > VECTOR_EPSILON) { + for (Iterator it_B = m.row_begin(j); it_B; ++it_B) { + result.add_to_element(i, it_B.index(), it_B.value() * a); + } + } + } + } + parallel_end +#if MANTA_USE_CPP11 == 1 + return std::move(result); +#else + return result; +#endif + } + + RCMatrix<N, T> toRCMatrix() const + { + RCMatrix<N, T> result(n, 0); + parallel_for(n) + { + N i = parallel_index; + N size = rowstart[i + 1] - rowstart[i]; + result.matrix[i] = new typename RCMatrix<N, T>::RowEntry; + result.matrix[i]->index.resize(size); + result.matrix[i]->value.resize(size); + for (N j = 0; j < size; j++) { + result.matrix[i]->index[j] = index[rowstart[i] + j]; + result.matrix[i]->value[j] = value[rowstart[i] + j]; + } + } + parallel_end +#if MANTA_USE_CPP11 == 1 + return std::move(result); +#else + return result; +#endif + } +}; + +typedef RCMatrix<int, Real> Matrix; +typedef RCFixedMatrix<int, Real> FixedMatrix; + +} // namespace Manta + +#undef parallel_for +#undef parallel_end + +#undef parallel_block +#undef do_parallel +#undef do_end +#undef block_end + +#endif |