From 8af51750b24ea037c5ad64641d79b05a60448d11 Mon Sep 17 00:00:00 2001 From: over0219 Date: Tue, 30 Jun 2020 17:07:29 -0500 Subject: working on mcgs --- extern/softbody/src/admmpd_linsolve.cpp | 393 ++++++++++++++++++++++++++++++-- 1 file changed, 369 insertions(+), 24 deletions(-) (limited to 'extern/softbody/src/admmpd_linsolve.cpp') diff --git a/extern/softbody/src/admmpd_linsolve.cpp b/extern/softbody/src/admmpd_linsolve.cpp index 5b3a62dc7bb..0d40c49ef4d 100644 --- a/extern/softbody/src/admmpd_linsolve.cpp +++ b/extern/softbody/src/admmpd_linsolve.cpp @@ -4,12 +4,154 @@ #include "admmpd_linsolve.h" #include #include +#include #include +#include #include "BLI_assert.h" +#include "BLI_task.h" namespace admmpd { using namespace Eigen; +// Makes full n3 x n3 matrix +static inline void make_n3( + const RowSparseMatrix &A, + RowSparseMatrix &A3) +{ + int na = A.rows(); + SparseMatrix A_cm = A; // A to CM + SparseMatrix A3_cm(na*3, na*3); + A3_cm.reserve(A_cm.nonZeros()*3); + int cols = A_cm.rows(); + for(int c=0; c::InnerIterator itA(A_cm,c); itA; ++itA) + A3_cm.insertBack((itA.row()*3+dim), col) = itA.value(); + } + } + A3_cm.finalize(); + A3_cm.makeCompressed(); + A3 = A3_cm; // to RowMajor +} // end make_n3 + +void ConjugateGradients::solve( + const Options *options, + SolverData *data) +{ + BLI_assert(data != NULL); + BLI_assert(options != NULL); + int nx = data->x.rows(); + BLI_assert(nx > 0); + BLI_assert(data->A.rows() == nx); + BLI_assert(data->A.cols() == nx); + BLI_assert(data->b.rows() == nx); + BLI_assert(data->C.cols() == nx*3); + BLI_assert(data->d.rows() > 0); + BLI_assert(data->C.rows() == data->d.rows()); + + // If we don't have any constraints, we don't need to perform CG + data->b.noalias() = data->M_xbar + data->DtW2*(data->z-data->u); + if (data->C.nonZeros()==0) + { + data->x = data->ldltA.solve(data->b); + return; + } + + // Resize data associated with Conjugate-Gradient + admmpd::SolverData::GlobalStepData *gsdata = &data->gsdata; + if (gsdata->r.rows() != nx*3) + { + gsdata->r.resize(nx*3); + gsdata->z.resize(nx*3); + gsdata->p.resize(nx*3); + gsdata->A3p.resize(nx*3); + gsdata->b3_plus_Ctd.resize(nx*3); + make_n3(data->A, gsdata->A3); + } + + gsdata->Ctd = data->spring_k * data->C.transpose()*data->d; + gsdata->CtC = data->spring_k * data->C.transpose()*data->C; + gsdata->A3_plus_CtC = gsdata->A3 + gsdata->CtC; + VectorXd x3(nx*3); + for (int i=0; ib.row(i); + gsdata->b3_plus_Ctd.segment<3>(i*3) = bi+gsdata->Ctd.segment<3>(i*3); + x3.segment<3>(i*3) = data->x.row(i); + } + + gsdata->r.noalias() = gsdata->b3_plus_Ctd - gsdata->A3_plus_CtC*x3; + solve_Ax_b(data,&gsdata->z,&gsdata->r); + gsdata->p = gsdata->z; + + for (int iter=0; itermax_cg_iters; ++iter) + { + gsdata->A3p.noalias() = gsdata->A3_plus_CtC*gsdata->p; + double p_dot_Ap = gsdata->p.dot(gsdata->A3p); + if (p_dot_Ap==0.0) + break; + + double zk_dot_rk = gsdata->z.dot(gsdata->r); + if (zk_dot_rk==0.0) + break; + + double alpha = zk_dot_rk / p_dot_Ap; + x3.noalias() += alpha * gsdata->p; + gsdata->r.noalias() -= alpha * gsdata->A3p; + double r_norm = gsdata->r.lpNorm(); + if (r_norm < options->min_res) + break; + + solve_Ax_b(data,&gsdata->z,&gsdata->r); + double zk1_dot_rk1 = gsdata->z.dot(gsdata->r); + double beta = zk1_dot_rk1 / zk_dot_rk; + gsdata->p = gsdata->z + beta*gsdata->p; + } + + for (int i=0; ix.row(i) = x3.segment<3>(i*3); + +} // end ConjugateGradients solve + +// Solve Ax = b in parallel and apply +// dimensionality mapping with temporaries +void ConjugateGradients::solve_Ax_b( + SolverData *data_, + VectorXd *x_, + VectorXd *b_) +{ + typedef struct LinSolveThreadData { + SolverData *data; + VectorXd *ls_x; + VectorXd *ls_b; + } LinSolveThreadData; + + auto parallel_lin_solve = []( + void *__restrict userdata, + const int i, + const TaskParallelTLS *__restrict UNUSED(tls))->void + { + LinSolveThreadData *td = (LinSolveThreadData*)userdata; + int nx = td->ls_x->rows()/3; + VectorXd b(nx); + for (int j=0; jls_b->operator[](j*3+i); + VectorXd x = td->data->ldltA.solve(b); + for (int j=0; jls_x->operator[](j*3+i) = x[j]; + }; + + LinSolveThreadData thread_data = {.data=data_, .ls_x=x_, .ls_b=b_}; + TaskParallelSettings settings; + BLI_parallel_range_settings_defaults(&settings); + BLI_task_parallel_range(0, 3, &thread_data, parallel_lin_solve, &settings); + +} // end solve Ax=b + void GaussSeidel::solve( const Options *options, SolverData *data) @@ -67,7 +209,6 @@ void GaussSeidel::solve( for (int j=0; j<3; ++j) xi_new[j] *= inv_aii[j]; data->x.row(idx) = xi*(1.0-omega) + xi_new*omega; - data->gsdata.last_dx.row(idx) = data->x.row(idx)-xi.transpose(); // TODO // We can also apply constraints here, like @@ -97,51 +238,255 @@ void GaussSeidel::init_solve( BLI_assert(data->b.rows()==nx); BLI_assert(data->b.cols()==data->x.cols()); - if (data->gsdata.last_dx.rows() != nx) + // Do we need to color the default colorings? + if (data->gsdata.A_colors.size() == 0) + compute_colors(&data->A, 1, data->gsdata.A_colors); + + // Verify color groups are correct { - data->gsdata.last_dx.resize(nx,3); - data->gsdata.last_dx.setZero(); + std::cout << "TESTING " << data->tets.rows() << " tets" << std::endl; + std::cout << "num colors: " << data->gsdata.A_colors.size() << std::endl; + int nt = data->tets.rows(); + int nc = data->gsdata.A_colors.size(); + for (int i=0; i &grp = data->gsdata.A_colors[i]; + int n_grp = grp.size(); + for (int j=0; jtets.row(k); + if (!in_tet(p_idx,t)) + continue; + + for (int l=0; lgsdata.A_colors.size() == 0 ) - compute_colors(&data->A,nullptr,data->gsdata.A_colors); + // Create large A if we haven't already. + if (data->gsdata.A3.rows() != nx*3) + make_n3(data->A, data->gsdata.A3); - // TODO: Eventually we'll replace KtK with the full-dof matrix. + // TODO + // Eventually we'll replace KtK with the full-dof matrix. // For now use z and test collisions against ground plane. - bool has_constraints = data->K[2].nonZeros()>0; - data->gsdata.KtK = data->K[2].transpose()*data->K[2]; + bool has_constraints = data->C.nonZeros()>0; - if (false)//(has_constraints) + // Finally, the new global matrix and rhs + if (has_constraints) { - (void)(has_constraints); - // TODO color A + KtK + data->gsdata.CtC = data->spring_k * data->C.transpose()*data->C; + data->gsdata.Ctd.noalias() = data->spring_k * data->C.transpose()*data->d; + data->gsdata.A3_plus_CtC = data->gsdata.A3 + data->gsdata.CtC; + data->gsdata.b3_plus_Ctd.resize(nx*3); + for (int i=0; igsdata.b3_plus_Ctd[i*3+0] = data->b(i,0)+data->gsdata.Ctd[i*3+0]; + data->gsdata.b3_plus_Ctd[i*3+1] = data->b(i,1)+data->gsdata.Ctd[i*3+1]; + data->gsdata.b3_plus_Ctd[i*3+2] = data->b(i,2)+data->gsdata.Ctd[i*3+2]; + } + compute_colors(&data->gsdata.A3_plus_CtC, 3, data->gsdata.A3_plus_CtC_colors); + } + else + { + data->gsdata.CtC.setZero(); + data->gsdata.Ctd.setZero(); + data->gsdata.A3_plus_CtC = data->gsdata.A3; + data->gsdata.b3_plus_Ctd.resize(nx*3); + for (int i=0; igsdata.b3_plus_Ctd[i*3+0] = data->b(i,0); + data->gsdata.b3_plus_Ctd[i*3+1] = data->b(i,1); + data->gsdata.b3_plus_Ctd[i*3+2] = data->b(i,2); + } + data->gsdata.A3_plus_CtC_colors = data->gsdata.A_colors; } } // end init solve +typedef struct GraphColorThreadData { + const RowSparseMatrix *A; + int stride; + std::vector > *adjacency; + std::vector > *palette; + int init_palette_size; + std::vector *conflict; + std::vector *node_colors; +} GraphColorThreadData; + +// Rehash of graph coloring from +// https://github.com/mattoverby/mclscene/blob/master/include/MCL/GraphColor.hpp void GaussSeidel::compute_colors( const RowSparseMatrix *A, - const RowSparseMatrix *KtK, // if null, just A + int stride, std::vector > &colors) { BLI_assert(A != nullptr); - if (KtK != nullptr) + BLI_assert(stride>0); + + // Graph color settings + int init_palette_size = 6; + + // Graph coloring tmp data + int n_nodes = A->rows()/stride; + std::vector > adjacency(n_nodes, std::vector()); + std::vector > palette(n_nodes, std::set()); + std::vector conflict(n_nodes,1); + std::vector node_colors(n_nodes,-1); + + // + // Step 1) + // Graph color initialization + // + auto parallel_init_graph = []( + void *__restrict userdata, + const int i, + const TaskParallelTLS *__restrict UNUSED(tls)) -> void { - throw std::runtime_error("**GaussSeidel::compute_colors TODO: KtK"); - } + GraphColorThreadData *td = (GraphColorThreadData*)userdata; + typedef RowSparseMatrix::InnerIterator InnerIter; + // Use lower trianglular portion of the matrix. + // That is, store adjacency inds that are lower than + // the current index. + for (InnerIter it(*(td->A),i*td->stride); it; ++it) + { + if (std::abs(it.value())==0.0) + continue; + if (it.col()/td->stride >= it.row()/td->stride) + break; + int idx = it.col()/td->stride; + td->adjacency->at(i).emplace_back(idx); + } + for( int j=0; jinit_palette_size; ++j ) // init colors + td->palette->at(i).insert(j); + }; + GraphColorThreadData thread_data = { + .A = A, .stride = stride, + .adjacency = &adjacency, + .palette = &palette, + .init_palette_size = init_palette_size, + .conflict = &conflict, + .node_colors = &node_colors }; + TaskParallelSettings settings; + BLI_parallel_range_settings_defaults(&settings); + BLI_task_parallel_range(0, n_nodes, &thread_data, parallel_init_graph, &settings); - colors.clear(); - int nx = A->rows(); + std::random_device rd; + std::mt19937 mt(rd()); + std::uniform_int_distribution dist(0, n_nodes); - { // DEBUGGING - colors.resize(nx,std::vector()); - for (int i=0; i node_queue(n_nodes); + std::iota(node_queue.begin(), node_queue.end(), 0); + int max_iters = n_nodes; + for (int rand_iter=0; n_nodes>0 && rand_iter < max_iters; ++rand_iter) + { + // Generate a random color and find conflicts + for (int i=0; i= idx) + throw std::runtime_error("GaussSeidel Error: Oops, not lower diag"); + if (adj_color==curr_color) + conflict[adj_idx] = 1; + } + } + + // Resolve conflicts and trim queue + std::set new_queue; + for (int i=0; i= (int)colors.size() ) + colors.emplace_back(std::vector()); + colors[color].emplace_back(i); } + // Remove empty color groups + for (std::vector >::iterator it = colors.begin(); it != colors.end();) + it->size() == 0 ? it = colors.erase(it) : it++; + } // end compute colors -} // namespace admmpd +} // namespace admmpd \ No newline at end of file -- cgit v1.2.3