From 8af51750b24ea037c5ad64641d79b05a60448d11 Mon Sep 17 00:00:00 2001 From: over0219 Date: Tue, 30 Jun 2020 17:07:29 -0500 Subject: working on mcgs --- extern/softbody/src/admmpd_collision.cpp | 94 ++++--- extern/softbody/src/admmpd_collision.h | 23 +- extern/softbody/src/admmpd_embeddedmesh.cpp | 12 +- extern/softbody/src/admmpd_embeddedmesh.h | 3 +- extern/softbody/src/admmpd_linsolve.cpp | 393 ++++++++++++++++++++++++++-- extern/softbody/src/admmpd_linsolve.h | 23 +- extern/softbody/src/admmpd_solver.cpp | 171 ++---------- extern/softbody/src/admmpd_solver.h | 6 - extern/softbody/src/admmpd_types.h | 35 +-- 9 files changed, 496 insertions(+), 264 deletions(-) (limited to 'extern/softbody/src') diff --git a/extern/softbody/src/admmpd_collision.cpp b/extern/softbody/src/admmpd_collision.cpp index bbda72f2428..b7e835a9185 100644 --- a/extern/softbody/src/admmpd_collision.cpp +++ b/extern/softbody/src/admmpd_collision.cpp @@ -20,8 +20,7 @@ VFCollisionPair::VFCollisionPair() : p_is_obs(0), // 0 or 1 q_idx(-1), // face q_is_obs(0), // 0 or 1 - pt_on_q(0,0,0), - q_n(0,0,0) + pt_on_q(0,0,0) {} void Collision::set_obstacles( @@ -89,7 +88,6 @@ void Collision::detect_discrete_vf( pair.q_idx = -1; pair.q_is_obs = 1; pair.pt_on_q = Vector3d(pt[0],pt[1],floor_z); - pair.q_n = Vector3d(0,0,1); } // Any faces to detect against? @@ -133,18 +131,17 @@ void Collision::detect_discrete_vf( pair.pt_on_q = find_nearest_tri.output.pt_on_tri; // Compute face normal - BLI_assert(pair.q_idx >= 0); - BLI_assert(pair.q_idx < mesh_tris->rows()); - RowVector3i tri_inds = mesh_tris->row(pair.q_idx); - BLI_assert(tri_inds[0]>=0 && tri_inds[0]rows()); - BLI_assert(tri_inds[1]>=0 && tri_inds[1]rows()); - BLI_assert(tri_inds[2]>=0 && tri_inds[2]rows()); - Vector3d tri[3] = { - mesh_x->row(tri_inds[0]), - mesh_x->row(tri_inds[1]), - mesh_x->row(tri_inds[2]) - }; - pair.q_n = ((tri[1]-tri[0]).cross(tri[2]-tri[0])).normalized(); +// BLI_assert(pair.q_idx >= 0); +// BLI_assert(pair.q_idx < mesh_tris->rows()); +// RowVector3i tri_inds = mesh_tris->row(pair.q_idx); +// BLI_assert(tri_inds[0]>=0 && tri_inds[0]rows()); +// BLI_assert(tri_inds[1]>=0 && tri_inds[1]rows()); +// BLI_assert(tri_inds[2]>=0 && tri_inds[2]rows()); +// Vector3d tri[3] = { +// mesh_x->row(tri_inds[0]), +// mesh_x->row(tri_inds[1]), +// mesh_x->row(tri_inds[2]) +// }; // std::stringstream ss; // ss << "\nhit:" << @@ -247,12 +244,10 @@ int EmbeddedMeshCollision::detect( return vf_pairs.size(); } // end detect -void EmbeddedMeshCollision::jacobian( +void EmbeddedMeshCollision::linearize( const Eigen::MatrixXd *x, - std::vector > *trips_x, - std::vector > *trips_y, - std::vector > *trips_z, - std::vector *l) + std::vector > *trips, + std::vector *d) { BLI_assert(x != NULL); BLI_assert(x->cols() == 3); @@ -261,16 +256,45 @@ void EmbeddedMeshCollision::jacobian( if (np==0) return; - l->reserve((int)l->size() + np); - trips_x->reserve((int)trips_x->size() + np*4); - trips_y->reserve((int)trips_y->size() + np*4); - trips_z->reserve((int)trips_z->size() + np*4); + d->reserve((int)d->size() + np); + trips->reserve((int)trips->size() + np*3*4); for (int i=0; i q_tris; + if (pair.q_is_obs) + { + // Special case, floor + if (pair.q_idx == -1) + { + q_n = Vector3d(0,0,1); + } + else + { + q_inds = obsdata.F.row(pair.q_idx); + q_tris = { + obsdata.V1.row(q_inds[0]), + obsdata.V1.row(q_inds[1]), + obsdata.V1.row(q_inds[2]) + }; + q_n = ((q_tris[1]-q_tris[0]).cross(q_tris[2]-q_tris[0])); + q_n.normalize(); + } + } + // TODO: obstacle point intersecting mesh if (pair.p_is_obs) { @@ -283,24 +307,14 @@ void EmbeddedMeshCollision::jacobian( RowVector4d bary = mesh->barys.row(emb_p_idx); int tet_idx = mesh->vtx_to_tet[emb_p_idx]; RowVector4i tet = mesh->tets.row(tet_idx); - - // Okay this is pretty ugly. I'm going to have to think about - // whether or not this is reasonable. - for (int j=0; j<3; ++j) + int c_idx = d->size(); + d->emplace_back(q_n.dot(pair.pt_on_q)); + for (int j=0; j<4; ++j) { - int c_idx = l->size(); - for (int k=0; k<4; ++k) - { - if (j==0) - trips_x->emplace_back(c_idx, tet[k], bary[k]*pair.q_n[0]); - if (j==1) - trips_y->emplace_back(c_idx, tet[k], bary[k]*pair.q_n[1]); - if (j==2) - trips_z->emplace_back(c_idx, tet[k], bary[k]*pair.q_n[2]); - } - l->emplace_back(pair.pt_on_q[j]*pair.q_n[j]); + trips->emplace_back(c_idx, tet[j]*3+0, bary[j]*q_n[0]); + trips->emplace_back(c_idx, tet[j]*3+1, bary[j]*q_n[1]); + trips->emplace_back(c_idx, tet[j]*3+2, bary[j]*q_n[2]); } - continue; } diff --git a/extern/softbody/src/admmpd_collision.h b/extern/softbody/src/admmpd_collision.h index f9c6363b89f..2ab7d343c05 100644 --- a/extern/softbody/src/admmpd_collision.h +++ b/extern/softbody/src/admmpd_collision.h @@ -52,15 +52,11 @@ public: // Special case for floor since it's common. virtual void set_floor(double z) = 0; - // Linearize the constraints and return Jacobian tensor. - // Constraints are linearized about x for constraint - // K x = l - virtual void jacobian( + // Linearize the constraints and return Jacobian. + virtual void linearize( const Eigen::MatrixXd *x, - std::vector > *trips_x, - std::vector > *trips_y, - std::vector > *trips_z, - std::vector *l) = 0; + std::vector > *trips, + std::vector *d) = 0; // Given a point and a surface mesh, // perform discrete collision and create @@ -83,7 +79,8 @@ class EmbeddedMeshCollision : public Collision { public: EmbeddedMeshCollision(const EmbeddedMeshData *mesh_) : mesh(mesh_), - floor_z(-std::numeric_limits::max()) +// floor_z(-std::numeric_limits::max()) + floor_z(0) {} // A floor is so common that it makes sense to hard @@ -100,12 +97,10 @@ public: // Linearizes the collision pairs about x // for the constraint Kx=l - void jacobian( + void linearize( const Eigen::MatrixXd *x, - std::vector > *trips_x, - std::vector > *trips_y, - std::vector > *trips_z, - std::vector *l); + std::vector > *trips, + std::vector *d); protected: // A ptr to the embedded mesh data diff --git a/extern/softbody/src/admmpd_embeddedmesh.cpp b/extern/softbody/src/admmpd_embeddedmesh.cpp index 68cd38b27d5..22ac59fef0c 100644 --- a/extern/softbody/src/admmpd_embeddedmesh.cpp +++ b/extern/softbody/src/admmpd_embeddedmesh.cpp @@ -67,7 +67,8 @@ bool EmbeddedMesh::generate( const Eigen::MatrixXd &V, // embedded verts const Eigen::MatrixXi &F, // embedded faces EmbeddedMeshData *emb_mesh, // where embedding is stored - Eigen::MatrixXd *x_tets) // lattice vertices, n x 3 + Eigen::MatrixXd *x_tets, // lattice vertices, n x 3 + bool trim_lattice) { // How big the grid cells are as a fraction // of the total mesh. @@ -191,9 +192,12 @@ bool EmbeddedMesh::generate( .tets = &tets, .keep_tet = &keep_tet }; - TaskParallelSettings settings; - BLI_parallel_range_settings_defaults(&settings); - BLI_task_parallel_range(0, nt0, &thread_data, parallel_keep_tet, &settings); + if (trim_lattice) + { + TaskParallelSettings settings; + BLI_parallel_range_settings_defaults(&settings); + BLI_task_parallel_range(0, nt0, &thread_data, parallel_keep_tet, &settings); + } // Loop over tets and remove as needed. // Mark referenced vertices to compute a mapping. diff --git a/extern/softbody/src/admmpd_embeddedmesh.h b/extern/softbody/src/admmpd_embeddedmesh.h index 85c64c7b0a5..eb7583b93b4 100644 --- a/extern/softbody/src/admmpd_embeddedmesh.h +++ b/extern/softbody/src/admmpd_embeddedmesh.h @@ -15,7 +15,8 @@ public: const Eigen::MatrixXd &V, // embedded verts const Eigen::MatrixXi &F, // embedded faces EmbeddedMeshData *emb_mesh, // where embedding is stored - Eigen::MatrixXd *x_tets); // lattice vertices, n x 3 + Eigen::MatrixXd *x_tets, // lattice vertices, n x 3 + bool trim_lattice = true); // remove elements outside embedded volume // Returns the vtx mapped from x/v and tets Eigen::Vector3d get_mapped_vertex( diff --git a/extern/softbody/src/admmpd_linsolve.cpp b/extern/softbody/src/admmpd_linsolve.cpp index 5b3a62dc7bb..0d40c49ef4d 100644 --- a/extern/softbody/src/admmpd_linsolve.cpp +++ b/extern/softbody/src/admmpd_linsolve.cpp @@ -4,12 +4,154 @@ #include "admmpd_linsolve.h" #include #include +#include #include +#include #include "BLI_assert.h" +#include "BLI_task.h" namespace admmpd { using namespace Eigen; +// Makes full n3 x n3 matrix +static inline void make_n3( + const RowSparseMatrix &A, + RowSparseMatrix &A3) +{ + int na = A.rows(); + SparseMatrix A_cm = A; // A to CM + SparseMatrix A3_cm(na*3, na*3); + A3_cm.reserve(A_cm.nonZeros()*3); + int cols = A_cm.rows(); + for(int c=0; c::InnerIterator itA(A_cm,c); itA; ++itA) + A3_cm.insertBack((itA.row()*3+dim), col) = itA.value(); + } + } + A3_cm.finalize(); + A3_cm.makeCompressed(); + A3 = A3_cm; // to RowMajor +} // end make_n3 + +void ConjugateGradients::solve( + const Options *options, + SolverData *data) +{ + BLI_assert(data != NULL); + BLI_assert(options != NULL); + int nx = data->x.rows(); + BLI_assert(nx > 0); + BLI_assert(data->A.rows() == nx); + BLI_assert(data->A.cols() == nx); + BLI_assert(data->b.rows() == nx); + BLI_assert(data->C.cols() == nx*3); + BLI_assert(data->d.rows() > 0); + BLI_assert(data->C.rows() == data->d.rows()); + + // If we don't have any constraints, we don't need to perform CG + data->b.noalias() = data->M_xbar + data->DtW2*(data->z-data->u); + if (data->C.nonZeros()==0) + { + data->x = data->ldltA.solve(data->b); + return; + } + + // Resize data associated with Conjugate-Gradient + admmpd::SolverData::GlobalStepData *gsdata = &data->gsdata; + if (gsdata->r.rows() != nx*3) + { + gsdata->r.resize(nx*3); + gsdata->z.resize(nx*3); + gsdata->p.resize(nx*3); + gsdata->A3p.resize(nx*3); + gsdata->b3_plus_Ctd.resize(nx*3); + make_n3(data->A, gsdata->A3); + } + + gsdata->Ctd = data->spring_k * data->C.transpose()*data->d; + gsdata->CtC = data->spring_k * data->C.transpose()*data->C; + gsdata->A3_plus_CtC = gsdata->A3 + gsdata->CtC; + VectorXd x3(nx*3); + for (int i=0; ib.row(i); + gsdata->b3_plus_Ctd.segment<3>(i*3) = bi+gsdata->Ctd.segment<3>(i*3); + x3.segment<3>(i*3) = data->x.row(i); + } + + gsdata->r.noalias() = gsdata->b3_plus_Ctd - gsdata->A3_plus_CtC*x3; + solve_Ax_b(data,&gsdata->z,&gsdata->r); + gsdata->p = gsdata->z; + + for (int iter=0; itermax_cg_iters; ++iter) + { + gsdata->A3p.noalias() = gsdata->A3_plus_CtC*gsdata->p; + double p_dot_Ap = gsdata->p.dot(gsdata->A3p); + if (p_dot_Ap==0.0) + break; + + double zk_dot_rk = gsdata->z.dot(gsdata->r); + if (zk_dot_rk==0.0) + break; + + double alpha = zk_dot_rk / p_dot_Ap; + x3.noalias() += alpha * gsdata->p; + gsdata->r.noalias() -= alpha * gsdata->A3p; + double r_norm = gsdata->r.lpNorm(); + if (r_norm < options->min_res) + break; + + solve_Ax_b(data,&gsdata->z,&gsdata->r); + double zk1_dot_rk1 = gsdata->z.dot(gsdata->r); + double beta = zk1_dot_rk1 / zk_dot_rk; + gsdata->p = gsdata->z + beta*gsdata->p; + } + + for (int i=0; ix.row(i) = x3.segment<3>(i*3); + +} // end ConjugateGradients solve + +// Solve Ax = b in parallel and apply +// dimensionality mapping with temporaries +void ConjugateGradients::solve_Ax_b( + SolverData *data_, + VectorXd *x_, + VectorXd *b_) +{ + typedef struct LinSolveThreadData { + SolverData *data; + VectorXd *ls_x; + VectorXd *ls_b; + } LinSolveThreadData; + + auto parallel_lin_solve = []( + void *__restrict userdata, + const int i, + const TaskParallelTLS *__restrict UNUSED(tls))->void + { + LinSolveThreadData *td = (LinSolveThreadData*)userdata; + int nx = td->ls_x->rows()/3; + VectorXd b(nx); + for (int j=0; jls_b->operator[](j*3+i); + VectorXd x = td->data->ldltA.solve(b); + for (int j=0; jls_x->operator[](j*3+i) = x[j]; + }; + + LinSolveThreadData thread_data = {.data=data_, .ls_x=x_, .ls_b=b_}; + TaskParallelSettings settings; + BLI_parallel_range_settings_defaults(&settings); + BLI_task_parallel_range(0, 3, &thread_data, parallel_lin_solve, &settings); + +} // end solve Ax=b + void GaussSeidel::solve( const Options *options, SolverData *data) @@ -67,7 +209,6 @@ void GaussSeidel::solve( for (int j=0; j<3; ++j) xi_new[j] *= inv_aii[j]; data->x.row(idx) = xi*(1.0-omega) + xi_new*omega; - data->gsdata.last_dx.row(idx) = data->x.row(idx)-xi.transpose(); // TODO // We can also apply constraints here, like @@ -97,51 +238,255 @@ void GaussSeidel::init_solve( BLI_assert(data->b.rows()==nx); BLI_assert(data->b.cols()==data->x.cols()); - if (data->gsdata.last_dx.rows() != nx) + // Do we need to color the default colorings? + if (data->gsdata.A_colors.size() == 0) + compute_colors(&data->A, 1, data->gsdata.A_colors); + + // Verify color groups are correct { - data->gsdata.last_dx.resize(nx,3); - data->gsdata.last_dx.setZero(); + std::cout << "TESTING " << data->tets.rows() << " tets" << std::endl; + std::cout << "num colors: " << data->gsdata.A_colors.size() << std::endl; + int nt = data->tets.rows(); + int nc = data->gsdata.A_colors.size(); + for (int i=0; i &grp = data->gsdata.A_colors[i]; + int n_grp = grp.size(); + for (int j=0; jtets.row(k); + if (!in_tet(p_idx,t)) + continue; + + for (int l=0; lgsdata.A_colors.size() == 0 ) - compute_colors(&data->A,nullptr,data->gsdata.A_colors); + // Create large A if we haven't already. + if (data->gsdata.A3.rows() != nx*3) + make_n3(data->A, data->gsdata.A3); - // TODO: Eventually we'll replace KtK with the full-dof matrix. + // TODO + // Eventually we'll replace KtK with the full-dof matrix. // For now use z and test collisions against ground plane. - bool has_constraints = data->K[2].nonZeros()>0; - data->gsdata.KtK = data->K[2].transpose()*data->K[2]; + bool has_constraints = data->C.nonZeros()>0; - if (false)//(has_constraints) + // Finally, the new global matrix and rhs + if (has_constraints) { - (void)(has_constraints); - // TODO color A + KtK + data->gsdata.CtC = data->spring_k * data->C.transpose()*data->C; + data->gsdata.Ctd.noalias() = data->spring_k * data->C.transpose()*data->d; + data->gsdata.A3_plus_CtC = data->gsdata.A3 + data->gsdata.CtC; + data->gsdata.b3_plus_Ctd.resize(nx*3); + for (int i=0; igsdata.b3_plus_Ctd[i*3+0] = data->b(i,0)+data->gsdata.Ctd[i*3+0]; + data->gsdata.b3_plus_Ctd[i*3+1] = data->b(i,1)+data->gsdata.Ctd[i*3+1]; + data->gsdata.b3_plus_Ctd[i*3+2] = data->b(i,2)+data->gsdata.Ctd[i*3+2]; + } + compute_colors(&data->gsdata.A3_plus_CtC, 3, data->gsdata.A3_plus_CtC_colors); + } + else + { + data->gsdata.CtC.setZero(); + data->gsdata.Ctd.setZero(); + data->gsdata.A3_plus_CtC = data->gsdata.A3; + data->gsdata.b3_plus_Ctd.resize(nx*3); + for (int i=0; igsdata.b3_plus_Ctd[i*3+0] = data->b(i,0); + data->gsdata.b3_plus_Ctd[i*3+1] = data->b(i,1); + data->gsdata.b3_plus_Ctd[i*3+2] = data->b(i,2); + } + data->gsdata.A3_plus_CtC_colors = data->gsdata.A_colors; } } // end init solve +typedef struct GraphColorThreadData { + const RowSparseMatrix *A; + int stride; + std::vector > *adjacency; + std::vector > *palette; + int init_palette_size; + std::vector *conflict; + std::vector *node_colors; +} GraphColorThreadData; + +// Rehash of graph coloring from +// https://github.com/mattoverby/mclscene/blob/master/include/MCL/GraphColor.hpp void GaussSeidel::compute_colors( const RowSparseMatrix *A, - const RowSparseMatrix *KtK, // if null, just A + int stride, std::vector > &colors) { BLI_assert(A != nullptr); - if (KtK != nullptr) + BLI_assert(stride>0); + + // Graph color settings + int init_palette_size = 6; + + // Graph coloring tmp data + int n_nodes = A->rows()/stride; + std::vector > adjacency(n_nodes, std::vector()); + std::vector > palette(n_nodes, std::set()); + std::vector conflict(n_nodes,1); + std::vector node_colors(n_nodes,-1); + + // + // Step 1) + // Graph color initialization + // + auto parallel_init_graph = []( + void *__restrict userdata, + const int i, + const TaskParallelTLS *__restrict UNUSED(tls)) -> void { - throw std::runtime_error("**GaussSeidel::compute_colors TODO: KtK"); - } + GraphColorThreadData *td = (GraphColorThreadData*)userdata; + typedef RowSparseMatrix::InnerIterator InnerIter; + // Use lower trianglular portion of the matrix. + // That is, store adjacency inds that are lower than + // the current index. + for (InnerIter it(*(td->A),i*td->stride); it; ++it) + { + if (std::abs(it.value())==0.0) + continue; + if (it.col()/td->stride >= it.row()/td->stride) + break; + int idx = it.col()/td->stride; + td->adjacency->at(i).emplace_back(idx); + } + for( int j=0; jinit_palette_size; ++j ) // init colors + td->palette->at(i).insert(j); + }; + GraphColorThreadData thread_data = { + .A = A, .stride = stride, + .adjacency = &adjacency, + .palette = &palette, + .init_palette_size = init_palette_size, + .conflict = &conflict, + .node_colors = &node_colors }; + TaskParallelSettings settings; + BLI_parallel_range_settings_defaults(&settings); + BLI_task_parallel_range(0, n_nodes, &thread_data, parallel_init_graph, &settings); - colors.clear(); - int nx = A->rows(); + std::random_device rd; + std::mt19937 mt(rd()); + std::uniform_int_distribution dist(0, n_nodes); - { // DEBUGGING - colors.resize(nx,std::vector()); - for (int i=0; i node_queue(n_nodes); + std::iota(node_queue.begin(), node_queue.end(), 0); + int max_iters = n_nodes; + for (int rand_iter=0; n_nodes>0 && rand_iter < max_iters; ++rand_iter) + { + // Generate a random color and find conflicts + for (int i=0; i= idx) + throw std::runtime_error("GaussSeidel Error: Oops, not lower diag"); + if (adj_color==curr_color) + conflict[adj_idx] = 1; + } + } + + // Resolve conflicts and trim queue + std::set new_queue; + for (int i=0; i= (int)colors.size() ) + colors.emplace_back(std::vector()); + colors[color].emplace_back(i); } + // Remove empty color groups + for (std::vector >::iterator it = colors.begin(); it != colors.end();) + it->size() == 0 ? it = colors.erase(it) : it++; + } // end compute colors -} // namespace admmpd +} // namespace admmpd \ No newline at end of file diff --git a/extern/softbody/src/admmpd_linsolve.h b/extern/softbody/src/admmpd_linsolve.h index b1dd9ae8f4e..3dc3764c6e9 100644 --- a/extern/softbody/src/admmpd_linsolve.h +++ b/extern/softbody/src/admmpd_linsolve.h @@ -8,25 +8,36 @@ namespace admmpd { +// Preconditioned Conjugate Gradients +class ConjugateGradients { +public: + void solve( + const Options *options, + SolverData *data); + +protected: + // Apply preconditioner + void solve_Ax_b( + SolverData *data, + Eigen::VectorXd *x, + Eigen::VectorXd *b); +}; + +// Multi-Colored Gauss-Seidel class GaussSeidel { public: - // Solves (A + KtK) x = (b + Ktl) - // x and b passed as separate variables - // for debugging/testing purposes. void solve( const Options *options, SolverData *data); protected: - // Allocates data, computes colors void init_solve( const Options *options, SolverData *data); - // Computes colors of A + KtK void compute_colors( const RowSparseMatrix *A, - const RowSparseMatrix *KtK, // if null, just A + int stride, std::vector > &colors); }; diff --git a/extern/softbody/src/admmpd_solver.cpp b/extern/softbody/src/admmpd_solver.cpp index 944ebf7826e..9379d4f297b 100644 --- a/extern/softbody/src/admmpd_solver.cpp +++ b/extern/softbody/src/admmpd_solver.cpp @@ -18,7 +18,6 @@ namespace admmpd { using namespace Eigen; -template using RowSparseMatrix = SparseMatrix; typedef struct ThreadData { const Options *options; @@ -79,8 +78,8 @@ int Solver::solve( // Perform collision detection and linearization update_constraints(options,data,collision); - // Solve Ax=b s.t. Kx=l - solve_conjugate_gradients(options,data); + // Solve Ax=b s.t. Cx=d + ConjugateGradients().solve(options,data); //GaussSeidel().solve(options,data); } // end solver iters @@ -170,168 +169,33 @@ void Solver::update_constraints( collision->detect(&data->x_start, &data->x); - std::vector l_coeffs; - std::vector > trips_x; - std::vector > trips_y; - std::vector > trips_z; + std::vector d_coeffs; + std::vector > trips; // TODO collision detection - collision->jacobian( + collision->linearize( &data->x, - &trips_x, - &trips_y, - &trips_z, - &l_coeffs); + &trips, + &d_coeffs); // Check number of constraints. // If no constraints, clear Jacobian. int nx = data->x.rows(); - int nc = l_coeffs.size(); + int nc = d_coeffs.size(); if (nc==0) { - data->l.setZero(); - for (int i=0; i<3; ++i) - data->K[i].setZero(); - + data->d.setZero(); + data->C.setZero(); return; } // Otherwise update the data. - data->l.resize(nc); - for (int i=0; il[i] = l_coeffs[i]; - data->K[0].resize(nc,nx); - data->K[0].setFromTriplets(trips_x.begin(),trips_x.end()); - data->K[1].resize(nc,nx); - data->K[1].setFromTriplets(trips_y.begin(),trips_y.end()); - data->K[2].resize(nc,nx); - data->K[2].setFromTriplets(trips_z.begin(),trips_z.end()); + data->d = Map(d_coeffs.data(), d_coeffs.size()); + data->C.resize(nc,nx*3); + data->C.setFromTriplets(trips.begin(),trips.end()); } // end update constraints -typedef struct LinSolveThreadData { - SolverData *data; - MatrixXd *ls_x; - MatrixXd *ls_b; -} LinSolveThreadData; - -static void parallel_lin_solve( - void *__restrict userdata, - const int i, - const TaskParallelTLS *__restrict UNUSED(tls)) -{ - LinSolveThreadData *td = (LinSolveThreadData*)userdata; - td->ls_x->col(i) = td->data->ldltA.solve(td->ls_b->col(i)); -} // end parallel lin solve - -void Solver::solve_conjugate_gradients( - const Options *options, - SolverData *data) -{ - BLI_assert(data != NULL); - BLI_assert(options != NULL); - int nx = data->x.rows(); - BLI_assert(nx > 0); - BLI_assert(data->A.rows() == nx); - BLI_assert(data->A.cols() == nx); - BLI_assert(data->b.rows() == nx); - BLI_assert(data->K[0].cols() == nx); - BLI_assert(data->K[1].cols() == nx); - BLI_assert(data->K[2].cols() == nx); - BLI_assert(data->l.rows() > 0); - BLI_assert(data->K[0].rows() == data->l.rows()); - BLI_assert(data->K[1].rows() == data->l.rows()); - BLI_assert(data->K[2].rows() == data->l.rows()); - - // Compute RHS - data->b.noalias() = data->M_xbar + data->DtW2*(data->z-data->u); - - // Solve Ax = b in parallel - auto solve_Ax_b = []( - SolverData *data_, - MatrixXd *x_, - MatrixXd *b_) - { - LinSolveThreadData thread_data = {.data=data_, .ls_x=x_, .ls_b=b_}; - TaskParallelSettings settings; - BLI_parallel_range_settings_defaults(&settings); - BLI_task_parallel_range(0, 3, &thread_data, parallel_lin_solve, &settings); - }; - - // If we don't have any constraints, - // we don't need to perform CG - if (std::max(std::max( - data->K[0].nonZeros(), - data->K[1].nonZeros()), - data->K[2].nonZeros())==0) - { - solve_Ax_b(data,&data->x,&data->b); - return; - } - - // Inner product of matrices interpreted - // if they were instead vectorized - auto mat_inner = []( - const MatrixXd &A, - const MatrixXd &B) - { - double dot = 0.0; - int nr = std::min(A.rows(), B.rows()); - for( int i=0; icgdata; - double eps = options->min_res; - cgdata->b = data->b; - if (cgdata->r.rows() != nx) - { - cgdata->r.resize(nx,3); - cgdata->z.resize(nx,3); - cgdata->p.resize(nx,3); - cgdata->Ap.resize(nx,3); - } - - for (int i=0; i<3; ++i) - { - RowSparseMatrix Kt = data->K[i].transpose(); - cgdata->A[i] = data->A + data->spring_k*RowSparseMatrix(Kt*data->K[i]); - cgdata->b.col(i).noalias() += data->spring_k*Kt*data->l; - cgdata->r.col(i).noalias() = cgdata->b.col(i) - cgdata->A[i]*data->x.col(i); - } - solve_Ax_b(data,&cgdata->z,&cgdata->r); - cgdata->p = cgdata->z; - - for (int iter=0; itermax_cg_iters; ++iter) - { - for (int i=0; i<3; ++i) - cgdata->Ap.col(i).noalias() = cgdata->A[i]*cgdata->p.col(i); - - double p_dot_Ap = mat_inner(cgdata->p,cgdata->Ap); - if (p_dot_Ap==0.0) - break; - - double zk_dot_rk = mat_inner(cgdata->z,cgdata->r); - if (zk_dot_rk==0.0) - break; - - double alpha = zk_dot_rk / p_dot_Ap; - data->x.noalias() += alpha * cgdata->p; - cgdata->r.noalias() -= alpha * cgdata->Ap; - if (cgdata->r.lpNorm() < eps) - break; - - solve_Ax_b(data,&cgdata->z,&cgdata->r); - double beta = mat_inner(cgdata->z,cgdata->r) / zk_dot_rk; - cgdata->p = cgdata->z + beta*cgdata->p; - } - -} // end solve conjugate gradients - bool Solver::compute_matrices( const Options *options, SolverData *data) @@ -392,9 +256,8 @@ bool Solver::compute_matrices( // Constraint data data->spring_k = options->mult_k*data->A.diagonal().maxCoeff(); - data->l = VectorXd::Zero(1); - for (int i=0; i<3; ++i) - data->K[i].resize(1,nx); + data->C.resize(1,nx*3); + data->d = VectorXd::Zero(1); // ADMM dual/lagrange data->z.resize(n_row_D,3); @@ -422,6 +285,10 @@ void Solver::append_energies( Lame lame; lame.set_from_youngs_poisson(options->youngs, options->poisson); + // The possibility of having an error in energy initialization + // while still wanting to continue the simulation is very low. + // We can parallelize this step if need be. + int energy_index = 0; for (int i=0; i D; // reduction matrix RowSparseMatrix DtW2; // D'W^2 RowSparseMatrix A; // M + D'W^2D - RowSparseMatrix K[3]; // constraint Jacobian - Eigen::VectorXd l; // constraint rhs (Kx=l) + RowSparseMatrix C; // linearized constraints + Eigen::VectorXd d; // constraints rhs double spring_k; // constraint stiffness Eigen::SimplicialLDLT > ldltA; - struct CGData { // Temporaries used in conjugate gradients - RowSparseMatrix A[3]; // (M + D'W^2D) + k * Kt K - Eigen::MatrixXd b; // M xbar + DtW2(z-u) + k Kt l - Eigen::MatrixXd r; // residual - Eigen::MatrixXd z; - Eigen::MatrixXd p; - Eigen::MatrixXd Ap; // A * p - } cgdata; - struct GSData { // Temporaries used in Gauss-Seidel - RowSparseMatrix KtK; // k * Kt K, different dim than A! - Eigen::MatrixXd last_dx; // last GS iter change in x - std::vector > A_colors; // colors of just A matrix - std::vector > A_KtK_colors; // colors of just A+KtK - } gsdata; + struct GlobalStepData { // Temporaries used in global step + RowSparseMatrix A3; // (M + D'W^2D) n3 x n3 + RowSparseMatrix CtC; // k * Ct C + RowSparseMatrix A3_plus_CtC; + Eigen::VectorXd Ctd; // k * Ct d + Eigen::VectorXd b3_plus_Ctd; // M xbar + DtW2(z-u) + k Kt l + // Used by Conjugate-Gradients: + Eigen::VectorXd r; // residual + Eigen::VectorXd z; // auxilary + Eigen::VectorXd p; // direction + Eigen::VectorXd A3p; // A3 * p + // Used by Gauss-Seidel: + std::vector > A_colors; // colors of (original) A matrix + std::vector > A3_plus_CtC_colors; // colors of A3+KtK + } gsdata; // Set in append_energies: std::vector indices; // per-energy index into D (row, num rows) std::vector rest_volumes; // per-energy rest volume -- cgit v1.2.3