Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorover0219 <over0219@umn.edu>2020-07-01 01:07:29 +0300
committerover0219 <over0219@umn.edu>2020-07-01 01:07:29 +0300
commit8af51750b24ea037c5ad64641d79b05a60448d11 (patch)
tree614b884417cb67f2532f703a02180002d2030eb1 /extern/softbody/src/admmpd_linsolve.cpp
parenta0310586dfb35cfde3674fb94ac42a9a3ebb6ea1 (diff)
working on mcgs
Diffstat (limited to 'extern/softbody/src/admmpd_linsolve.cpp')
-rw-r--r--extern/softbody/src/admmpd_linsolve.cpp393
1 files changed, 369 insertions, 24 deletions
diff --git a/extern/softbody/src/admmpd_linsolve.cpp b/extern/softbody/src/admmpd_linsolve.cpp
index 5b3a62dc7bb..0d40c49ef4d 100644
--- a/extern/softbody/src/admmpd_linsolve.cpp
+++ b/extern/softbody/src/admmpd_linsolve.cpp
@@ -4,12 +4,154 @@
#include "admmpd_linsolve.h"
#include <numeric>
#include <iostream>
+#include <set>
#include <unordered_set>
+#include <random>
#include "BLI_assert.h"
+#include "BLI_task.h"
namespace admmpd {
using namespace Eigen;
+// Makes full n3 x n3 matrix
+static inline void make_n3(
+ const RowSparseMatrix<double> &A,
+ RowSparseMatrix<double> &A3)
+{
+ int na = A.rows();
+ SparseMatrix<double> A_cm = A; // A to CM
+ SparseMatrix<double> A3_cm(na*3, na*3);
+ A3_cm.reserve(A_cm.nonZeros()*3);
+ int cols = A_cm.rows();
+ for(int c=0; c<cols; ++c)
+ {
+ for(int dim=0; dim<3; ++dim)
+ {
+ int col = c*3+dim;
+ A3_cm.startVec(col);
+ for(SparseMatrix<double>::InnerIterator itA(A_cm,c); itA; ++itA)
+ A3_cm.insertBack((itA.row()*3+dim), col) = itA.value();
+ }
+ }
+ A3_cm.finalize();
+ A3_cm.makeCompressed();
+ A3 = A3_cm; // to RowMajor
+} // end make_n3
+
+void ConjugateGradients::solve(
+ const Options *options,
+ SolverData *data)
+{
+ BLI_assert(data != NULL);
+ BLI_assert(options != NULL);
+ int nx = data->x.rows();
+ BLI_assert(nx > 0);
+ BLI_assert(data->A.rows() == nx);
+ BLI_assert(data->A.cols() == nx);
+ BLI_assert(data->b.rows() == nx);
+ BLI_assert(data->C.cols() == nx*3);
+ BLI_assert(data->d.rows() > 0);
+ BLI_assert(data->C.rows() == data->d.rows());
+
+ // If we don't have any constraints, we don't need to perform CG
+ data->b.noalias() = data->M_xbar + data->DtW2*(data->z-data->u);
+ if (data->C.nonZeros()==0)
+ {
+ data->x = data->ldltA.solve(data->b);
+ return;
+ }
+
+ // Resize data associated with Conjugate-Gradient
+ admmpd::SolverData::GlobalStepData *gsdata = &data->gsdata;
+ if (gsdata->r.rows() != nx*3)
+ {
+ gsdata->r.resize(nx*3);
+ gsdata->z.resize(nx*3);
+ gsdata->p.resize(nx*3);
+ gsdata->A3p.resize(nx*3);
+ gsdata->b3_plus_Ctd.resize(nx*3);
+ make_n3(data->A, gsdata->A3);
+ }
+
+ gsdata->Ctd = data->spring_k * data->C.transpose()*data->d;
+ gsdata->CtC = data->spring_k * data->C.transpose()*data->C;
+ gsdata->A3_plus_CtC = gsdata->A3 + gsdata->CtC;
+ VectorXd x3(nx*3);
+ for (int i=0; i<nx; ++i)
+ {
+ Vector3d bi = data->b.row(i);
+ gsdata->b3_plus_Ctd.segment<3>(i*3) = bi+gsdata->Ctd.segment<3>(i*3);
+ x3.segment<3>(i*3) = data->x.row(i);
+ }
+
+ gsdata->r.noalias() = gsdata->b3_plus_Ctd - gsdata->A3_plus_CtC*x3;
+ solve_Ax_b(data,&gsdata->z,&gsdata->r);
+ gsdata->p = gsdata->z;
+
+ for (int iter=0; iter<options->max_cg_iters; ++iter)
+ {
+ gsdata->A3p.noalias() = gsdata->A3_plus_CtC*gsdata->p;
+ double p_dot_Ap = gsdata->p.dot(gsdata->A3p);
+ if (p_dot_Ap==0.0)
+ break;
+
+ double zk_dot_rk = gsdata->z.dot(gsdata->r);
+ if (zk_dot_rk==0.0)
+ break;
+
+ double alpha = zk_dot_rk / p_dot_Ap;
+ x3.noalias() += alpha * gsdata->p;
+ gsdata->r.noalias() -= alpha * gsdata->A3p;
+ double r_norm = gsdata->r.lpNorm<Infinity>();
+ if (r_norm < options->min_res)
+ break;
+
+ solve_Ax_b(data,&gsdata->z,&gsdata->r);
+ double zk1_dot_rk1 = gsdata->z.dot(gsdata->r);
+ double beta = zk1_dot_rk1 / zk_dot_rk;
+ gsdata->p = gsdata->z + beta*gsdata->p;
+ }
+
+ for (int i=0; i<nx; ++i)
+ data->x.row(i) = x3.segment<3>(i*3);
+
+} // end ConjugateGradients solve
+
+// Solve Ax = b in parallel and apply
+// dimensionality mapping with temporaries
+void ConjugateGradients::solve_Ax_b(
+ SolverData *data_,
+ VectorXd *x_,
+ VectorXd *b_)
+{
+ typedef struct LinSolveThreadData {
+ SolverData *data;
+ VectorXd *ls_x;
+ VectorXd *ls_b;
+ } LinSolveThreadData;
+
+ auto parallel_lin_solve = [](
+ void *__restrict userdata,
+ const int i,
+ const TaskParallelTLS *__restrict UNUSED(tls))->void
+ {
+ LinSolveThreadData *td = (LinSolveThreadData*)userdata;
+ int nx = td->ls_x->rows()/3;
+ VectorXd b(nx);
+ for (int j=0; j<nx; ++j)
+ b[j] = td->ls_b->operator[](j*3+i);
+ VectorXd x = td->data->ldltA.solve(b);
+ for (int j=0; j<nx; ++j)
+ td->ls_x->operator[](j*3+i) = x[j];
+ };
+
+ LinSolveThreadData thread_data = {.data=data_, .ls_x=x_, .ls_b=b_};
+ TaskParallelSettings settings;
+ BLI_parallel_range_settings_defaults(&settings);
+ BLI_task_parallel_range(0, 3, &thread_data, parallel_lin_solve, &settings);
+
+} // end solve Ax=b
+
void GaussSeidel::solve(
const Options *options,
SolverData *data)
@@ -67,7 +209,6 @@ void GaussSeidel::solve(
for (int j=0; j<3; ++j)
xi_new[j] *= inv_aii[j];
data->x.row(idx) = xi*(1.0-omega) + xi_new*omega;
- data->gsdata.last_dx.row(idx) = data->x.row(idx)-xi.transpose();
// TODO
// We can also apply constraints here, like
@@ -97,51 +238,255 @@ void GaussSeidel::init_solve(
BLI_assert(data->b.rows()==nx);
BLI_assert(data->b.cols()==data->x.cols());
- if (data->gsdata.last_dx.rows() != nx)
+ // Do we need to color the default colorings?
+ if (data->gsdata.A_colors.size() == 0)
+ compute_colors(&data->A, 1, data->gsdata.A_colors);
+
+ // Verify color groups are correct
{
- data->gsdata.last_dx.resize(nx,3);
- data->gsdata.last_dx.setZero();
+ std::cout << "TESTING " << data->tets.rows() << " tets" << std::endl;
+ std::cout << "num colors: " << data->gsdata.A_colors.size() << std::endl;
+ int nt = data->tets.rows();
+ int nc = data->gsdata.A_colors.size();
+ for (int i=0; i<nc; ++i)
+ {
+ // Each vertex in the color should not
+ // be a part of a tet with a vertex in the same color
+ const std::vector<int> &grp = data->gsdata.A_colors[i];
+ int n_grp = grp.size();
+ for (int j=0; j<n_grp; ++j)
+ {
+ int p_idx = grp[j];
+ auto in_tet = [](int idx, const RowVector4i &t)
+ {
+ return (t[0]==idx||t[1]==idx||t[2]==idx||t[3]==idx);
+ };
+
+ for (int k=0; k<nt; ++k)
+ {
+ RowVector4i t = data->tets.row(k);
+ if (!in_tet(p_idx,t))
+ continue;
+
+ for (int l=0; l<n_grp; ++l)
+ {
+ int q_idx = grp[l];
+ if (p_idx==q_idx)
+ continue;
+
+ if (in_tet(q_idx,t))
+ {
+std::cerr << "p: " << p_idx << ", q: " << q_idx << ", tet (" << k << "): " << t << std::endl;
+// throw std::runtime_error("GaussSeidel Error: Color contains two verts form same tet!");
+ }
+ }
+ }
+ }
+ }
}
- // Do we need to color the default colorings?
- if( data->gsdata.A_colors.size() == 0 )
- compute_colors(&data->A,nullptr,data->gsdata.A_colors);
+ // Create large A if we haven't already.
+ if (data->gsdata.A3.rows() != nx*3)
+ make_n3(data->A, data->gsdata.A3);
- // TODO: Eventually we'll replace KtK with the full-dof matrix.
+ // TODO
+ // Eventually we'll replace KtK with the full-dof matrix.
// For now use z and test collisions against ground plane.
- bool has_constraints = data->K[2].nonZeros()>0;
- data->gsdata.KtK = data->K[2].transpose()*data->K[2];
+ bool has_constraints = data->C.nonZeros()>0;
- if (false)//(has_constraints)
+ // Finally, the new global matrix and rhs
+ if (has_constraints)
{
- (void)(has_constraints);
- // TODO color A + KtK
+ data->gsdata.CtC = data->spring_k * data->C.transpose()*data->C;
+ data->gsdata.Ctd.noalias() = data->spring_k * data->C.transpose()*data->d;
+ data->gsdata.A3_plus_CtC = data->gsdata.A3 + data->gsdata.CtC;
+ data->gsdata.b3_plus_Ctd.resize(nx*3);
+ for (int i=0; i<nx; ++i)
+ {
+ data->gsdata.b3_plus_Ctd[i*3+0] = data->b(i,0)+data->gsdata.Ctd[i*3+0];
+ data->gsdata.b3_plus_Ctd[i*3+1] = data->b(i,1)+data->gsdata.Ctd[i*3+1];
+ data->gsdata.b3_plus_Ctd[i*3+2] = data->b(i,2)+data->gsdata.Ctd[i*3+2];
+ }
+ compute_colors(&data->gsdata.A3_plus_CtC, 3, data->gsdata.A3_plus_CtC_colors);
+ }
+ else
+ {
+ data->gsdata.CtC.setZero();
+ data->gsdata.Ctd.setZero();
+ data->gsdata.A3_plus_CtC = data->gsdata.A3;
+ data->gsdata.b3_plus_Ctd.resize(nx*3);
+ for (int i=0; i<nx; ++i)
+ {
+ data->gsdata.b3_plus_Ctd[i*3+0] = data->b(i,0);
+ data->gsdata.b3_plus_Ctd[i*3+1] = data->b(i,1);
+ data->gsdata.b3_plus_Ctd[i*3+2] = data->b(i,2);
+ }
+ data->gsdata.A3_plus_CtC_colors = data->gsdata.A_colors;
}
} // end init solve
+typedef struct GraphColorThreadData {
+ const RowSparseMatrix<double> *A;
+ int stride;
+ std::vector<std::vector<int> > *adjacency;
+ std::vector<std::set<int> > *palette;
+ int init_palette_size;
+ std::vector<int> *conflict;
+ std::vector<int> *node_colors;
+} GraphColorThreadData;
+
+// Rehash of graph coloring from
+// https://github.com/mattoverby/mclscene/blob/master/include/MCL/GraphColor.hpp
void GaussSeidel::compute_colors(
const RowSparseMatrix<double> *A,
- const RowSparseMatrix<double> *KtK, // if null, just A
+ int stride,
std::vector<std::vector<int> > &colors)
{
BLI_assert(A != nullptr);
- if (KtK != nullptr)
+ BLI_assert(stride>0);
+
+ // Graph color settings
+ int init_palette_size = 6;
+
+ // Graph coloring tmp data
+ int n_nodes = A->rows()/stride;
+ std::vector<std::vector<int> > adjacency(n_nodes, std::vector<int>());
+ std::vector<std::set<int> > palette(n_nodes, std::set<int>());
+ std::vector<int> conflict(n_nodes,1);
+ std::vector<int> node_colors(n_nodes,-1);
+
+ //
+ // Step 1)
+ // Graph color initialization
+ //
+ auto parallel_init_graph = [](
+ void *__restrict userdata,
+ const int i,
+ const TaskParallelTLS *__restrict UNUSED(tls)) -> void
{
- throw std::runtime_error("**GaussSeidel::compute_colors TODO: KtK");
- }
+ GraphColorThreadData *td = (GraphColorThreadData*)userdata;
+ typedef RowSparseMatrix<double>::InnerIterator InnerIter;
+ // Use lower trianglular portion of the matrix.
+ // That is, store adjacency inds that are lower than
+ // the current index.
+ for (InnerIter it(*(td->A),i*td->stride); it; ++it)
+ {
+ if (std::abs(it.value())==0.0)
+ continue;
+ if (it.col()/td->stride >= it.row()/td->stride)
+ break;
+ int idx = it.col()/td->stride;
+ td->adjacency->at(i).emplace_back(idx);
+ }
+ for( int j=0; j<td->init_palette_size; ++j ) // init colors
+ td->palette->at(i).insert(j);
+ };
+ GraphColorThreadData thread_data = {
+ .A = A, .stride = stride,
+ .adjacency = &adjacency,
+ .palette = &palette,
+ .init_palette_size = init_palette_size,
+ .conflict = &conflict,
+ .node_colors = &node_colors };
+ TaskParallelSettings settings;
+ BLI_parallel_range_settings_defaults(&settings);
+ BLI_task_parallel_range(0, n_nodes, &thread_data, parallel_init_graph, &settings);
- colors.clear();
- int nx = A->rows();
+ std::random_device rd;
+ std::mt19937 mt(rd());
+ std::uniform_int_distribution<int> dist(0, n_nodes);
- { // DEBUGGING
- colors.resize(nx,std::vector<int>());
- for (int i=0; i<nx; ++i)
+ //
+ // Step 2)
+ // Stochastic Graph coloring
+ //
+ std::vector<int> node_queue(n_nodes);
+ std::iota(node_queue.begin(), node_queue.end(), 0);
+ int max_iters = n_nodes;
+ for (int rand_iter=0; n_nodes>0 && rand_iter < max_iters; ++rand_iter)
+ {
+ // Generate a random color and find conflicts
+ for (int i=0; i<n_nodes; ++i)
+ {
+ int idx = node_queue[i];
+ if( palette[idx].size() < 2 ){ // Feed if hungry
+ palette[idx].insert(init_palette_size+rand_iter);
+ }
+
+ int c_idx = dist(mt) % (int)palette[idx].size();
+ int curr_color = *std::next(palette[idx].begin(), c_idx);
+ node_colors[idx] = curr_color;
+ conflict[idx] = 0;
+
+ // Hungarian heuristic: node with largest index keeps color
+ // if both have the same color.
+ // Check lower-indexed adjacent nodes, and mark them
+ // in conflict if they generated the same color.
+ // We can do this if we process colors in an increasing order.
+ int n_adj = adjacency[idx].size();
+ for (int j=0; j<n_adj; ++j)
+ {
+ // Adjacency index buffer only stores lower-indexed nodes.
+ int adj_idx = adjacency[idx][j];
+ int adj_color = node_colors[adj_idx];
+ if (adj_idx >= idx)
+ throw std::runtime_error("GaussSeidel Error: Oops, not lower diag");
+ if (adj_color==curr_color)
+ conflict[adj_idx] = 1;
+ }
+ }
+
+ // Resolve conflicts and trim queue
+ std::set<int> new_queue;
+ for (int i=0; i<n_nodes; ++i)
{
- colors[i].emplace_back(i);
+ int idx = node_queue[i];
+ int curr_color = node_colors[idx];
+ int n_adj = adjacency[idx].size();
+
+ // If this node not in conflict, remove its
+ // color from adjacent palettes. If adjacent
+ // nodes not in conflict, remove their color
+ // from this palette.
+ for (int j=0; j<n_adj; ++j)
+ {
+ int adj_idx = adjacency[idx][j];
+ int adj_color = node_colors[adj_idx];
+ if (!conflict[idx])
+ palette[adj_idx].erase(curr_color);
+ if (!conflict[adj_idx])
+ palette[idx].erase(adj_color);
+ }
+
+ if (conflict[idx])
+ new_queue.emplace(idx);
}
+
+ n_nodes = new_queue.size();
+ node_queue.assign(new_queue.begin(), new_queue.end());
+
+ } // end color loop
+
+ //
+ // Step 3)
+ // Map per-vertex colors
+ //
+ colors.clear();
+ n_nodes = node_colors.size();
+ for( int i=0; i<n_nodes; ++i ){
+ int color = node_colors[i];
+ if (color<0)
+ throw std::runtime_error("GaussSeidel: Error with coloring");
+ while( color >= (int)colors.size() )
+ colors.emplace_back(std::vector<int>());
+ colors[color].emplace_back(i);
}
+ // Remove empty color groups
+ for (std::vector<std::vector<int> >::iterator it = colors.begin(); it != colors.end();)
+ it->size() == 0 ? it = colors.erase(it) : it++;
+
} // end compute colors
-} // namespace admmpd
+} // namespace admmpd \ No newline at end of file