From 8af51750b24ea037c5ad64641d79b05a60448d11 Mon Sep 17 00:00:00 2001
From: over0219 <over0219@umn.edu>
Date: Tue, 30 Jun 2020 17:07:29 -0500
Subject: working on mcgs

---
 extern/softbody/src/admmpd_collision.cpp    |  94 ++++---
 extern/softbody/src/admmpd_collision.h      |  23 +-
 extern/softbody/src/admmpd_embeddedmesh.cpp |  12 +-
 extern/softbody/src/admmpd_embeddedmesh.h   |   3 +-
 extern/softbody/src/admmpd_linsolve.cpp     | 393 ++++++++++++++++++++++++++--
 extern/softbody/src/admmpd_linsolve.h       |  23 +-
 extern/softbody/src/admmpd_solver.cpp       | 171 ++----------
 extern/softbody/src/admmpd_solver.h         |   6 -
 extern/softbody/src/admmpd_types.h          |  35 +--
 9 files changed, 496 insertions(+), 264 deletions(-)

(limited to 'extern/softbody/src')
diff --git a/extern/softbody/src/admmpd_collision.cpp b/extern/softbody/src/admmpd_collision.cpp
index bbda72f2428..b7e835a9185 100644
--- a/extern/softbody/src/admmpd_collision.cpp
+++ b/extern/softbody/src/admmpd_collision.cpp
@@ -20,8 +20,7 @@ VFCollisionPair::VFCollisionPair() :
     p_is_obs(0), // 0 or 1
     q_idx(-1), // face
     q_is_obs(0), // 0 or 1
-	pt_on_q(0,0,0),
-	q_n(0,0,0)
+	pt_on_q(0,0,0)
 	{}
 
 void Collision::set_obstacles(
@@ -89,7 +88,6 @@ void Collision::detect_discrete_vf(
 		pair.q_idx = -1;
 		pair.q_is_obs = 1;
 		pair.pt_on_q = Vector3d(pt[0],pt[1],floor_z);
-		pair.q_n = Vector3d(0,0,1);
 	}
 
 	// Any faces to detect against?
@@ -133,18 +131,17 @@ void Collision::detect_discrete_vf(
 	pair.pt_on_q = find_nearest_tri.output.pt_on_tri;
 
 	// Compute face normal
-	BLI_assert(pair.q_idx >= 0);
-	BLI_assert(pair.q_idx < mesh_tris->rows());
-	RowVector3i tri_inds = mesh_tris->row(pair.q_idx);
-	BLI_assert(tri_inds[0]>=0 && tri_inds[0]<mesh_x->rows());
-	BLI_assert(tri_inds[1]>=0 && tri_inds[1]<mesh_x->rows());
-	BLI_assert(tri_inds[2]>=0 && tri_inds[2]<mesh_x->rows());
-	Vector3d tri[3] = {
-		mesh_x->row(tri_inds[0]),
-		mesh_x->row(tri_inds[1]),
-		mesh_x->row(tri_inds[2])
-	};
-	pair.q_n = ((tri[1]-tri[0]).cross(tri[2]-tri[0])).normalized();
+//	BLI_assert(pair.q_idx >= 0);
+//	BLI_assert(pair.q_idx < mesh_tris->rows());
+//	RowVector3i tri_inds = mesh_tris->row(pair.q_idx);
+//	BLI_assert(tri_inds[0]>=0 && tri_inds[0]<mesh_x->rows());
+//	BLI_assert(tri_inds[1]>=0 && tri_inds[1]<mesh_x->rows());
+//	BLI_assert(tri_inds[2]>=0 && tri_inds[2]<mesh_x->rows());
+//	Vector3d tri[3] = {
+//		mesh_x->row(tri_inds[0]),
+//		mesh_x->row(tri_inds[1]),
+//		mesh_x->row(tri_inds[2])
+//	};
 
 //	std::stringstream ss;
 //	ss << "\nhit:" <<
@@ -247,12 +244,10 @@ int EmbeddedMeshCollision::detect(
 	return vf_pairs.size();
 } // end detect
 
-void EmbeddedMeshCollision::jacobian(
+void EmbeddedMeshCollision::linearize(
 	const Eigen::MatrixXd *x,
-	std::vector<Eigen::Triplet<double> > *trips_x,
-	std::vector<Eigen::Triplet<double> > *trips_y,
-	std::vector<Eigen::Triplet<double> > *trips_z,
-	std::vector<double> *l)
+	std::vector<Eigen::Triplet<double> > *trips,
+	std::vector<double> *d)
 {
 	BLI_assert(x != NULL);
 	BLI_assert(x->cols() == 3);
@@ -261,16 +256,45 @@ void EmbeddedMeshCollision::jacobian(
 	if (np==0)
 		return;
 
-	l->reserve((int)l->size() + np);
-	trips_x->reserve((int)trips_x->size() + np*4);
-	trips_y->reserve((int)trips_y->size() + np*4);
-	trips_z->reserve((int)trips_z->size() + np*4);
+	d->reserve((int)d->size() + np);
+	trips->reserve((int)trips->size() + np*3*4);
 
 	for (int i=0; i<np; ++i)
 	{
 		VFCollisionPair &pair = vf_pairs[i];
 		int emb_p_idx = pair.p_idx;
 
+
+    //p_idx(-1), // point
+    //p_is_obs(0), // 0 or 1
+    //q_idx(-1), // face
+    //q_is_obs(0), // 0 or 1
+//	pt_on_q(0,0,0)
+
+		// Compute normal of triangle at x
+		Vector3d q_n(0,0,0);
+		RowVector3i q_inds(-1,-1,-1);
+		std::vector<Vector3d> q_tris;
+		if (pair.q_is_obs)
+		{
+			// Special case, floor
+			if (pair.q_idx == -1)
+			{
+				q_n = Vector3d(0,0,1);
+			}
+			else
+			{
+				q_inds = obsdata.F.row(pair.q_idx);
+				q_tris = {
+					obsdata.V1.row(q_inds[0]),
+					obsdata.V1.row(q_inds[1]),
+					obsdata.V1.row(q_inds[2])
+				};
+				q_n = ((q_tris[1]-q_tris[0]).cross(q_tris[2]-q_tris[0]));
+				q_n.normalize();
+			}
+		}
+
 		// TODO: obstacle point intersecting mesh
 		if (pair.p_is_obs)
 		{
@@ -283,24 +307,14 @@ void EmbeddedMeshCollision::jacobian(
 			RowVector4d bary = mesh->barys.row(emb_p_idx);
 			int tet_idx = mesh->vtx_to_tet[emb_p_idx];
 			RowVector4i tet = mesh->tets.row(tet_idx);
-
-			// Okay this is pretty ugly. I'm going to have to think about
-			// whether or not this is reasonable.
-			for (int j=0; j<3; ++j)
+			int c_idx = d->size();
+			d->emplace_back(q_n.dot(pair.pt_on_q));
+			for (int j=0; j<4; ++j)
 			{
-				int c_idx = l->size();
-				for (int k=0; k<4; ++k)
-				{
-					if (j==0)
-						trips_x->emplace_back(c_idx, tet[k], bary[k]*pair.q_n[0]);
-					if (j==1)
-						trips_y->emplace_back(c_idx, tet[k], bary[k]*pair.q_n[1]);
-					if (j==2)
-						trips_z->emplace_back(c_idx, tet[k], bary[k]*pair.q_n[2]);
-				}
-				l->emplace_back(pair.pt_on_q[j]*pair.q_n[j]);
+				trips->emplace_back(c_idx, tet[j]*3+0, bary[j]*q_n[0]);
+				trips->emplace_back(c_idx, tet[j]*3+1, bary[j]*q_n[1]);
+				trips->emplace_back(c_idx, tet[j]*3+2, bary[j]*q_n[2]);
 			}
-
 			continue;
 		}
 
diff --git a/extern/softbody/src/admmpd_collision.h b/extern/softbody/src/admmpd_collision.h
index f9c6363b89f..2ab7d343c05 100644
--- a/extern/softbody/src/admmpd_collision.h
+++ b/extern/softbody/src/admmpd_collision.h
@@ -52,15 +52,11 @@ public:
     // Special case for floor since it's common.
     virtual void set_floor(double z) = 0;
 
-    // Linearize the constraints and return Jacobian tensor.
-    // Constraints are linearized about x for constraint
-    // K x = l
-    virtual void jacobian(
+    // Linearize the constraints and return Jacobian.
+    virtual void linearize(
         const Eigen::MatrixXd *x,
-    	std::vector<Eigen::Triplet<double> > *trips_x,
-        std::vector<Eigen::Triplet<double> > *trips_y,
-    	std::vector<Eigen::Triplet<double> > *trips_z,
-		std::vector<double> *l) = 0;
+    	std::vector<Eigen::Triplet<double> > *trips,
+		std::vector<double> *d) = 0;
 
     // Given a point and a surface mesh,
     // perform discrete collision and create
@@ -83,7 +79,8 @@ class EmbeddedMeshCollision : public Collision {
 public:
     EmbeddedMeshCollision(const EmbeddedMeshData *mesh_) :
         mesh(mesh_),
-        floor_z(-std::numeric_limits<double>::max())
+//        floor_z(-std::numeric_limits<double>::max())
+        floor_z(0)
         {}
 
     // A floor is so common that it makes sense to hard
@@ -100,12 +97,10 @@ public:
 
     // Linearizes the collision pairs about x
     // for the constraint Kx=l
-    void jacobian(
+    void linearize(
         const Eigen::MatrixXd *x,
-    	std::vector<Eigen::Triplet<double> > *trips_x,
-        std::vector<Eigen::Triplet<double> > *trips_y,
-    	std::vector<Eigen::Triplet<double> > *trips_z,
-		std::vector<double> *l);
+    	std::vector<Eigen::Triplet<double> > *trips,
+		std::vector<double> *d);
 
 protected:
     // A ptr to the embedded mesh data
diff --git a/extern/softbody/src/admmpd_embeddedmesh.cpp b/extern/softbody/src/admmpd_embeddedmesh.cpp
index 68cd38b27d5..22ac59fef0c 100644
--- a/extern/softbody/src/admmpd_embeddedmesh.cpp
+++ b/extern/softbody/src/admmpd_embeddedmesh.cpp
@@ -67,7 +67,8 @@ bool EmbeddedMesh::generate(
 	const Eigen::MatrixXd &V, // embedded verts
 	const Eigen::MatrixXi &F, // embedded faces
 	EmbeddedMeshData *emb_mesh, // where embedding is stored
-	Eigen::MatrixXd *x_tets) // lattice vertices, n x 3
+	Eigen::MatrixXd *x_tets, // lattice vertices, n x 3
+	bool trim_lattice)
 {
 	// How big the grid cells are as a fraction
 	// of the total mesh.
@@ -191,9 +192,12 @@ bool EmbeddedMesh::generate(
 			.tets = &tets,
 			.keep_tet = &keep_tet
 		};
-		TaskParallelSettings settings;
-		BLI_parallel_range_settings_defaults(&settings);
-		BLI_task_parallel_range(0, nt0, &thread_data, parallel_keep_tet, &settings);
+		if (trim_lattice)
+		{
+			TaskParallelSettings settings;
+			BLI_parallel_range_settings_defaults(&settings);
+			BLI_task_parallel_range(0, nt0, &thread_data, parallel_keep_tet, &settings);
+		}
 
 		// Loop over tets and remove as needed.
 		// Mark referenced vertices to compute a mapping.
diff --git a/extern/softbody/src/admmpd_embeddedmesh.h b/extern/softbody/src/admmpd_embeddedmesh.h
index 85c64c7b0a5..eb7583b93b4 100644
--- a/extern/softbody/src/admmpd_embeddedmesh.h
+++ b/extern/softbody/src/admmpd_embeddedmesh.h
@@ -15,7 +15,8 @@ public:
         const Eigen::MatrixXd &V, // embedded verts
         const Eigen::MatrixXi &F, // embedded faces
         EmbeddedMeshData *emb_mesh, // where embedding is stored
-        Eigen::MatrixXd *x_tets); // lattice vertices, n x 3
+        Eigen::MatrixXd *x_tets, // lattice vertices, n x 3
+        bool trim_lattice = true); // remove elements outside embedded volume
 
     // Returns the vtx mapped from x/v and tets
     Eigen::Vector3d get_mapped_vertex(
diff --git a/extern/softbody/src/admmpd_linsolve.cpp b/extern/softbody/src/admmpd_linsolve.cpp
index 5b3a62dc7bb..0d40c49ef4d 100644
--- a/extern/softbody/src/admmpd_linsolve.cpp
+++ b/extern/softbody/src/admmpd_linsolve.cpp
@@ -4,12 +4,154 @@
 #include "admmpd_linsolve.h"
 #include <numeric>
 #include <iostream>
+#include <set>
 #include <unordered_set>
+#include <random>
 #include "BLI_assert.h"
+#include "BLI_task.h"
 
 namespace admmpd {
 using namespace Eigen;
 
+// Makes full n3 x n3 matrix
+static inline void make_n3(
+		const RowSparseMatrix<double> &A,
+		RowSparseMatrix<double> &A3)
+{
+	int na = A.rows();
+	SparseMatrix<double> A_cm = A; // A to CM
+	SparseMatrix<double> A3_cm(na*3, na*3);
+	A3_cm.reserve(A_cm.nonZeros()*3);
+	int cols = A_cm.rows();
+	for(int c=0; c<cols; ++c)
+	{
+		for(int dim=0; dim<3; ++dim)
+		{
+			int col = c*3+dim;
+			A3_cm.startVec(col);
+			for(SparseMatrix<double>::InnerIterator itA(A_cm,c); itA; ++itA)
+				A3_cm.insertBack((itA.row()*3+dim), col) = itA.value();
+		}
+	}
+	A3_cm.finalize();
+	A3_cm.makeCompressed();
+	A3 = A3_cm; // to RowMajor
+} // end make_n3
+
+void ConjugateGradients::solve(
+		const Options *options,
+		SolverData *data)
+{
+	BLI_assert(data != NULL);
+	BLI_assert(options != NULL);
+	int nx = data->x.rows();
+	BLI_assert(nx > 0);
+	BLI_assert(data->A.rows() == nx);
+	BLI_assert(data->A.cols() == nx);
+	BLI_assert(data->b.rows() == nx);
+	BLI_assert(data->C.cols() == nx*3);
+	BLI_assert(data->d.rows() > 0);
+	BLI_assert(data->C.rows() == data->d.rows());
+
+	// If we don't have any constraints, we don't need to perform CG
+	data->b.noalias() = data->M_xbar + data->DtW2*(data->z-data->u);
+	if (data->C.nonZeros()==0)
+	{
+		data->x = data->ldltA.solve(data->b);
+		return;
+	}
+
+	// Resize data associated with Conjugate-Gradient
+	admmpd::SolverData::GlobalStepData *gsdata = &data->gsdata;
+	if (gsdata->r.rows() != nx*3)
+	{
+		gsdata->r.resize(nx*3);
+		gsdata->z.resize(nx*3);
+		gsdata->p.resize(nx*3);
+		gsdata->A3p.resize(nx*3);
+		gsdata->b3_plus_Ctd.resize(nx*3);
+		make_n3(data->A, gsdata->A3);
+	}
+
+	gsdata->Ctd = data->spring_k * data->C.transpose()*data->d;
+	gsdata->CtC = data->spring_k * data->C.transpose()*data->C;
+	gsdata->A3_plus_CtC = gsdata->A3 + gsdata->CtC;
+	VectorXd x3(nx*3);
+	for (int i=0; i<nx; ++i)
+	{
+		Vector3d bi = data->b.row(i);
+		gsdata->b3_plus_Ctd.segment<3>(i*3) = bi+gsdata->Ctd.segment<3>(i*3);
+		x3.segment<3>(i*3) = data->x.row(i);
+	}
+
+	gsdata->r.noalias() = gsdata->b3_plus_Ctd - gsdata->A3_plus_CtC*x3;
+	solve_Ax_b(data,&gsdata->z,&gsdata->r);
+	gsdata->p = gsdata->z;
+
+	for (int iter=0; iter<options->max_cg_iters; ++iter)
+	{
+		gsdata->A3p.noalias() = gsdata->A3_plus_CtC*gsdata->p;
+		double p_dot_Ap = gsdata->p.dot(gsdata->A3p);
+		if (p_dot_Ap==0.0)
+			break;
+
+		double zk_dot_rk = gsdata->z.dot(gsdata->r);
+		if (zk_dot_rk==0.0)
+			break;
+
+		double alpha = zk_dot_rk / p_dot_Ap;
+		x3.noalias() += alpha * gsdata->p;
+		gsdata->r.noalias() -= alpha * gsdata->A3p;
+		double r_norm = gsdata->r.lpNorm<Infinity>();
+		if (r_norm < options->min_res)
+			break;
+
+		solve_Ax_b(data,&gsdata->z,&gsdata->r);
+		double zk1_dot_rk1 = gsdata->z.dot(gsdata->r);
+		double beta = zk1_dot_rk1 / zk_dot_rk;
+		gsdata->p = gsdata->z + beta*gsdata->p;
+	}
+
+	for (int i=0; i<nx; ++i)
+		data->x.row(i) = x3.segment<3>(i*3);
+
+} // end ConjugateGradients solve
+
+// Solve Ax = b in parallel and apply
+// dimensionality mapping with temporaries
+void ConjugateGradients::solve_Ax_b(
+	SolverData *data_,
+	VectorXd *x_,
+	VectorXd *b_)
+{
+	typedef struct LinSolveThreadData {
+		SolverData *data;
+		VectorXd *ls_x;
+		VectorXd *ls_b;
+	} LinSolveThreadData;
+
+	auto parallel_lin_solve = [](
+		void *__restrict userdata,
+		const int i,
+		const TaskParallelTLS *__restrict UNUSED(tls))->void
+	{
+		LinSolveThreadData *td = (LinSolveThreadData*)userdata;
+		int nx = td->ls_x->rows()/3;
+		VectorXd b(nx);
+		for (int j=0; j<nx; ++j)
+			b[j] = td->ls_b->operator[](j*3+i);
+		VectorXd x = td->data->ldltA.solve(b);
+		for (int j=0; j<nx; ++j)
+			td->ls_x->operator[](j*3+i) = x[j];
+	};
+
+	LinSolveThreadData thread_data = {.data=data_, .ls_x=x_, .ls_b=b_};
+	TaskParallelSettings settings;
+	BLI_parallel_range_settings_defaults(&settings);
+	BLI_task_parallel_range(0, 3, &thread_data, parallel_lin_solve, &settings);
+
+} // end solve Ax=b
+
 void GaussSeidel::solve(
 		const Options *options,
 		SolverData *data)
@@ -67,7 +209,6 @@ void GaussSeidel::solve(
 				for (int j=0; j<3; ++j)
 					xi_new[j] *= inv_aii[j];
 				data->x.row(idx) = xi*(1.0-omega) + xi_new*omega;
-				data->gsdata.last_dx.row(idx) = data->x.row(idx)-xi.transpose();
 
 				// TODO
 				// We can also apply constraints here, like
@@ -97,51 +238,255 @@ void GaussSeidel::init_solve(
 	BLI_assert(data->b.rows()==nx);
 	BLI_assert(data->b.cols()==data->x.cols());
 
-	if (data->gsdata.last_dx.rows() != nx)
+	// Do we need to color the default colorings?
+	if (data->gsdata.A_colors.size() == 0)
+		compute_colors(&data->A, 1, data->gsdata.A_colors);
+
+	// Verify color groups are correct
 	{
-		data->gsdata.last_dx.resize(nx,3);
-		data->gsdata.last_dx.setZero();
+		std::cout << "TESTING " << data->tets.rows() << " tets" << std::endl;
+		std::cout << "num colors: " << data->gsdata.A_colors.size() << std::endl;
+		int nt = data->tets.rows();
+		int nc = data->gsdata.A_colors.size();
+		for (int i=0; i<nc; ++i)
+		{
+			// Each vertex in the color should not
+			// be a part of a tet with a vertex in the same color
+			const std::vector<int> &grp = data->gsdata.A_colors[i];
+			int n_grp = grp.size();
+			for (int j=0; j<n_grp; ++j)
+			{
+				int p_idx = grp[j];
+				auto in_tet = [](int idx, const RowVector4i &t)
+				{
+					return (t[0]==idx||t[1]==idx||t[2]==idx||t[3]==idx);
+				};
+
+				for (int k=0; k<nt; ++k)
+				{
+					RowVector4i t = data->tets.row(k);
+					if (!in_tet(p_idx,t))
+						continue;
+
+					for (int l=0; l<n_grp; ++l)
+					{
+						int q_idx = grp[l];
+						if (p_idx==q_idx)
+							continue;
+
+						if (in_tet(q_idx,t))
+						{
+std::cerr << "p: " << p_idx << ", q: " << q_idx << ", tet (" << k << "): " << t << std::endl;
+//							throw std::runtime_error("GaussSeidel Error: Color contains two verts form same tet!");
+						}
+					}
+				}
+			}
+		}
 	}
 
-	// Do we need to color the default colorings?
-	if( data->gsdata.A_colors.size() == 0 )
-		compute_colors(&data->A,nullptr,data->gsdata.A_colors);
+	// Create large A if we haven't already.
+	if (data->gsdata.A3.rows() != nx*3)
+		make_n3(data->A, data->gsdata.A3);
 
-	// TODO: Eventually we'll replace KtK with the full-dof matrix.
+	// TODO
+	// Eventually we'll replace KtK with the full-dof matrix.
 	// For now use z and test collisions against ground plane.
-	bool has_constraints = data->K[2].nonZeros()>0;
-	data->gsdata.KtK = data->K[2].transpose()*data->K[2];
+	bool has_constraints = data->C.nonZeros()>0;
 
-	if (false)//(has_constraints)
+	// Finally, the new global matrix and rhs
+	if (has_constraints)
 	{
-		(void)(has_constraints);
-		// TODO color A + KtK
+		data->gsdata.CtC = data->spring_k * data->C.transpose()*data->C;
+		data->gsdata.Ctd.noalias() = data->spring_k * data->C.transpose()*data->d;
+		data->gsdata.A3_plus_CtC = data->gsdata.A3 + data->gsdata.CtC;
+		data->gsdata.b3_plus_Ctd.resize(nx*3);
+		for (int i=0; i<nx; ++i)
+		{
+			data->gsdata.b3_plus_Ctd[i*3+0] = data->b(i,0)+data->gsdata.Ctd[i*3+0];
+			data->gsdata.b3_plus_Ctd[i*3+1] = data->b(i,1)+data->gsdata.Ctd[i*3+1];
+			data->gsdata.b3_plus_Ctd[i*3+2] = data->b(i,2)+data->gsdata.Ctd[i*3+2];
+		}
+		compute_colors(&data->gsdata.A3_plus_CtC, 3, data->gsdata.A3_plus_CtC_colors);
+	}
+	else
+	{
+		data->gsdata.CtC.setZero();
+		data->gsdata.Ctd.setZero();
+		data->gsdata.A3_plus_CtC = data->gsdata.A3;
+		data->gsdata.b3_plus_Ctd.resize(nx*3);
+		for (int i=0; i<nx; ++i)
+		{
+			data->gsdata.b3_plus_Ctd[i*3+0] = data->b(i,0);
+			data->gsdata.b3_plus_Ctd[i*3+1] = data->b(i,1);
+			data->gsdata.b3_plus_Ctd[i*3+2] = data->b(i,2);
+		}
+		data->gsdata.A3_plus_CtC_colors = data->gsdata.A_colors;
 	}
 
 } // end init solve
 
+typedef struct GraphColorThreadData {
+	const RowSparseMatrix<double> *A;
+	int stride;
+	std::vector<std::vector<int> > *adjacency;
+	std::vector<std::set<int> > *palette;
+	int init_palette_size;
+	std::vector<int> *conflict;
+	std::vector<int> *node_colors;
+} GraphColorThreadData;
+
+// Rehash of graph coloring from
+// https://github.com/mattoverby/mclscene/blob/master/include/MCL/GraphColor.hpp
 void GaussSeidel::compute_colors(
 		const RowSparseMatrix<double> *A,
-		const RowSparseMatrix<double> *KtK, // if null, just A
+		int stride,
 		std::vector<std::vector<int> > &colors)
 {
 	BLI_assert(A != nullptr);
-	if (KtK != nullptr)
+	BLI_assert(stride>0);
+
+	// Graph color settings
+	int init_palette_size = 6;
+
+	// Graph coloring tmp data
+	int n_nodes = A->rows()/stride;
+	std::vector<std::vector<int> > adjacency(n_nodes, std::vector<int>());
+	std::vector<std::set<int> > palette(n_nodes, std::set<int>());
+	std::vector<int> conflict(n_nodes,1);
+	std::vector<int> node_colors(n_nodes,-1);
+
+	//
+	// Step 1)
+	// Graph color initialization
+	//
+	auto parallel_init_graph = [](
+		void *__restrict userdata,
+		const int i,
+		const TaskParallelTLS *__restrict UNUSED(tls)) -> void
 	{
-		throw std::runtime_error("**GaussSeidel::compute_colors TODO: KtK");
-	}
+		GraphColorThreadData *td = (GraphColorThreadData*)userdata;
+		typedef RowSparseMatrix<double>::InnerIterator InnerIter;
+		// Use lower trianglular portion of the matrix.
+		// That is, store adjacency inds that are lower than
+		// the current index.
+		for (InnerIter it(*(td->A),i*td->stride); it; ++it)
+		{
+			if (std::abs(it.value())==0.0)
+				continue;
+			if (it.col()/td->stride >= it.row()/td->stride)
+				break;
+			int idx = it.col()/td->stride;
+			td->adjacency->at(i).emplace_back(idx);
+		}
+		for( int j=0; j<td->init_palette_size; ++j ) // init colors
+			td->palette->at(i).insert(j); 
+	};
+	GraphColorThreadData thread_data = {
+		.A = A, .stride = stride,
+		.adjacency = &adjacency,
+		.palette = &palette,
+		.init_palette_size = init_palette_size,
+		.conflict = &conflict,
+		.node_colors = &node_colors };
+	TaskParallelSettings settings;
+	BLI_parallel_range_settings_defaults(&settings);
+	BLI_task_parallel_range(0, n_nodes, &thread_data, parallel_init_graph, &settings);
 
-	colors.clear();
-	int nx = A->rows();
+    std::random_device rd;
+    std::mt19937 mt(rd());
+	std::uniform_int_distribution<int> dist(0, n_nodes);
 
-	{ // DEBUGGING
-		colors.resize(nx,std::vector<int>());
-		for (int i=0; i<nx; ++i)
+	//
+	// Step 2)
+	// Stochastic Graph coloring
+	//
+	std::vector<int> node_queue(n_nodes);
+	std::iota(node_queue.begin(), node_queue.end(), 0);
+	int max_iters = n_nodes;
+	for (int rand_iter=0; n_nodes>0 && rand_iter < max_iters; ++rand_iter)
+	{
+		// Generate a random color and find conflicts
+		for (int i=0; i<n_nodes; ++i)
+		{
+			int idx = node_queue[i];
+			if( palette[idx].size() < 2 ){ // Feed if hungry
+				palette[idx].insert(init_palette_size+rand_iter);
+			}
+
+			int c_idx = dist(mt) % (int)palette[idx].size();
+			int curr_color = *std::next(palette[idx].begin(), c_idx);
+			node_colors[idx] = curr_color;
+			conflict[idx] = 0;
+
+			// Hungarian heuristic: node with largest index keeps color
+			// if both have the same color.
+			// Check lower-indexed adjacent nodes, and mark them
+			// in conflict if they generated the same color.
+			// We can do this if we process colors in an increasing order.
+			int n_adj = adjacency[idx].size();
+			for (int j=0; j<n_adj; ++j)
+			{
+				// Adjacency index buffer only stores lower-indexed nodes.
+				int adj_idx = adjacency[idx][j];
+				int adj_color = node_colors[adj_idx];
+				if (adj_idx >= idx)
+					throw std::runtime_error("GaussSeidel Error: Oops, not lower diag");
+				if (adj_color==curr_color)
+					conflict[adj_idx] = 1;					
+			}
+		}
+
+		// Resolve conflicts and trim queue
+		std::set<int> new_queue;
+		for (int i=0; i<n_nodes; ++i)
 		{
-			colors[i].emplace_back(i);
+			int idx = node_queue[i];
+			int curr_color = node_colors[idx];
+			int n_adj = adjacency[idx].size();
+
+			// If this node not in conflict, remove its
+			// color from adjacent palettes. If adjacent
+			// nodes not in conflict, remove their color
+			// from this palette.
+			for (int j=0; j<n_adj; ++j)
+			{
+				int adj_idx = adjacency[idx][j];
+				int adj_color = node_colors[adj_idx];
+				if (!conflict[idx])
+					palette[adj_idx].erase(curr_color);
+				if (!conflict[adj_idx])
+					palette[idx].erase(adj_color);
+			}
+
+			if (conflict[idx])
+				new_queue.emplace(idx);
 		}
+
+		n_nodes = new_queue.size();
+		node_queue.assign(new_queue.begin(), new_queue.end());
+
+	} // end color loop
+
+	//
+	// Step 3)
+	// Map per-vertex colors
+	//
+	colors.clear();
+	n_nodes = node_colors.size();
+	for( int i=0; i<n_nodes; ++i ){
+		int color = node_colors[i];
+		if (color<0)
+			throw std::runtime_error("GaussSeidel: Error with coloring");
+		while( color >= (int)colors.size() )
+			colors.emplace_back(std::vector<int>());
+		colors[color].emplace_back(i);
 	}
 
+	// Remove empty color groups
+	for (std::vector<std::vector<int> >::iterator it = colors.begin(); it != colors.end();)
+		it->size() == 0 ? it = colors.erase(it) : it++;
+
 } // end compute colors
 
-} // namespace admmpd
+} // namespace admmpd
\ No newline at end of file
diff --git a/extern/softbody/src/admmpd_linsolve.h b/extern/softbody/src/admmpd_linsolve.h
index b1dd9ae8f4e..3dc3764c6e9 100644
--- a/extern/softbody/src/admmpd_linsolve.h
+++ b/extern/softbody/src/admmpd_linsolve.h
@@ -8,25 +8,36 @@
 
 namespace admmpd {
 
+// Preconditioned Conjugate Gradients
+class ConjugateGradients {
+public:
+	void solve(
+		const Options *options,
+		SolverData *data);
+
+protected:
+	// Apply preconditioner
+	void solve_Ax_b(
+		SolverData *data,
+		Eigen::VectorXd *x,
+		Eigen::VectorXd *b);
+};
+
+// Multi-Colored Gauss-Seidel
 class GaussSeidel {
 public:
-	// Solves (A + KtK) x = (b + Ktl)
-	// x and b passed as separate variables
-	// for debugging/testing purposes.
 	void solve(
 		const Options *options,
 		SolverData *data);
 
 protected:
-	// Allocates data, computes colors
 	void init_solve(
 		const Options *options,
 		SolverData *data);
 
-	// Computes colors of A + KtK
 	void compute_colors(
 		const RowSparseMatrix<double> *A,
-		const RowSparseMatrix<double> *KtK, // if null, just A
+		int stride,
 		std::vector<std::vector<int> > &colors);
 
 };
diff --git a/extern/softbody/src/admmpd_solver.cpp b/extern/softbody/src/admmpd_solver.cpp
index 944ebf7826e..9379d4f297b 100644
--- a/extern/softbody/src/admmpd_solver.cpp
+++ b/extern/softbody/src/admmpd_solver.cpp
@@ -18,7 +18,6 @@
 
 namespace admmpd {
 using namespace Eigen;
-template <typename T> using RowSparseMatrix = SparseMatrix<T,RowMajor>;
 
 typedef struct ThreadData {
 	const Options *options;
@@ -79,8 +78,8 @@ int Solver::solve(
 		// Perform collision detection and linearization
 		update_constraints(options,data,collision);
 
-		// Solve Ax=b s.t. Kx=l
-		solve_conjugate_gradients(options,data);
+		// Solve Ax=b s.t. Cx=d
+		ConjugateGradients().solve(options,data);
 		//GaussSeidel().solve(options,data);
 
 	} // end solver iters
@@ -170,168 +169,33 @@ void Solver::update_constraints(
 
 	collision->detect(&data->x_start, &data->x);
 
-	std::vector<double> l_coeffs;
-	std::vector<Eigen::Triplet<double> > trips_x;
-    std::vector<Eigen::Triplet<double> > trips_y;
-    std::vector<Eigen::Triplet<double> > trips_z;
+	std::vector<double> d_coeffs;
+	std::vector<Eigen::Triplet<double> > trips;
 
 	// TODO collision detection
-	collision->jacobian(
+	collision->linearize(
 		&data->x,
-		&trips_x,
-		&trips_y,
-		&trips_z,
-		&l_coeffs);
+		&trips,
+		&d_coeffs);
 
 	// Check number of constraints.
 	// If no constraints, clear Jacobian.
 	int nx = data->x.rows();
-	int nc = l_coeffs.size();
+	int nc = d_coeffs.size();
 	if (nc==0)
 	{
-		data->l.setZero();
-		for (int i=0; i<3; ++i)
-			data->K[i].setZero();
-
+		data->d.setZero();
+		data->C.setZero();
 		return;
 	}
 
 	// Otherwise update the data.
-	data->l.resize(nc);
-	for (int i=0; i<nc; ++i)
-		data->l[i] = l_coeffs[i];
-	data->K[0].resize(nc,nx);
-	data->K[0].setFromTriplets(trips_x.begin(),trips_x.end());
-	data->K[1].resize(nc,nx);
-	data->K[1].setFromTriplets(trips_y.begin(),trips_y.end());
-	data->K[2].resize(nc,nx);
-	data->K[2].setFromTriplets(trips_z.begin(),trips_z.end());
+	data->d = Map<VectorXd>(d_coeffs.data(), d_coeffs.size());
+	data->C.resize(nc,nx*3);
+	data->C.setFromTriplets(trips.begin(),trips.end());
 
 } // end update constraints
 
-typedef struct LinSolveThreadData {
-	SolverData *data;
-	MatrixXd *ls_x;
-	MatrixXd *ls_b;
-} LinSolveThreadData;
-
-static void parallel_lin_solve(
-	void *__restrict userdata,
-	const int i,
-	const TaskParallelTLS *__restrict UNUSED(tls))
-{
-	LinSolveThreadData *td = (LinSolveThreadData*)userdata;
-	td->ls_x->col(i) = td->data->ldltA.solve(td->ls_b->col(i));
-} // end parallel lin solve
-
-void Solver::solve_conjugate_gradients(
-	const Options *options,
-	SolverData *data)
-{
-	BLI_assert(data != NULL);
-	BLI_assert(options != NULL);
-	int nx = data->x.rows();
-	BLI_assert(nx > 0);
-	BLI_assert(data->A.rows() == nx);
-	BLI_assert(data->A.cols() == nx);
-	BLI_assert(data->b.rows() == nx);
-	BLI_assert(data->K[0].cols() == nx);
-	BLI_assert(data->K[1].cols() == nx);
-	BLI_assert(data->K[2].cols() == nx);
-	BLI_assert(data->l.rows() > 0);
-	BLI_assert(data->K[0].rows() == data->l.rows());
-	BLI_assert(data->K[1].rows() == data->l.rows());
-	BLI_assert(data->K[2].rows() == data->l.rows());
-
-	// Compute RHS
-	data->b.noalias() = data->M_xbar + data->DtW2*(data->z-data->u);
-
-	// Solve Ax = b in parallel
-	auto solve_Ax_b = [](
-		SolverData *data_,
-		MatrixXd *x_,
-		MatrixXd *b_)
-	{
-		LinSolveThreadData thread_data = {.data=data_, .ls_x=x_, .ls_b=b_};
-		TaskParallelSettings settings;
-		BLI_parallel_range_settings_defaults(&settings);
-		BLI_task_parallel_range(0, 3, &thread_data, parallel_lin_solve, &settings);
-	};
-
-	// If we don't have any constraints,
-	// we don't need to perform CG
-	if (std::max(std::max(
-		data->K[0].nonZeros(),
-		data->K[1].nonZeros()),
-		data->K[2].nonZeros())==0)
-	{
-		solve_Ax_b(data,&data->x,&data->b);
-		return;
-	}
-
-	// Inner product of matrices interpreted
-	// if they were instead vectorized
-	auto mat_inner = [](
-		const MatrixXd &A,
-		const MatrixXd &B)
-	{
-		double dot = 0.0;
-		int nr = std::min(A.rows(), B.rows());
-		for( int i=0; i<nr; ++i )
-			for(int j=0; j<3; ++j)
-				dot += A(i,j)*B(i,j);
-
-		return dot;
-	};
-
-	// Update CGData
-	admmpd::SolverData::CGData *cgdata = &data->cgdata;
-	double eps = options->min_res;
-	cgdata->b = data->b;
-	if (cgdata->r.rows() != nx)
-	{
-		cgdata->r.resize(nx,3);
-		cgdata->z.resize(nx,3);
-		cgdata->p.resize(nx,3);
-		cgdata->Ap.resize(nx,3);
-	}
-
-	for (int i=0; i<3; ++i)
-	{
-		RowSparseMatrix<double> Kt = data->K[i].transpose();
-		cgdata->A[i] = data->A + data->spring_k*RowSparseMatrix<double>(Kt*data->K[i]);
-		cgdata->b.col(i).noalias() += data->spring_k*Kt*data->l;
-		cgdata->r.col(i).noalias() = cgdata->b.col(i) - cgdata->A[i]*data->x.col(i);
-	}
-	solve_Ax_b(data,&cgdata->z,&cgdata->r);
-	cgdata->p = cgdata->z;
-
-	for (int iter=0; iter<options->max_cg_iters; ++iter)
-	{
-		for (int i=0; i<3; ++i)
-			cgdata->Ap.col(i).noalias() = cgdata->A[i]*cgdata->p.col(i);
-
-		double p_dot_Ap = mat_inner(cgdata->p,cgdata->Ap);
-		if (p_dot_Ap==0.0)
-			break;
-
-		double zk_dot_rk = mat_inner(cgdata->z,cgdata->r);
-		if (zk_dot_rk==0.0)
-			break;
-
-		double alpha = zk_dot_rk / p_dot_Ap;
-		data->x.noalias() += alpha * cgdata->p;
-		cgdata->r.noalias() -= alpha * cgdata->Ap;
-		if (cgdata->r.lpNorm<Infinity>() < eps)
-			break;
-
-		solve_Ax_b(data,&cgdata->z,&cgdata->r);
-		double beta = mat_inner(cgdata->z,cgdata->r) / zk_dot_rk;
-		cgdata->p = cgdata->z + beta*cgdata->p;
-	}
-
-} // end solve conjugate gradients
-
 bool Solver::compute_matrices(
 	const Options *options,
 	SolverData *data)
@@ -392,9 +256,8 @@ bool Solver::compute_matrices(
 
 	// Constraint data
 	data->spring_k = options->mult_k*data->A.diagonal().maxCoeff();
-	data->l = VectorXd::Zero(1);
-	for (int i=0; i<3; ++i)
-		data->K[i].resize(1,nx);
+	data->C.resize(1,nx*3);
+	data->d = VectorXd::Zero(1);
 
 	// ADMM dual/lagrange
 	data->z.resize(n_row_D,3);
@@ -422,6 +285,10 @@ void Solver::append_energies(
 	Lame lame;
 	lame.set_from_youngs_poisson(options->youngs, options->poisson);
 
+	// The possibility of having an error in energy initialization
+	// while still wanting to continue the simulation is very low.
+	// We can parallelize this step if need be.
+
 	int energy_index = 0;
 	for (int i=0; i<nt; ++i)
 	{
diff --git a/extern/softbody/src/admmpd_solver.h b/extern/softbody/src/admmpd_solver.h
index 3c16b733b82..dc94bedd5a4 100644
--- a/extern/softbody/src/admmpd_solver.h
+++ b/extern/softbody/src/admmpd_solver.h
@@ -44,12 +44,6 @@ protected:
         const Options *options,
         SolverData *data);
 
-    // Global step with CG:
-    // 1/2||Ax-b||^2 + k/2||Kx-l||^2
-	void solve_conjugate_gradients(
-        const Options *options,
-        SolverData *data);
-
     bool compute_matrices(
         const Options *options,
         SolverData *data);
diff --git a/extern/softbody/src/admmpd_types.h b/extern/softbody/src/admmpd_types.h
index 46765150e1c..3615fd9cd5f 100644
--- a/extern/softbody/src/admmpd_types.h
+++ b/extern/softbody/src/admmpd_types.h
@@ -32,7 +32,7 @@ struct Options {
         mult_k(1),
         min_res(1e-6),
         youngs(1000000),
-        poisson(0.299),
+        poisson(0.399),
         grav(0,0,-9.8)
         {}
 };
@@ -72,24 +72,25 @@ struct SolverData {
     RowSparseMatrix<double> D; // reduction matrix
     RowSparseMatrix<double> DtW2; // D'W^2
     RowSparseMatrix<double> A; // M + D'W^2D
-    RowSparseMatrix<double> K[3]; // constraint Jacobian
-    Eigen::VectorXd l; // constraint rhs (Kx=l)
+    RowSparseMatrix<double> C; // linearized constraints
+    Eigen::VectorXd d; // constraints rhs
     double spring_k; // constraint stiffness
 	Eigen::SimplicialLDLT<Eigen::SparseMatrix<double> > ldltA;
-    struct CGData { // Temporaries used in conjugate gradients
-        RowSparseMatrix<double> A[3]; // (M + D'W^2D) + k * Kt K
-        Eigen::MatrixXd b; // M xbar + DtW2(z-u) + k Kt l
-        Eigen::MatrixXd r; // residual
-        Eigen::MatrixXd z;
-        Eigen::MatrixXd p;
-        Eigen::MatrixXd Ap; // A * p
-    } cgdata;
-    struct GSData { // Temporaries used in Gauss-Seidel
-		RowSparseMatrix<double> KtK; // k * Kt K, different dim than A!
-        Eigen::MatrixXd last_dx; // last GS iter change in x
-		std::vector<std::vector<int> > A_colors; // colors of just A matrix
-        std::vector<std::vector<int> > A_KtK_colors; // colors of just A+KtK
-	} gsdata;
+    struct GlobalStepData { // Temporaries used in global step
+        RowSparseMatrix<double> A3; // (M + D'W^2D) n3 x n3
+        RowSparseMatrix<double> CtC; // k * Ct C
+        RowSparseMatrix<double> A3_plus_CtC;
+        Eigen::VectorXd Ctd; // k * Ct d
+        Eigen::VectorXd b3_plus_Ctd; // M xbar + DtW2(z-u) + k Kt l
+        // Used by Conjugate-Gradients:
+        Eigen::VectorXd r; // residual
+        Eigen::VectorXd z; // auxilary
+        Eigen::VectorXd p; // direction
+        Eigen::VectorXd A3p; // A3 * p
+        // Used by Gauss-Seidel:
+        std::vector<std::vector<int> > A_colors; // colors of (original) A matrix
+        std::vector<std::vector<int> > A3_plus_CtC_colors; // colors of A3+KtK
+    } gsdata;
     // Set in append_energies:
 	std::vector<Eigen::Vector2i> indices; // per-energy index into D (row, num rows)
 	std::vector<double> rest_volumes; // per-energy rest volume
-- 
cgit v1.2.3