From c4484057685a905876ed472a61a0fc0f6085f885 Mon Sep 17 00:00:00 2001
From: over0219 <over0219@umn.edu>
Date: Wed, 1 Jul 2020 18:26:23 -0500
Subject: fixed mcgs

---
 extern/softbody/src/admmpd_collision.cpp    |  76 ++++-
 extern/softbody/src/admmpd_collision.h      |  37 ++-
 extern/softbody/src/admmpd_embeddedmesh.cpp |  79 ++---
 extern/softbody/src/admmpd_embeddedmesh.h   |   7 +-
 extern/softbody/src/admmpd_linsolve.cpp     | 456 ++++++++++++++++------------
 extern/softbody/src/admmpd_linsolve.h       |  17 +-
 extern/softbody/src/admmpd_solver.cpp       |  22 +-
 extern/softbody/src/admmpd_types.h          |  17 +-
 intern/softbody/admmpd_api.cpp              |  12 +-
 9 files changed, 429 insertions(+), 294 deletions(-)
diff --git a/extern/softbody/src/admmpd_collision.cpp b/extern/softbody/src/admmpd_collision.cpp
index 289af9fc68b..3ff7f949215 100644
--- a/extern/softbody/src/admmpd_collision.cpp
+++ b/extern/softbody/src/admmpd_collision.cpp
@@ -75,7 +75,6 @@ void Collision::detect_discrete_vf(
 	const Eigen::MatrixXd *mesh_x,
 	const Eigen::MatrixXi *mesh_tris,
 	bool mesh_is_obs,
-	double floor_z,
 	std::vector<VFCollisionPair> *pairs)
 {
 	// Any faces to detect against?
@@ -170,9 +169,9 @@ static void parallel_detect(
 	
 	if (td->embmesh != nullptr)
 	{
-		int tet_idx = td->embmesh->vtx_to_tet[i];
+		int tet_idx = td->embmesh->emb_vtx_to_tet[i];
 		RowVector4i tet = td->embmesh->tets.row(tet_idx);
-		Vector4d bary = td->embmesh->barys.row(i);
+		Vector4d bary = td->embmesh->emb_barys.row(i);
 		Vector3d pt = 
 			bary[0] * td->x1->row(tet[0]) +
 			bary[1] * td->x1->row(tet[1]) +
@@ -198,7 +197,6 @@ static void parallel_detect(
 			&td->obsdata->V1,
 			&td->obsdata->F,
 			true,
-			td->floor_z,
 			&tl_pairs );
 
 	//	Collision::detect_discrete_vf(
@@ -227,6 +225,10 @@ int EmbeddedMeshCollision::detect(
 	std::vector<std::vector<VFCollisionPair> > pt_vf_pairs
 		(max_threads, std::vector<VFCollisionPair>());
 
+	double floor_z = this->settings.floor_z;
+	if (!this->settings.test_floor)
+		floor_z = -std::numeric_limits<double>::max();
+
 	DetectThreadData thread_data = {
 		.tetmesh = nullptr,
 		.embmesh = mesh,
@@ -237,11 +239,10 @@ int EmbeddedMeshCollision::detect(
 		.pt_vf_pairs = &pt_vf_pairs
 	};
 
-	int nev = mesh->x_rest.rows();
-
-	TaskParallelSettings settings;
-	BLI_parallel_range_settings_defaults(&settings);
-	BLI_task_parallel_range(0, nev, &thread_data, parallel_detect, &settings);
+	int nev = mesh->emb_rest_x.rows();
+	TaskParallelSettings thrd_settings;
+	BLI_parallel_range_settings_defaults(&thrd_settings);
+	BLI_task_parallel_range(0, nev, &thread_data, parallel_detect, &thrd_settings);
 
 	// Combine thread-local results
 	vf_pairs.clear();
@@ -254,6 +255,59 @@ int EmbeddedMeshCollision::detect(
 	return vf_pairs.size();
 } // end detect
 
+void EmbeddedMeshCollision::graph(
+	std::vector<std::set<int> > &g)
+{
+	int np = vf_pairs.size();
+	if (np==0)
+		return;
+
+	int nv = mesh->rest_x.rows();
+	if ((int)g.size() < nv)
+		g.resize(nv, std::set<int>());
+
+	for (int i=0; i<np; ++i)
+	{
+		VFCollisionPair &pair = vf_pairs[i];
+		std::set<int> stencil;
+
+		if (!pair.p_is_obs)
+		{
+			int tet_idx = mesh->emb_vtx_to_tet[pair.p_idx];
+			RowVector4i tet = mesh->tets.row(tet_idx);
+			stencil.emplace(tet[0]);
+			stencil.emplace(tet[1]);
+			stencil.emplace(tet[2]);
+			stencil.emplace(tet[3]);
+		}
+		if (!pair.q_is_obs)
+		{
+			RowVector3i emb_face = mesh->emb_faces.row(pair.q_idx);
+			for (int j=0; j<3; ++j)
+			{
+				int tet_idx = mesh->emb_vtx_to_tet[emb_face[j]];
+				RowVector4i tet = mesh->tets.row(tet_idx);
+				stencil.emplace(tet[0]);
+				stencil.emplace(tet[1]);
+				stencil.emplace(tet[2]);
+				stencil.emplace(tet[3]);	
+			}
+		}
+
+		for (std::set<int>::iterator it = stencil.begin();
+			it != stencil.end(); ++it)
+		{
+			for (std::set<int>::iterator it2 = stencil.begin();
+				it2 != stencil.end(); ++it2)
+			{
+				if (*it == *it2)
+					continue;
+				g[*it].emplace(*it2);
+			}
+		}
+	}
+} // end graph
+
 void EmbeddedMeshCollision::linearize(
 	const Eigen::MatrixXd *x,
 	std::vector<Eigen::Triplet<double> > *trips,
@@ -314,8 +368,8 @@ void EmbeddedMeshCollision::linearize(
 		// Obstacle collision
 		if (pair.q_is_obs)
 		{
-			RowVector4d bary = mesh->barys.row(emb_p_idx);
-			int tet_idx = mesh->vtx_to_tet[emb_p_idx];
+			RowVector4d bary = mesh->emb_barys.row(emb_p_idx);
+			int tet_idx = mesh->emb_vtx_to_tet[emb_p_idx];
 			RowVector4i tet = mesh->tets.row(tet_idx);
 			int c_idx = d->size();
 			d->emplace_back(q_n.dot(pair.pt_on_q));
diff --git a/extern/softbody/src/admmpd_collision.h b/extern/softbody/src/admmpd_collision.h
index 95623510969..22d7dd58f8c 100644
--- a/extern/softbody/src/admmpd_collision.h
+++ b/extern/softbody/src/admmpd_collision.h
@@ -6,6 +6,7 @@
 
 #include "admmpd_bvh.h"
 #include "admmpd_types.h"
+#include <set>
 
 namespace admmpd {
 
@@ -29,6 +30,16 @@ public:
         AABBTree<double,3> tree;
     } obsdata;
 
+    struct Settings {
+        double floor_z;
+        bool test_floor;
+        Settings() :
+            floor_z(0),
+//            floor_z(-std::numeric_limits<double>::max()),
+            test_floor(true)
+            {}
+    } settings;
+
     virtual ~Collision() {}
 
     // Performs collision detection.
@@ -37,6 +48,11 @@ public:
         const Eigen::MatrixXd *x0,
         const Eigen::MatrixXd *x1) = 0;
 
+    // Appends the per-vertex graph of dependencies
+    // for constraints (ignores obstacles).
+    virtual void graph(
+        std::vector<std::set<int> > &g) = 0;
+
     // Set the soup of obstacles for this time step.
     // I don't really like having to switch up interface style, but we'll
     // do so here to avoid copies that would happen in admmpd_api.
@@ -48,7 +64,8 @@ public:
         int nf);
 
     // Special case for floor since it's common.
-    virtual void set_floor(double z) = 0;
+    virtual void set_floor(double z) { settings.floor_z=z; }
+    virtual double get_floor() const { return settings.floor_z; }
 
     // Linearize the constraints and return Jacobian.
     virtual void linearize(
@@ -68,31 +85,22 @@ public:
         const Eigen::MatrixXd *mesh_x,
         const Eigen::MatrixXi *mesh_tris,
         bool mesh_is_obs,
-        double floor_z,
         std::vector<VFCollisionPair> *pairs);
 };
 
 // Collision detection against multiple meshes
 class EmbeddedMeshCollision : public Collision {
 public:
-    EmbeddedMeshCollision(const EmbeddedMeshData *mesh_) :
-        mesh(mesh_),
-//        floor_z(-std::numeric_limits<double>::max())
-        floor_z(0)
-        {}
-
-    // A floor is so common that it makes sense to hard
-    // code floor collision instead of using a floor mesh.
-    void set_floor(double z)
-    {
-        floor_z = z;
-    };
+    EmbeddedMeshCollision(const EmbeddedMeshData *mesh_) : mesh(mesh_){}
 
     // Performs collision detection and stores pairs
     int detect(
         const Eigen::MatrixXd *x0,
         const Eigen::MatrixXd *x1);
 
+    void graph(
+        std::vector<std::set<int> > &g);
+    
     // Linearizes the collision pairs about x
     // for the constraint Kx=l
     void linearize(
@@ -103,7 +111,6 @@ public:
 protected:
     // A ptr to the embedded mesh data
     const EmbeddedMeshData *mesh;
-    double floor_z;
 
     // Pairs are compute on detect
     std::vector<VFCollisionPair> vf_pairs;
diff --git a/extern/softbody/src/admmpd_embeddedmesh.cpp b/extern/softbody/src/admmpd_embeddedmesh.cpp
index 0c2cdcac938..17e2b28c0e8 100644
--- a/extern/softbody/src/admmpd_embeddedmesh.cpp
+++ b/extern/softbody/src/admmpd_embeddedmesh.cpp
@@ -215,11 +215,10 @@ bool EmbeddedMesh::generate(
 	const Eigen::MatrixXd &V, // embedded verts
 	const Eigen::MatrixXi &F, // embedded faces
 	EmbeddedMeshData *emb_mesh, // where embedding is stored
-	Eigen::MatrixXd *x_tets, // lattice vertices, n x 3
 	bool trim_lattice)
 {
-	emb_mesh->faces = F;
-	emb_mesh->x_rest = V;
+	emb_mesh->emb_faces = F;
+	emb_mesh->emb_rest_x = V;
 
 	AlignedBox<double,3> aabb;
 	int nev = V.rows();
@@ -315,14 +314,14 @@ bool EmbeddedMesh::generate(
 		int ntv0 = data.verts.size(); // original num verts
 		int ntv1 = refd_verts.size(); // reduced num verts
 		BLI_assert(ntv1 <= ntv0);
-		x_tets->resize(ntv1,3);
+		emb_mesh->rest_x.resize(ntv1,3);
 		int vtx_count = 0;
 		for (int i=0; i<ntv0; ++i)
 		{
 			if (refd_verts.count(i)>0)
 			{
 				for(int j=0; j<3; ++j){
-					x_tets->operator()(vtx_count,j) = data.verts[i][j];
+					emb_mesh->rest_x(vtx_count,j) = data.verts[i][j];
 				}
 				vtx_old_to_new[i] = vtx_count;
 				vtx_count++;
@@ -342,8 +341,7 @@ bool EmbeddedMesh::generate(
 	}
 
 	// Now compute the baryweighting for embedded vertices
-	return compute_embedding(
-		emb_mesh, &V, x_tets);
+	return compute_embedding(emb_mesh);
 
 } // end gen lattice
 
@@ -352,16 +350,10 @@ bool EmbeddedMesh::generate(
 
 void EmbeddedMesh::compute_masses(
 	EmbeddedMeshData *emb_mesh, // where embedding is stored
-	const Eigen::MatrixXd *x_embed, // embedded vertices, p x 3
-	const Eigen::MatrixXd *x_tets, // lattice vertices, n x 3
 	Eigen::VectorXd *masses_tets, // masses of the lattice verts
 	double density_kgm3)
 {
 	BLI_assert(emb_mesh != NULL);
-	BLI_assert(x_embed != NULL);
-	BLI_assert(x_tets != NULL);
-	BLI_assert(x_tets->rows() > 0);
-	BLI_assert(x_tets->cols() == 3);
 	BLI_assert(masses_tets != NULL);
 	BLI_assert(density_kgm3 > 0);
 
@@ -371,18 +363,18 @@ void EmbeddedMesh::compute_masses(
 	// Source: https://github.com/mattoverby/mclscene/blob/master/include/MCL/TetMesh.hpp
 	// Computes volume-weighted masses for each vertex
 	// density_kgm3 is the unit-volume density
-	int nx = x_tets->rows();
+	int nx = emb_mesh->rest_x.rows();
 	masses_tets->resize(nx);
 	masses_tets->setZero();
 	int n_tets = emb_mesh->tets.rows();
 	for (int t=0; t<n_tets; ++t)
 	{
 		RowVector4i tet = emb_mesh->tets.row(t);
-		RowVector3d tet_v0 = x_tets->row(tet[0]);
+		RowVector3d tet_v0 = emb_mesh->rest_x.row(tet[0]);
 		Matrix3d edges;
-		edges.col(0) = x_tets->row(tet[1]) - tet_v0;
-		edges.col(1) = x_tets->row(tet[2]) - tet_v0;
-		edges.col(2) = x_tets->row(tet[3]) - tet_v0;
+		edges.col(0) = emb_mesh->rest_x.row(tet[1]) - tet_v0;
+		edges.col(1) = emb_mesh->rest_x.row(tet[2]) - tet_v0;
+		edges.col(2) = emb_mesh->rest_x.row(tet[3]) - tet_v0;
 		double vol = std::abs((edges).determinant()/6.f);
 		double tet_mass = density_kgm3 * vol;
 		masses_tets->operator[](tet[0]) += tet_mass / 4.f;
@@ -405,8 +397,6 @@ void EmbeddedMesh::compute_masses(
 typedef struct FindTetThreadData {
 	AABBTree<double,3> *tree;
 	EmbeddedMeshData *emb_mesh; // thread sets vtx_to_tet and barys
-	const Eigen::MatrixXd *x_embed;
-	const Eigen::MatrixXd *x_tets;
 } FindTetThreadData;
 
 static void parallel_point_in_tet(
@@ -415,41 +405,40 @@ static void parallel_point_in_tet(
 	const TaskParallelTLS *__restrict UNUSED(tls))
 {
 	FindTetThreadData *td = (FindTetThreadData*)userdata;
-	Vector3d pt = td->x_embed->row(i);
-	PointInTetMeshTraverse<double> traverser(pt, td->x_tets, &td->emb_mesh->tets);
+	Vector3d pt = td->emb_mesh->emb_rest_x.row(i);
+	PointInTetMeshTraverse<double> traverser(pt, &td->emb_mesh->rest_x, &td->emb_mesh->tets);
 	bool success = td->tree->traverse(traverser);
 	int tet_idx = traverser.output.prim;
 	if (success && tet_idx >= 0)
 	{
 		RowVector4i tet = td->emb_mesh->tets.row(tet_idx);
 		Vector3d t[4] = {
-			td->x_tets->row(tet[0]),
-			td->x_tets->row(tet[1]),
-			td->x_tets->row(tet[2]),
-			td->x_tets->row(tet[3])
+			td->emb_mesh->rest_x.row(tet[0]),
+			td->emb_mesh->rest_x.row(tet[1]),
+			td->emb_mesh->rest_x.row(tet[2]),
+			td->emb_mesh->rest_x.row(tet[3])
 		};
-		td->emb_mesh->vtx_to_tet[i] = tet_idx;
+		td->emb_mesh->emb_vtx_to_tet[i] = tet_idx;
 		Vector4d b = barycoords::point_tet(pt,t[0],t[1],t[2],t[3]);
-		td->emb_mesh->barys.row(i) = b;
+		td->emb_mesh->emb_barys.row(i) = b;
 	}
 } // end parallel lin solve
 
 bool EmbeddedMesh::compute_embedding(
-	EmbeddedMeshData *emb_mesh, // where embedding is stored
-	const Eigen::MatrixXd *x_embed, // embedded vertices, p x 3
-	const Eigen::MatrixXd *x_tets) // lattice vertices, n x 3
+	EmbeddedMeshData *emb_mesh)
 {
 	BLI_assert(emb_mesh!=NULL);
-	BLI_assert(x_embed!=NULL);
-	BLI_assert(x_tets!=NULL);
 
-	int nv = x_embed->rows();
+	int nv = emb_mesh->emb_rest_x.rows();
 	if (nv==0)
+	{
+		printf("**EmbeddedMesh::compute_embedding: No embedded vertices");
 		return false;
+	}
 
-	emb_mesh->barys.resize(nv,4);
-	emb_mesh->barys.setOnes();
-	emb_mesh->vtx_to_tet.resize(nv);
+	emb_mesh->emb_barys.resize(nv,4);
+	emb_mesh->emb_barys.setOnes();
+	emb_mesh->emb_vtx_to_tet.resize(nv);
 	int nt = emb_mesh->tets.rows();
 
 	// BVH tree for finding point-in-tet and computing
@@ -462,7 +451,7 @@ bool EmbeddedMesh::compute_embedding(
 		tet_aabbs[i].setEmpty();
 		RowVector4i tet = emb_mesh->tets.row(i);
 		for (int j=0; j<4; ++j)
-			tet_aabbs[i].extend(x_tets->row(tet[j]).transpose());
+			tet_aabbs[i].extend(emb_mesh->rest_x.row(tet[j]).transpose());
 
 		tet_aabbs[i].extend(tet_aabbs[i].min()-veta);
 		tet_aabbs[i].extend(tet_aabbs[i].max()+veta);
@@ -473,9 +462,7 @@ bool EmbeddedMesh::compute_embedding(
 
 	FindTetThreadData thread_data = {
 		.tree = &tree,
-		.emb_mesh = emb_mesh,
-		.x_embed = x_embed,
-		.x_tets = x_tets
+		.emb_mesh = emb_mesh
 	};
 	TaskParallelSettings settings;
 	BLI_parallel_range_settings_defaults(&settings);
@@ -485,7 +472,7 @@ bool EmbeddedMesh::compute_embedding(
 	const double eps = 1e-8;
 	for (int i=0; i<nv; ++i)
 	{
-		RowVector4d b = emb_mesh->barys.row(i);
+		RowVector4d b = emb_mesh->emb_barys.row(i);
 		if (b.minCoeff() < -eps)
 		{
 			printf("**Lattice::generate Error: negative barycoords\n");
@@ -505,8 +492,8 @@ bool EmbeddedMesh::compute_embedding(
 
 
 // Export the mesh for funsies
-std::ofstream of("v_lattice.txt"); of << *x_tets; of.close();
-std::ofstream of2("t_lattice.txt"); of2 << emb_mesh->tets; of2.close();
+//std::ofstream of("v_lattice.txt"); of << emb_mesh->rest_x; of.close();
+//std::ofstream of2("t_lattice.txt"); of2 << emb_mesh->tets; of2.close();
 
 	return true;
 
@@ -517,9 +504,9 @@ Eigen::Vector3d EmbeddedMesh::get_mapped_vertex(
 	const Eigen::MatrixXd *x_data,
 	int idx)
 {
-    int t_idx = emb_mesh->vtx_to_tet[idx];
+    int t_idx = emb_mesh->emb_vtx_to_tet[idx];
     RowVector4i tet = emb_mesh->tets.row(t_idx);
-    RowVector4d b = emb_mesh->barys.row(idx);
+    RowVector4d b = emb_mesh->emb_barys.row(idx);
     return Vector3d(
 		x_data->row(tet[0]) * b[0] +
 		x_data->row(tet[1]) * b[1] +
diff --git a/extern/softbody/src/admmpd_embeddedmesh.h b/extern/softbody/src/admmpd_embeddedmesh.h
index eb7583b93b4..6d1022b8a0c 100644
--- a/extern/softbody/src/admmpd_embeddedmesh.h
+++ b/extern/softbody/src/admmpd_embeddedmesh.h
@@ -15,7 +15,6 @@ public:
         const Eigen::MatrixXd &V, // embedded verts
         const Eigen::MatrixXi &F, // embedded faces
         EmbeddedMeshData *emb_mesh, // where embedding is stored
-        Eigen::MatrixXd *x_tets, // lattice vertices, n x 3
         bool trim_lattice = true); // remove elements outside embedded volume
 
     // Returns the vtx mapped from x/v and tets
@@ -28,8 +27,6 @@ public:
     // for the lattice vertices
     void compute_masses(
         EmbeddedMeshData *emb_mesh, // where embedding is stored
-        const Eigen::MatrixXd *x_embed, // embedded vertices, p x 3
-        const Eigen::MatrixXd *x_tets, // lattice vertices, n x 3
         Eigen::VectorXd *masses_tets, // masses of the lattice verts
         double density_kgm3 = 2100);
 
@@ -38,9 +35,7 @@ protected:
     // Returns true on success
     // Computes the embedding data, like barycoords
     bool compute_embedding(
-        EmbeddedMeshData *emb_mesh, // where embedding is stored
-        const Eigen::MatrixXd *x_embed, // embedded vertices, p x 3
-        const Eigen::MatrixXd *x_tets); // lattice vertices, n x 3
+        EmbeddedMeshData *emb_mesh);
 
 }; // class Lattice
 
diff --git a/extern/softbody/src/admmpd_linsolve.cpp b/extern/softbody/src/admmpd_linsolve.cpp
index 548e1b36dcf..05f7d5da5f4 100644
--- a/extern/softbody/src/admmpd_linsolve.cpp
+++ b/extern/softbody/src/admmpd_linsolve.cpp
@@ -40,8 +40,10 @@ static inline void make_n3(
 
 void ConjugateGradients::solve(
 		const Options *options,
-		SolverData *data)
+		SolverData *data,
+		Collision *collision)
 {
+	(void)(collision); // unused
 	BLI_assert(data != NULL);
 	BLI_assert(options != NULL);
 	int nx = data->x.rows();
@@ -154,83 +156,117 @@ void ConjugateGradients::solve_Ax_b(
 
 void GaussSeidel::solve(
 		const Options *options,
-		SolverData *data)
+		SolverData *data,
+		Collision *collision)
 {
-	init_solve(options,data);
-	std::vector<std::vector<int> > *colors;
-	//if (data->gsdata.KtK.nonZeros()==0)
-		colors = &data->gsdata.A_colors;
-	//else...
-
-	double omega = 1.0; // over relaxation
-	int n_colors = colors->size();
+	init_solve(options,data,collision);
+	MatrixXd dx(data->x.rows(),3);
+	dx.setZero();
+
+	struct GaussSeidelThreadData {
+		int iter;
+		int color;
+		std::vector<std::vector<int> > *colors;
+		const Options *options;
+		SolverData *data;
+		Collision *collision;
+		MatrixXd *dx;
+	};
 
-	// Outer iteration loop
-	int iter = 0;
-	for (; iter < options->max_gs_iters; ++iter)
+	GaussSeidelThreadData thread_data = {
+		.iter = 0,
+		.color = 0,
+		.colors = &data->gsdata.A3_plus_CtC_colors,
+		.options = options,
+		.data = data,
+		.collision = collision,
+		.dx = &dx };
+
+	// Inner iteration of Gauss-Seidel
+	auto parallel_gs_sweep = [](void *__restrict userdata, const int i_,
+		const TaskParallelTLS *__restrict UNUSED(tls)) -> void
 	{
-		for (int color=0; color<n_colors; ++color)
+		GaussSeidelThreadData *td = (GaussSeidelThreadData*)userdata;
+		int idx = td->colors->at(td->color)[i_];
+		double omega = td->options->gs_omega;
+		typedef RowSparseMatrix<double>::InnerIterator InnerIter;
+
+		// Loop over expanded A matrix, i.e. segment update
+		Vector3d LUx(0,0,0);
+		Vector3d inv_aii(0,0,0);
+		for (int j=0; j<3; ++j)
 		{
-			const std::vector<int> &inds = colors->at(color);
-			int n_inds = inds.size();
-			for (int i=0; i<n_inds; ++i)
+			InnerIter rit(td->data->gsdata.A3_plus_CtC, idx*3+j);
+			for (; rit; ++rit)
 			{
-				int idx = inds[i];
-
-				// Special case pins TODO
-				// We can skip the usual Gauss-Seidel update
-	//			if (is_pinned[idx]) ...
+				int r = rit.row();
+				int c = rit.col();
+				double v = rit.value();
+				if (v==0.0)
+					continue;
 
-				RowSparseMatrix<double>::InnerIterator rit(data->A,idx);
-				Vector3d LUx(0,0,0);
-				Vector3d inv_aii(0,0,0);
-				for (; rit; ++rit)
+				if (r==c) // Diagonal
 				{
-					int r = rit.row();
-					int c = rit.col();
-					double v = rit.value();
-					if (v==0.0)
-						continue;
-
-					if (r==c) // Diagonal
-					{
-						inv_aii.array() = 1.0/v;
-						continue;
-					}
-					Vector3d xj = data->x.row(c);
-					LUx += v*xj;
+					inv_aii[j] = 1.0/v;
+					continue;
 				}
 
-				// Update x
-				Vector3d bi = data->b.row(idx);
-				Vector3d xi = data->x.row(idx);
-				Vector3d xi_new = (bi-LUx);
+				double xj = td->data->x(c/3,j);
+				LUx[j] += v*xj;
+			}
 
-				for (int j=0; j<3; ++j)
-					xi_new[j] *= inv_aii[j];
-				data->x.row(idx) = xi*(1.0-omega) + xi_new*omega;
+		} // end loop segment
 
-				// TODO
-				// We can also apply constraints here, like
-				// checking against Collision::floor_z
-				if (data->x(idx,2)<0)
-					data->x(idx,2)=0;
+		// Update x
+		Vector3d bi = td->data->b.row(idx).transpose()
+			+ td->data->gsdata.Ctd.segment<3>(idx*3);
+		Vector3d xi = td->data->x.row(idx);
+		Vector3d xi_new = (bi-LUx);
 
-			} // end loop inds
-		} // end loop colors
+		for (int j=0; j<3; ++j)
+			xi_new[j] *= inv_aii[j];
+		td->data->x.row(idx) = xi*(1.0-omega) + xi_new*omega;
+
+		// Check fast-query constraints
+		double floor_z = td->collision->get_floor();
+//		if (td->data->x(idx,2) < floor_z)
+//			td->data->x(idx,2) = floor_z;
+
+		// Update deltas
+		td->dx->row(idx) = td->data->x.row(idx)-xi.transpose();
+
+	};
 
-		// TODO check exit condition
+	TaskParallelSettings thrd_settings;
+	BLI_parallel_range_settings_defaults(&thrd_settings);
 
+	// Outer iteration loop
+	int n_colors = data->gsdata.A3_plus_CtC_colors.size();
+	int iter = 0;
+	for (; iter < options->max_gs_iters; ++iter)
+	{
+		for (int color=0; color<n_colors; ++color)
+		{
+			thread_data.color = color;
+			int n_inds = data->gsdata.A3_plus_CtC_colors[color].size();
+			BLI_task_parallel_range(0, n_inds, &thread_data, parallel_gs_sweep, &thrd_settings);
+		} // end loop colors
+
+		double dxn = dx.rowwise().lpNorm<Infinity>().maxCoeff();
+		if (dxn < options->min_res)
+			break;
 	} // end loop GS iters
 
 } // end solve with constraints
 
 void GaussSeidel::init_solve(
 		const Options *options,
-		SolverData *data)
+		SolverData *data,
+		Collision *collision)
 {
 	BLI_assert(options != nullptr);
 	BLI_assert(data != nullptr);
+	BLI_assert(collision != nullptr);
 	int nx = data->x.rows();
 	BLI_assert(nx>0);
 	BLI_assert(data->x.cols()==3);
@@ -240,53 +276,13 @@ void GaussSeidel::init_solve(
 
 	// Do we need to color the default colorings?
 	if (data->gsdata.A_colors.size() == 0)
-		compute_colors(&data->A, 1, data->gsdata.A_colors);
-
-	// Verify color groups are correct
 	{
-		std::cout << "TESTING " << data->tets.rows() << " tets" << std::endl;
-		std::cout << "num colors: " << data->gsdata.A_colors.size() << std::endl;
-		int nt = data->tets.rows();
-		int nc = data->gsdata.A_colors.size();
-		for (int i=0; i<nc; ++i)
-		{
-			// Each vertex in the color should not
-			// be a part of a tet with a vertex in the same color
-			const std::vector<int> &grp = data->gsdata.A_colors[i];
-			int n_grp = grp.size();
-			for (int j=0; j<n_grp; ++j)
-			{
-				int p_idx = grp[j];
-				auto in_tet = [](int idx, const RowVector4i &t)
-				{
-					return (t[0]==idx||t[1]==idx||t[2]==idx||t[3]==idx);
-				};
-
-				for (int k=0; k<nt; ++k)
-				{
-					RowVector4i t = data->tets.row(k);
-					if (!in_tet(p_idx,t))
-						continue;
-
-					for (int l=0; l<n_grp; ++l)
-					{
-						int q_idx = grp[l];
-						if (p_idx==q_idx)
-							continue;
-
-						if (in_tet(q_idx,t))
-						{
-std::cerr << "p: " << p_idx << ", q: " << q_idx << ", tet (" << k << "): " << t << std::endl;
-//							throw std::runtime_error("GaussSeidel Error: Color contains two verts form same tet!");
-						}
-					}
-				}
-			}
-		}
+		std::vector<std::set<int> > c_graph;
+		compute_colors(data->energies_graph, c_graph, data->gsdata.A_colors);
 	}
 
 	// Create large A if we haven't already.
-	if (data->gsdata.A3.rows() != nx*3)
+	if (data->gsdata.A3.nonZeros()==0)
 		make_n3(data->A, data->gsdata.A3);
 
 	// TODO
@@ -307,11 +303,17 @@ std::cerr << "p: " << p_idx << ", q: " << q_idx << ", tet (" << k << "): " << t
 			data->gsdata.b3_plus_Ctd[i*3+1] = data->b(i,1)+data->gsdata.Ctd[i*3+1];
 			data->gsdata.b3_plus_Ctd[i*3+2] = data->b(i,2)+data->gsdata.Ctd[i*3+2];
 		}
-		compute_colors(&data->gsdata.A3_plus_CtC, 3, data->gsdata.A3_plus_CtC_colors);
+		std::vector<std::set<int> > c_graph;
+		collision->graph(c_graph);
+		compute_colors(data->energies_graph, c_graph, data->gsdata.A3_plus_CtC_colors);
 	}
 	else
 	{
+		if (data->gsdata.CtC.rows() != nx*3)
+			data->gsdata.CtC.resize(nx*3, nx*3);
 		data->gsdata.CtC.setZero();
+		if (data->gsdata.Ctd.rows() != nx*3)
+			data->gsdata.Ctd.resize(nx*3);
 		data->gsdata.Ctd.setZero();
 		data->gsdata.A3_plus_CtC = data->gsdata.A3;
 		data->gsdata.b3_plus_Ctd.resize(nx*3);
@@ -326,145 +328,161 @@ std::cerr << "p: " << p_idx << ", q: " << q_idx << ", tet (" << k << "): " << t
 
 } // end init solve
 
-struct GraphColorThreadData {
-	const RowSparseMatrix<double> *A;
-	int stride;
-	std::vector<std::vector<int> > *adjacency;
-	std::vector<std::set<int> > *palette;
-	int init_palette_size;
-	std::vector<int> *conflict;
-	std::vector<int> *node_colors;
-};
-
 // Rehash of graph coloring from
 // https://github.com/mattoverby/mclscene/blob/master/include/MCL/GraphColor.hpp
 void GaussSeidel::compute_colors(
-		const RowSparseMatrix<double> *A,
-		int stride,
+		const std::vector<std::set<int> > &vertex_energies_graph,
+		const std::vector<std::set<int> > &vertex_constraints_graph,
 		std::vector<std::vector<int> > &colors)
 {
-	BLI_assert(A != nullptr);
-	BLI_assert(stride>0);
+	int n_nodes = vertex_energies_graph.size();
+	BLI_assert(n_nodes>0);
+	BLI_assert(
+		vertex_constraints_graph.size()==0 ||
+		(int)vertex_constraints_graph.size()==n_nodes);
 
 	// Graph color settings
 	int init_palette_size = 6;
 
 	// Graph coloring tmp data
-	int n_nodes = A->rows()/stride;
-	std::vector<std::vector<int> > adjacency(n_nodes, std::vector<int>());
 	std::vector<std::set<int> > palette(n_nodes, std::set<int>());
 	std::vector<int> conflict(n_nodes,1);
 	std::vector<int> node_colors(n_nodes,-1);
+	std::vector<int> node_queue(n_nodes);
+	std::iota(node_queue.begin(), node_queue.end(), 0);
+    std::random_device rd;
+    std::mt19937 mt(rd());
+	std::uniform_int_distribution<int> dist(0, n_nodes);
+
+	struct GraphColorThreadData {
+		int iter;
+		int init_palette_size;
+		const std::vector<std::set<int> > *e_graph; // energies
+		const std::vector<std::set<int> > *c_graph; // constraints
+		std::vector<std::set<int> > *palette;
+		std::vector<int> *conflict;
+		std::vector<int> *node_colors;
+		std::vector<int> *node_queue;
+    	std::mt19937 *mt;
+		std::uniform_int_distribution<int> *dist;
+	};
+	GraphColorThreadData thread_data = {
+		.iter = 0,
+		.init_palette_size = init_palette_size,
+		.e_graph = &vertex_energies_graph,
+		.c_graph = &vertex_constraints_graph,
+		.palette = &palette,
+		.conflict = &conflict,
+		.node_colors = &node_colors,
+		.node_queue = &node_queue,
+		.mt = &mt,
+		.dist = &dist };
+	TaskParallelSettings thrd_settings;
+	BLI_parallel_range_settings_defaults(&thrd_settings);
 
 	//
 	// Step 1)
 	// Graph color initialization
 	//
-	auto parallel_init_graph = [](
-		void *__restrict userdata,
-		const int i,
+	auto init_graph = [](void *__restrict userdata, const int i,
 		const TaskParallelTLS *__restrict UNUSED(tls)) -> void
 	{
 		GraphColorThreadData *td = (GraphColorThreadData*)userdata;
-		typedef RowSparseMatrix<double>::InnerIterator InnerIter;
-		// Use lower trianglular portion of the matrix.
-		// That is, store adjacency inds that are lower than
-		// the current index.
-		for (InnerIter it(*(td->A),i*td->stride); it; ++it)
-		{
-			if (std::abs(it.value())==0.0)
-				continue;
-			if (it.col()/td->stride >= it.row()/td->stride)
-				break;
-			int idx = it.col()/td->stride;
-			td->adjacency->at(i).emplace_back(idx);
-		}
 		for( int j=0; j<td->init_palette_size; ++j ) // init colors
 			td->palette->at(i).insert(j); 
 	};
-	GraphColorThreadData thread_data = {
-		.A = A, .stride = stride,
-		.adjacency = &adjacency,
-		.palette = &palette,
-		.init_palette_size = init_palette_size,
-		.conflict = &conflict,
-		.node_colors = &node_colors };
-	TaskParallelSettings settings;
-	BLI_parallel_range_settings_defaults(&settings);
-	BLI_task_parallel_range(0, n_nodes, &thread_data, parallel_init_graph, &settings);
-
-    std::random_device rd;
-    std::mt19937 mt(rd());
-	std::uniform_int_distribution<int> dist(0, n_nodes);
+	BLI_task_parallel_range(0, n_nodes, &thread_data, init_graph, &thrd_settings);
 
 	//
 	// Step 2)
 	// Stochastic Graph coloring
 	//
-	std::vector<int> node_queue(n_nodes);
-	std::iota(node_queue.begin(), node_queue.end(), 0);
 	int max_iters = n_nodes;
-	for (int rand_iter=0; n_nodes>0 && rand_iter < max_iters; ++rand_iter)
+	for (int rand_iter=0; n_nodes>0 && rand_iter<max_iters; ++rand_iter)
 	{
-		// Generate a random color and find conflicts
-		for (int i=0; i<n_nodes; ++i)
+		thread_data.iter = rand_iter;
+
+		// Generate a random color
+		auto generate_color = [](void *__restrict userdata, const int i,
+			const TaskParallelTLS *__restrict UNUSED(tls)) -> void
 		{
-			int idx = node_queue[i];
-			if( palette[idx].size() < 2 ){ // Feed if hungry
-				palette[idx].insert(init_palette_size+rand_iter);
+			GraphColorThreadData *td = (GraphColorThreadData*)userdata;
+			int idx = td->node_queue->at(i);
+			if (td->palette->at(idx).size()<2) // Feed the hungry
+					td->palette->at(idx).insert(td->init_palette_size+td->iter);
+			int c_idx = td->dist->operator()(*td->mt) % td->palette->at(idx).size();
+			td->node_colors->at(idx) = *std::next(td->palette->at(idx).begin(), c_idx);
+		};
+		BLI_task_parallel_range(0, n_nodes, &thread_data, generate_color, &thrd_settings);
+
+		// Detect conflicts
+		auto detect_conflicts = [](void *__restrict userdata, const int i,
+			const TaskParallelTLS *__restrict UNUSED(tls)) -> void
+		{
+			GraphColorThreadData *td = (GraphColorThreadData*)userdata;
+			int idx = td->node_queue->at(i);
+			int curr_c = td->node_colors->at(idx);
+			bool curr_conflict = false;
+			for (std::set<int>::iterator e_it = td->e_graph->at(idx).begin();
+				e_it != td->e_graph->at(idx).end() && !curr_conflict; ++e_it)
+			{
+				int adj_idx = *e_it;
+				if (adj_idx<=idx)
+					continue; // Hungarian heuristic
+				int adj_c = td->node_colors->at(adj_idx);
+				if (curr_c==adj_c)
+					curr_conflict = true;
 			}
-
-			int c_idx = dist(mt) % (int)palette[idx].size();
-			int curr_color = *std::next(palette[idx].begin(), c_idx);
-			node_colors[idx] = curr_color;
-			conflict[idx] = 0;
-
-			// Hungarian heuristic: node with largest index keeps color
-			// if both have the same color.
-			// Check lower-indexed adjacent nodes, and mark them
-			// in conflict if they generated the same color.
-			// We can do this if we process colors in an increasing order.
-			int n_adj = adjacency[idx].size();
-			for (int j=0; j<n_adj; ++j)
+			if ((int)td->c_graph->size() > idx)
 			{
-				// Adjacency index buffer only stores lower-indexed nodes.
-				int adj_idx = adjacency[idx][j];
-				int adj_color = node_colors[adj_idx];
-				if (adj_idx >= idx)
-					throw std::runtime_error("GaussSeidel Error: Oops, not lower diag");
-				if (adj_color==curr_color)
-					conflict[adj_idx] = 1;					
+				for (std::set<int>::iterator c_it = td->c_graph->at(idx).begin();
+					c_it != td->c_graph->at(idx).end() && !curr_conflict; ++c_it)
+				{
+					int adj_idx = *c_it;
+					if (adj_idx<=idx)
+						continue; // Hungarian heuristic
+					int adj_c = td->node_colors->at(adj_idx);
+					if (curr_c==adj_c)
+						curr_conflict = true;
+				}
 			}
-		}
+			td->conflict->at(idx) = curr_conflict;
+		};
+		BLI_task_parallel_range(0, n_nodes, &thread_data, detect_conflicts, &thrd_settings);
 
-		// Resolve conflicts and trim queue
-		std::set<int> new_queue;
+		// Resolve conflicts and update queue
+		std::vector<int> new_queue;
 		for (int i=0; i<n_nodes; ++i)
 		{
 			int idx = node_queue[i];
-			int curr_color = node_colors[idx];
-			int n_adj = adjacency[idx].size();
-
-			// If this node not in conflict, remove its
-			// color from adjacent palettes. If adjacent
-			// nodes not in conflict, remove their color
-			// from this palette.
-			for (int j=0; j<n_adj; ++j)
+			if (conflict[idx])
+				new_queue.emplace_back(idx);
+			else
 			{
-				int adj_idx = adjacency[idx][j];
-				int adj_color = node_colors[adj_idx];
-				if (!conflict[idx])
-					palette[adj_idx].erase(curr_color);
-				if (!conflict[adj_idx])
-					palette[idx].erase(adj_color);
+				int curr_color = node_colors[idx];
+				// Remove color from neighbor palletes
+				for (std::set<int>::iterator e_it = vertex_energies_graph[idx].begin();
+					e_it != vertex_energies_graph[idx].end(); ++e_it)
+				{
+					int adj_idx = *e_it;
+					if (conflict[adj_idx]) // still in the set?
+						palette[adj_idx].erase(curr_color);
+				}
+				if ((int)vertex_constraints_graph.size() > idx)
+				{
+					for (std::set<int>::iterator c_it = vertex_constraints_graph[idx].begin();
+						c_it != vertex_constraints_graph[idx].end(); ++c_it)
+					{
+						int adj_idx = *c_it;
+						if (conflict[adj_idx]) // still in the set?
+							palette[adj_idx].erase(curr_color);
+					}
+				}
 			}
-
-			if (conflict[idx])
-				new_queue.emplace(idx);
 		}
 
-		n_nodes = new_queue.size();
-		node_queue.assign(new_queue.begin(), new_queue.end());
+		node_queue = new_queue;
+		n_nodes = node_queue.size();
 
 	} // end color loop
 
@@ -473,6 +491,7 @@ void GaussSeidel::compute_colors(
 	// Map per-vertex colors
 	//
 	colors.clear();
+	colors.resize(14,std::vector<int>());
 	n_nodes = node_colors.size();
 	for( int i=0; i<n_nodes; ++i ){
 		int color = node_colors[i];
@@ -489,4 +508,51 @@ void GaussSeidel::compute_colors(
 
 } // end compute colors
 
+void GaussSeidel::verify_colors(SolverData *data)
+{
+	// TODO check constraints too
+	// Verify color groups are correct
+	std::cout << "TESTING " << data->tets.rows() << " tets" << std::endl;
+	std::cout << "num colors: " << data->gsdata.A_colors.size() << std::endl;
+	int nt = data->tets.rows();
+	int nc = data->gsdata.A_colors.size();
+	for (int i=0; i<nc; ++i)
+	{
+		// Each vertex in the color should not
+		// be a part of a tet with a vertex in the same color
+		const std::vector<int> &grp = data->gsdata.A_colors[i];
+		int n_grp = grp.size();
+		for (int j=0; j<n_grp; ++j)
+		{
+			int p_idx = grp[j];
+			auto in_tet = [](int idx, const RowVector4i &t)
+			{
+				return (t[0]==idx||t[1]==idx||t[2]==idx||t[3]==idx);
+			};
+
+			for (int k=0; k<nt; ++k)
+			{
+				RowVector4i t = data->tets.row(k);
+				if (!in_tet(p_idx,t))
+					continue;
+
+				for (int l=0; l<n_grp; ++l)
+				{
+					int q_idx = grp[l];
+					if (p_idx==q_idx)
+						continue;
+
+					if (in_tet(q_idx,t))
+					{
+						std::cerr << "p: " << p_idx << ", q: " << q_idx <<
+							", tet (" << k << "): " << t <<
+							", color: " << i <<
+							std::endl;
+					}
+				}
+			}
+		}
+	}
+} // end verify colors
+
 } // namespace admmpd
\ No newline at end of file
diff --git a/extern/softbody/src/admmpd_linsolve.h b/extern/softbody/src/admmpd_linsolve.h
index 3dc3764c6e9..ee6a82b4048 100644
--- a/extern/softbody/src/admmpd_linsolve.h
+++ b/extern/softbody/src/admmpd_linsolve.h
@@ -5,6 +5,7 @@
 #define ADMMPD_LINSOLVE_H_
 
 #include "admmpd_types.h"
+#include "admmpd_collision.h"
 
 namespace admmpd {
 
@@ -13,7 +14,8 @@ class ConjugateGradients {
 public:
 	void solve(
 		const Options *options,
-		SolverData *data);
+		SolverData *data,
+		Collision *collision);
 
 protected:
 	// Apply preconditioner
@@ -28,18 +30,23 @@ class GaussSeidel {
 public:
 	void solve(
 		const Options *options,
-		SolverData *data);
+		SolverData *data,
+		Collision *collision);
 
 protected:
 	void init_solve(
 		const Options *options,
-		SolverData *data);
+		SolverData *data,
+		Collision *collision);
 
 	void compute_colors(
-		const RowSparseMatrix<double> *A,
-		int stride,
+		const std::vector<std::set<int> > &vertex_energies_graph,
+		const std::vector<std::set<int> > &vertex_constraints_graph,
 		std::vector<std::vector<int> > &colors);
 
+	// For debugging:
+	void verify_colors(SolverData *data);
+
 };
 
 } // namespace admmpd
diff --git a/extern/softbody/src/admmpd_solver.cpp b/extern/softbody/src/admmpd_solver.cpp
index 6a4e9bc7e02..7a586106d21 100644
--- a/extern/softbody/src/admmpd_solver.cpp
+++ b/extern/softbody/src/admmpd_solver.cpp
@@ -46,7 +46,7 @@ bool Solver::init(
 	if (!compute_matrices(options,data))
 		return false;
 
-	printf("Solver::init:\n\tNum tets: %d\n\tNum verts: %d\n",T.rows(),V.rows());
+	printf("Solver::init:\n\tNum tets: %d\n\tNum verts: %d\n",(int)T.rows(),(int)V.rows());
 
 	return true;
 } // end init
@@ -79,8 +79,8 @@ int Solver::solve(
 		update_constraints(options,data,collision);
 
 		// Solve Ax=b s.t. Cx=d
-		ConjugateGradients().solve(options,data);
-		//GaussSeidel().solve(options,data);
+		//ConjugateGradients().solve(options,data,collision);
+		GaussSeidel().solve(options,data,collision);
 
 	} // end solver iters
 
@@ -279,6 +279,10 @@ void Solver::append_energies(
 	int nt = data->tets.rows();
 	BLI_assert(nt > 0);
 
+	int nx = data->x.rows();
+	if ((int)data->energies_graph.size() != nx)
+		data->energies_graph.resize(nx,std::set<int>());
+
 	data->indices.reserve(nt);
 	data->rest_volumes.reserve(nt);
 	data->weights.reserve(nt);
@@ -312,6 +316,18 @@ void Solver::append_energies(
 			continue;
 		}
 
+		int ele_dim = ele.cols();
+		for (int j=0; j<ele_dim; ++j)
+		{
+			int ej = ele[j];
+			for (int k=0; k<ele_dim; ++k)
+			{
+				int ek = ele[k];
+				if (ej==ek)
+					continue;
+				data->energies_graph[ej].emplace(ek);
+			}
+		}
 		data->indices.emplace_back(energy_index, energy_dim);
 		energy_index += energy_dim;
 	}
diff --git a/extern/softbody/src/admmpd_types.h b/extern/softbody/src/admmpd_types.h
index 688118c1246..eaeb1d0c93a 100644
--- a/extern/softbody/src/admmpd_types.h
+++ b/extern/softbody/src/admmpd_types.h
@@ -8,6 +8,7 @@
 #include <Eigen/Sparse>
 #include <Eigen/SparseCholesky>
 #include <vector>
+#include <set>
 
 // TODO template type for float/double
 
@@ -19,8 +20,9 @@ struct Options {
     int max_admm_iters;
     int max_cg_iters;
     int max_gs_iters;
+    double gs_omega; // Gauss-Seidel relaxation
     double mult_k; // stiffness multiplier for constraints
-    double min_res; // min residual for CG solver
+    double min_res; // exit tolerance for global step
     double youngs; // Young's modulus // TODO variable per-tet
     double poisson; // Poisson ratio // TODO variable per-tet
     Eigen::Vector3d grav;
@@ -29,8 +31,9 @@ struct Options {
         max_admm_iters(50),
         max_cg_iters(10),
         max_gs_iters(30),
+        gs_omega(1),
         mult_k(3),
-        min_res(1e-6),
+        min_res(1e-8),
         youngs(10000000),
         poisson(0.399),
         grav(0,0,-9.8)
@@ -49,11 +52,12 @@ struct TetMeshData {
 };
 
 struct EmbeddedMeshData { // i.e. the lattice
-    Eigen::MatrixXd x_rest; // embedded verts at rest
-    Eigen::MatrixXi faces; // embedded faces
+    Eigen::MatrixXd emb_rest_x; // embedded verts at rest
+    Eigen::MatrixXi emb_faces; // embedded faces
+    Eigen::VectorXi emb_vtx_to_tet; // what tet vtx is embedded in, p x 1
+    Eigen::MatrixXd emb_barys; // barycoords of the embedding, p x 4
     Eigen::MatrixXi tets; // lattice elements, m x 4
-    Eigen::VectorXi vtx_to_tet; // what tet vtx is embedded in, p x 1
-    Eigen::MatrixXd barys; // barycoords of the embedding, p x 4
+    Eigen::MatrixXd rest_x; // lattice verts at rest
 }; // type 1
 
 struct SolverData {
@@ -92,6 +96,7 @@ struct SolverData {
         std::vector<std::vector<int> > A3_plus_CtC_colors; // colors of A3+KtK
     } gsdata;
     // Set in append_energies:
+    std::vector<std::set<int> > energies_graph; // per-vertex adjacency list (graph)
 	std::vector<Eigen::Vector2i> indices; // per-energy index into D (row, num rows)
 	std::vector<double> rest_volumes; // per-energy rest volume
 	std::vector<double> weights; // per-energy weights
diff --git a/intern/softbody/admmpd_api.cpp b/intern/softbody/admmpd_api.cpp
index b1bdbab49cb..261c935b413 100644
--- a/intern/softbody/admmpd_api.cpp
+++ b/intern/softbody/admmpd_api.cpp
@@ -158,16 +158,14 @@ static int admmpd_init_with_lattice(
   }
 
   iface->totverts = 0;
-  bool success = admmpd::EmbeddedMesh().generate(in_V,in_F,iface->idata->embmesh,V);
+  bool trim_lattice = true;
+  bool success = admmpd::EmbeddedMesh().generate(in_V,in_F,iface->idata->embmesh,trim_lattice);
   if (success)
   {
-    admmpd::EmbeddedMesh().compute_masses(
-      iface->idata->embmesh,
-      &iface->idata->embmesh->x_rest,
-      V, m);
-  
-    iface->totverts = V->rows();
+    admmpd::EmbeddedMesh().compute_masses(iface->idata->embmesh, m);
     *T = iface->idata->embmesh->tets;
+    *V = iface->idata->embmesh->rest_x;
+    iface->totverts = V->rows();
     return 1;
   }
 
-- 
cgit v1.2.3