Simulation: Rename `physics` directory to `simulation`

Function names will be updated in a separate commit. This will be the place for the new particle system and other code related to the Simulation data block. We don't want to have all that code in blenkernel. Approved by brecht.
author: Jacques Lucke <jacques@blender.org> 2020-07-16 15:27:47 +0300
committer: Jacques Lucke <jacques@blender.org> 2020-07-16 15:28:38 +0300
commit: 9363c4de0635394548fa2eb8d205581313029775 (patch)
tree: 18f0a4be6ed78a8c8f8f5a33d575f2465ed95ee9 /source/blender/simulation
parent: 66b48ad8fb3bea4c0b643fe23fd25e70843067cd (diff)
9 files changed, 7460 insertions, 0 deletions
diff --git a/source/blender/simulation/BPH_mass_spring.h b/source/blender/simulation/BPH_mass_spring.h
new file mode 100644
index 00000000000..5a8c78812a4
--- /dev/null
+++ b/source/blender/simulation/BPH_mass_spring.h
@@ -0,0 +1,62 @@
+/*
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ * The Original Code is Copyright (C) Blender Foundation
+ * All rights reserved.
+ */
+
+/** \file
+ * \ingroup bph
+ */
+
+#ifndef __BPH_MASS_SPRING_H__
+#define __BPH_MASS_SPRING_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct ClothModifierData;
+struct Depsgraph;
+struct Implicit_Data;
+struct ListBase;
+struct Object;
+
+typedef enum eMassSpringSolverStatus {
+  BPH_SOLVER_SUCCESS = (1 << 0),
+  BPH_SOLVER_NUMERICAL_ISSUE = (1 << 1),
+  BPH_SOLVER_NO_CONVERGENCE = (1 << 2),
+  BPH_SOLVER_INVALID_INPUT = (1 << 3),
+} eMassSpringSolverStatus;
+
+struct Implicit_Data *BPH_mass_spring_solver_create(int numverts, int numsprings);
+void BPH_mass_spring_solver_free(struct Implicit_Data *id);
+int BPH_mass_spring_solver_numvert(struct Implicit_Data *id);
+
+int BPH_cloth_solver_init(struct Object *ob, struct ClothModifierData *clmd);
+void BPH_cloth_solver_free(struct ClothModifierData *clmd);
+int BPH_cloth_solve(struct Depsgraph *depsgraph,
+                    struct Object *ob,
+                    float frame,
+                    struct ClothModifierData *clmd,
+                    struct ListBase *effectors);
+void BKE_cloth_solver_set_positions(struct ClothModifierData *clmd);
+void BKE_cloth_solver_set_volume(ClothModifierData *clmd);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/source/blender/simulation/CMakeLists.txt b/source/blender/simulation/CMakeLists.txt
new file mode 100644
index 00000000000..10520a18513
--- /dev/null
+++ b/source/blender/simulation/CMakeLists.txt
@@ -0,0 +1,57 @@
+# ***** BEGIN GPL LICENSE BLOCK *****
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# of the License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software Foundation,
+# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+#
+# The Original Code is Copyright (C) 2014, Blender Foundation
+# All rights reserved.
+# ***** END GPL LICENSE BLOCK *****
+
+set(INC
+  .
+  intern
+  ../blenkernel
+  ../blenlib
+  ../depsgraph
+  ../imbuf
+  ../makesdna
+  ../../../intern/guardedalloc
+)
+
+set(INC_SYS
+  ${EIGEN3_INCLUDE_DIRS}
+)
+
+set(SRC
+  intern/BPH_mass_spring.cpp
+  intern/ConstrainedConjugateGradient.h
+  intern/eigen_utils.h
+  intern/hair_volume.cpp
+  intern/implicit.h
+  intern/implicit_blender.c
+  intern/implicit_eigen.cpp
+
+  BPH_mass_spring.h
+)
+
+set(LIB
+)
+
+if(WITH_OPENMP_STATIC)
+  list(APPEND LIB
+    ${OpenMP_LIBRARIES}
+  )
+endif()
+
+blender_add_lib(bf_physics "${SRC}" "${INC}" "${INC_SYS}" "${LIB}")
diff --git a/source/blender/simulation/intern/BPH_mass_spring.cpp b/source/blender/simulation/intern/BPH_mass_spring.cpp
new file mode 100644
index 00000000000..051f11aa1d9
--- /dev/null
+++ b/source/blender/simulation/intern/BPH_mass_spring.cpp
@@ -0,0 +1,1355 @@
+/*
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ * The Original Code is Copyright (C) Blender Foundation
+ * All rights reserved.
+ */
+
+/** \file
+ * \ingroup bph
+ */
+
+#include "MEM_guardedalloc.h"
+
+#include "DNA_cloth_types.h"
+#include "DNA_meshdata_types.h"
+#include "DNA_modifier_types.h"
+#include "DNA_object_force_types.h"
+#include "DNA_object_types.h"
+#include "DNA_scene_types.h"
+
+#include "BLI_linklist.h"
+#include "BLI_math.h"
+#include "BLI_utildefines.h"
+
+#include "BKE_cloth.h"
+#include "BKE_collision.h"
+#include "BKE_effect.h"
+
+#include "BPH_mass_spring.h"
+#include "implicit.h"
+
+#include "DEG_depsgraph.h"
+#include "DEG_depsgraph_query.h"
+
+static float I3[3][3] = {{1.0, 0.0, 0.0}, {0.0, 1.0, 0.0}, {0.0, 0.0, 1.0}};
+
+/* Number of off-diagonal non-zero matrix blocks.
+ * Basically there is one of these for each vertex-vertex interaction.
+ */
+static int cloth_count_nondiag_blocks(Cloth *cloth)
+{
+  LinkNode *link;
+  int nondiag = 0;
+
+  for (link = cloth->springs; link; link = link->next) {
+    ClothSpring *spring = (ClothSpring *)link->link;
+    switch (spring->type) {
+      case CLOTH_SPRING_TYPE_BENDING_HAIR:
+        /* angular bending combines 3 vertices */
+        nondiag += 3;
+        break;
+
+      default:
+        /* all other springs depend on 2 vertices only */
+        nondiag += 1;
+        break;
+    }
+  }
+
+  return nondiag;
+}
+
+static bool cloth_get_pressure_weights(ClothModifierData *clmd,
+                                       const MVertTri *vt,
+                                       float *r_weights)
+{
+  /* We have custom vertex weights for pressure. */
+  if (clmd->sim_parms->vgroup_pressure > 0) {
+    Cloth *cloth = clmd->clothObject;
+    ClothVertex *verts = cloth->verts;
+
+    for (unsigned int j = 0; j < 3; j++) {
+      r_weights[j] = verts[vt->tri[j]].pressure_factor;
+
+      /* Skip the entire triangle if it has a zero weight. */
+      if (r_weights[j] == 0.0f) {
+        return false;
+      }
+    }
+  }
+
+  return true;
+}
+
+static void cloth_calc_pressure_gradient(ClothModifierData *clmd,
+                                         const float gradient_vector[3],
+                                         float *r_vertex_pressure)
+{
+  Cloth *cloth = clmd->clothObject;
+  Implicit_Data *data = cloth->implicit;
+  unsigned int mvert_num = cloth->mvert_num;
+  float pt[3];
+
+  for (unsigned int i = 0; i < mvert_num; i++) {
+    BPH_mass_spring_get_position(data, i, pt);
+    r_vertex_pressure[i] = dot_v3v3(pt, gradient_vector);
+  }
+}
+
+static float cloth_calc_volume(ClothModifierData *clmd)
+{
+  /* Calculate the (closed) cloth volume. */
+  Cloth *cloth = clmd->clothObject;
+  const MVertTri *tri = cloth->tri;
+  Implicit_Data *data = cloth->implicit;
+  float weights[3] = {1.0f, 1.0f, 1.0f};
+  float vol = 0;
+
+  /* Early exit for hair, as it never has volume. */
+  if (clmd->hairdata) {
+    return 0.0f;
+  }
+
+  for (unsigned int i = 0; i < cloth->primitive_num; i++) {
+    const MVertTri *vt = &tri[i];
+
+    if (cloth_get_pressure_weights(clmd, vt, weights)) {
+      vol += BPH_tri_tetra_volume_signed_6x(data, vt->tri[0], vt->tri[1], vt->tri[2]);
+    }
+  }
+
+  /* We need to divide by 6 to get the actual volume. */
+  vol = vol / 6.0f;
+
+  return vol;
+}
+
+static float cloth_calc_rest_volume(ClothModifierData *clmd)
+{
+  /* Calculate the (closed) cloth volume. */
+  Cloth *cloth = clmd->clothObject;
+  const MVertTri *tri = cloth->tri;
+  const ClothVertex *v = cloth->verts;
+  float weights[3] = {1.0f, 1.0f, 1.0f};
+  float vol = 0;
+
+  /* Early exit for hair, as it never has volume. */
+  if (clmd->hairdata) {
+    return 0.0f;
+  }
+
+  for (unsigned int i = 0; i < cloth->primitive_num; i++) {
+    const MVertTri *vt = &tri[i];
+
+    if (cloth_get_pressure_weights(clmd, vt, weights)) {
+      vol += volume_tri_tetrahedron_signed_v3_6x(
+          v[vt->tri[0]].xrest, v[vt->tri[1]].xrest, v[vt->tri[2]].xrest);
+    }
+  }
+
+  /* We need to divide by 6 to get the actual volume. */
+  vol = vol / 6.0f;
+
+  return vol;
+}
+
+static float cloth_calc_average_pressure(ClothModifierData *clmd, const float *vertex_pressure)
+{
+  Cloth *cloth = clmd->clothObject;
+  const MVertTri *tri = cloth->tri;
+  Implicit_Data *data = cloth->implicit;
+  float weights[3] = {1.0f, 1.0f, 1.0f};
+  float total_force = 0;
+  float total_area = 0;
+
+  for (unsigned int i = 0; i < cloth->primitive_num; i++) {
+    const MVertTri *vt = &tri[i];
+
+    if (cloth_get_pressure_weights(clmd, vt, weights)) {
+      float area = BPH_tri_area(data, vt->tri[0], vt->tri[1], vt->tri[2]);
+
+      total_force += (vertex_pressure[vt->tri[0]] + vertex_pressure[vt->tri[1]] +
+                      vertex_pressure[vt->tri[2]]) *
+                     area / 3.0f;
+      total_area += area;
+    }
+  }
+
+  return total_force / total_area;
+}
+
+int BPH_cloth_solver_init(Object *UNUSED(ob), ClothModifierData *clmd)
+{
+  Cloth *cloth = clmd->clothObject;
+  ClothVertex *verts = cloth->verts;
+  const float ZERO[3] = {0.0f, 0.0f, 0.0f};
+  Implicit_Data *id;
+  unsigned int i, nondiag;
+
+  nondiag = cloth_count_nondiag_blocks(cloth);
+  cloth->implicit = id = BPH_mass_spring_solver_create(cloth->mvert_num, nondiag);
+
+  for (i = 0; i < cloth->mvert_num; i++) {
+    BPH_mass_spring_set_vertex_mass(id, i, verts[i].mass);
+  }
+
+  for (i = 0; i < cloth->mvert_num; i++) {
+    BPH_mass_spring_set_motion_state(id, i, verts[i].x, ZERO);
+  }
+
+  return 1;
+}
+
+void BPH_cloth_solver_free(ClothModifierData *clmd)
+{
+  Cloth *cloth = clmd->clothObject;
+
+  if (cloth->implicit) {
+    BPH_mass_spring_solver_free(cloth->implicit);
+    cloth->implicit = NULL;
+  }
+}
+
+void BKE_cloth_solver_set_positions(ClothModifierData *clmd)
+{
+  Cloth *cloth = clmd->clothObject;
+  ClothVertex *verts = cloth->verts;
+  unsigned int mvert_num = cloth->mvert_num, i;
+  ClothHairData *cloth_hairdata = clmd->hairdata;
+  Implicit_Data *id = cloth->implicit;
+
+  for (i = 0; i < mvert_num; i++) {
+    if (cloth_hairdata) {
+      ClothHairData *root = &cloth_hairdata[i];
+      BPH_mass_spring_set_rest_transform(id, i, root->rot);
+    }
+    else {
+      BPH_mass_spring_set_rest_transform(id, i, I3);
+    }
+
+    BPH_mass_spring_set_motion_state(id, i, verts[i].x, verts[i].v);
+  }
+}
+
+void BKE_cloth_solver_set_volume(ClothModifierData *clmd)
+{
+  Cloth *cloth = clmd->clothObject;
+
+  cloth->initial_mesh_volume = cloth_calc_rest_volume(clmd);
+}
+
+/* Init constraint matrix
+ * This is part of the modified CG method suggested by Baraff/Witkin in
+ * "Large Steps in Cloth Simulation" (Siggraph 1998)
+ */
+static void cloth_setup_constraints(ClothModifierData *clmd)
+{
+  Cloth *cloth = clmd->clothObject;
+  Implicit_Data *data = cloth->implicit;
+  ClothVertex *verts = cloth->verts;
+  int mvert_num = cloth->mvert_num;
+  int v;
+
+  const float ZERO[3] = {0.0f, 0.0f, 0.0f};
+
+  BPH_mass_spring_clear_constraints(data);
+
+  for (v = 0; v < mvert_num; v++) {
+    if (verts[v].flags & CLOTH_VERT_FLAG_PINNED) {
+      /* pinned vertex constraints */
+      BPH_mass_spring_add_constraint_ndof0(data, v, ZERO); /* velocity is defined externally */
+    }
+
+    verts[v].impulse_count = 0;
+  }
+}
+
+/* computes where the cloth would be if it were subject to perfectly stiff edges
+ * (edge distance constraints) in a lagrangian solver.  then add forces to help
+ * guide the implicit solver to that state.  this function is called after
+ * collisions*/
+static int UNUSED_FUNCTION(cloth_calc_helper_forces)(Object *UNUSED(ob),
+                                                     ClothModifierData *clmd,
+                                                     float (*initial_cos)[3],
+                                                     float UNUSED(step),
+                                                     float dt)
+{
+  Cloth *cloth = clmd->clothObject;
+  float(*cos)[3] = (float(*)[3])MEM_callocN(sizeof(float[3]) * cloth->mvert_num,
+                                            "cos cloth_calc_helper_forces");
+  float *masses = (float *)MEM_callocN(sizeof(float) * cloth->mvert_num,
+                                       "cos cloth_calc_helper_forces");
+  LinkNode *node;
+  ClothSpring *spring;
+  ClothVertex *cv;
+  int i, steps;
+
+  cv = cloth->verts;
+  for (i = 0; i < cloth->mvert_num; i++, cv++) {
+    copy_v3_v3(cos[i], cv->tx);
+
+    if (cv->goal == 1.0f || len_squared_v3v3(initial_cos[i], cv->tx) != 0.0f) {
+      masses[i] = 1e+10;
+    }
+    else {
+      masses[i] = cv->mass;
+    }
+  }
+
+  steps = 55;
+  for (i = 0; i < steps; i++) {
+    for (node = cloth->springs; node; node = node->next) {
+      /* ClothVertex *cv1, *cv2; */ /* UNUSED */
+      int v1, v2;
+      float len, c, l, vec[3];
+
+      spring = (ClothSpring *)node->link;
+      if (spring->type != CLOTH_SPRING_TYPE_STRUCTURAL &&
+          spring->type != CLOTH_SPRING_TYPE_SHEAR) {
+        continue;
+      }
+
+      v1 = spring->ij;
+      v2 = spring->kl;
+      /* cv1 = cloth->verts + v1; */ /* UNUSED */
+      /* cv2 = cloth->verts + v2; */ /* UNUSED */
+      len = len_v3v3(cos[v1], cos[v2]);
+
+      sub_v3_v3v3(vec, cos[v1], cos[v2]);
+      normalize_v3(vec);
+
+      c = (len - spring->restlen);
+      if (c == 0.0f) {
+        continue;
+      }
+
+      l = c / ((1.0f / masses[v1]) + (1.0f / masses[v2]));
+
+      mul_v3_fl(vec, -(1.0f / masses[v1]) * l);
+      add_v3_v3(cos[v1], vec);
+
+      sub_v3_v3v3(vec, cos[v2], cos[v1]);
+      normalize_v3(vec);
+
+      mul_v3_fl(vec, -(1.0f / masses[v2]) * l);
+      add_v3_v3(cos[v2], vec);
+    }
+  }
+
+  cv = cloth->verts;
+  for (i = 0; i < cloth->mvert_num; i++, cv++) {
+    float vec[3];
+
+    /*compute forces*/
+    sub_v3_v3v3(vec, cos[i], cv->tx);
+    mul_v3_fl(vec, cv->mass * dt * 20.0f);
+    add_v3_v3(cv->tv, vec);
+    // copy_v3_v3(cv->tx, cos[i]);
+  }
+
+  MEM_freeN(cos);
+  MEM_freeN(masses);
+
+  return 1;
+}
+
+BLI_INLINE void cloth_calc_spring_force(ClothModifierData *clmd, ClothSpring *s)
+{
+  Cloth *cloth = clmd->clothObject;
+  ClothSimSettings *parms = clmd->sim_parms;
+  Implicit_Data *data = cloth->implicit;
+  bool using_angular = parms->bending_model == CLOTH_BENDING_ANGULAR;
+  bool resist_compress = (parms->flags & CLOTH_SIMSETTINGS_FLAG_RESIST_SPRING_COMPRESS) &&
+                         !using_angular;
+
+  s->flags &= ~CLOTH_SPRING_FLAG_NEEDED;
+
+  /* Calculate force of bending springs. */
+  if ((s->type & CLOTH_SPRING_TYPE_BENDING) && using_angular) {
+#ifdef CLOTH_FORCE_SPRING_BEND
+    float k, scaling;
+
+    s->flags |= CLOTH_SPRING_FLAG_NEEDED;
+
+    scaling = parms->bending + s->ang_stiffness * fabsf(parms->max_bend - parms->bending);
+    k = scaling * s->restlen *
+        0.1f; /* Multiplying by 0.1, just to scale the forces to more reasonable values. */
+
+    BPH_mass_spring_force_spring_angular(
+        data, s->ij, s->kl, s->pa, s->pb, s->la, s->lb, s->restang, k, parms->bending_damping);
+#endif
+  }
+
+  /* Calculate force of structural + shear springs. */
+  if (s->type &
+      (CLOTH_SPRING_TYPE_STRUCTURAL | CLOTH_SPRING_TYPE_SEWING | CLOTH_SPRING_TYPE_INTERNAL)) {
+#ifdef CLOTH_FORCE_SPRING_STRUCTURAL
+    float k_tension, scaling_tension;
+
+    s->flags |= CLOTH_SPRING_FLAG_NEEDED;
+
+    scaling_tension = parms->tension +
+                      s->lin_stiffness * fabsf(parms->max_tension - parms->tension);
+    k_tension = scaling_tension / (parms->avg_spring_len + FLT_EPSILON);
+
+    if (s->type & CLOTH_SPRING_TYPE_SEWING) {
+      /* TODO: verify, half verified (couldn't see error)
+       * sewing springs usually have a large distance at first so clamp the force so we don't get
+       * tunneling through collision objects. */
+      BPH_mass_spring_force_spring_linear(data,
+                                          s->ij,
+                                          s->kl,
+                                          s->restlen,
+                                          k_tension,
+                                          parms->tension_damp,
+                                          0.0f,
+                                          0.0f,
+                                          false,
+                                          false,
+                                          parms->max_sewing);
+    }
+    else if (s->type & CLOTH_SPRING_TYPE_STRUCTURAL) {
+      float k_compression, scaling_compression;
+      scaling_compression = parms->compression +
+                            s->lin_stiffness * fabsf(parms->max_compression - parms->compression);
+      k_compression = scaling_compression / (parms->avg_spring_len + FLT_EPSILON);
+
+      BPH_mass_spring_force_spring_linear(data,
+                                          s->ij,
+                                          s->kl,
+                                          s->restlen,
+                                          k_tension,
+                                          parms->tension_damp,
+                                          k_compression,
+                                          parms->compression_damp,
+                                          resist_compress,
+                                          using_angular,
+                                          0.0f);
+    }
+    else {
+      /* CLOTH_SPRING_TYPE_INTERNAL */
+      BLI_assert(s->type & CLOTH_SPRING_TYPE_INTERNAL);
+
+      scaling_tension = parms->internal_tension +
+                        s->lin_stiffness *
+                            fabsf(parms->max_internal_tension - parms->internal_tension);
+      k_tension = scaling_tension / (parms->avg_spring_len + FLT_EPSILON);
+      float scaling_compression = parms->internal_compression +
+                                  s->lin_stiffness * fabsf(parms->max_internal_compression -
+                                                           parms->internal_compression);
+      float k_compression = scaling_compression / (parms->avg_spring_len + FLT_EPSILON);
+
+      float k_tension_damp = parms->tension_damp;
+      float k_compression_damp = parms->compression_damp;
+
+      if (k_tension == 0.0f) {
+        /* No damping so it behaves as if no tension spring was there at all. */
+        k_tension_damp = 0.0f;
+      }
+
+      if (k_compression == 0.0f) {
+        /* No damping so it behaves as if no compression spring was there at all. */
+        k_compression_damp = 0.0f;
+      }
+
+      BPH_mass_spring_force_spring_linear(data,
+                                          s->ij,
+                                          s->kl,
+                                          s->restlen,
+                                          k_tension,
+                                          k_tension_damp,
+                                          k_compression,
+                                          k_compression_damp,
+                                          resist_compress,
+                                          using_angular,
+                                          0.0f);
+    }
+#endif
+  }
+  else if (s->type & CLOTH_SPRING_TYPE_SHEAR) {
+#ifdef CLOTH_FORCE_SPRING_SHEAR
+    float k, scaling;
+
+    s->flags |= CLOTH_SPRING_FLAG_NEEDED;
+
+    scaling = parms->shear + s->lin_stiffness * fabsf(parms->max_shear - parms->shear);
+    k = scaling / (parms->avg_spring_len + FLT_EPSILON);
+
+    BPH_mass_spring_force_spring_linear(data,
+                                        s->ij,
+                                        s->kl,
+                                        s->restlen,
+                                        k,
+                                        parms->shear_damp,
+                                        0.0f,
+                                        0.0f,
+                                        resist_compress,
+                                        false,
+                                        0.0f);
+#endif
+  }
+  else if (s->type & CLOTH_SPRING_TYPE_BENDING) { /* calculate force of bending springs */
+#ifdef CLOTH_FORCE_SPRING_BEND
+    float kb, cb, scaling;
+
+    s->flags |= CLOTH_SPRING_FLAG_NEEDED;
+
+    scaling = parms->bending + s->lin_stiffness * fabsf(parms->max_bend - parms->bending);
+    kb = scaling / (20.0f * (parms->avg_spring_len + FLT_EPSILON));
+
+    // Fix for [#45084] for cloth stiffness must have cb proportional to kb
+    cb = kb * parms->bending_damping;
+
+    BPH_mass_spring_force_spring_bending(data, s->ij, s->kl, s->restlen, kb, cb);
+#endif
+  }
+  else if (s->type & CLOTH_SPRING_TYPE_BENDING_HAIR) {
+#ifdef CLOTH_FORCE_SPRING_BEND
+    float kb, cb, scaling;
+
+    s->flags |= CLOTH_SPRING_FLAG_NEEDED;
+
+    /* XXX WARNING: angular bending springs for hair apply stiffness factor as an overall factor,
+     * unlike cloth springs! this is crap, but needed due to cloth/hair mixing ... max_bend factor
+     * is not even used for hair, so ...
+     */
+    scaling = s->lin_stiffness * parms->bending;
+    kb = scaling / (20.0f * (parms->avg_spring_len + FLT_EPSILON));
+
+    // Fix for [#45084] for cloth stiffness must have cb proportional to kb
+    cb = kb * parms->bending_damping;
+
+    /* XXX assuming same restlen for ij and jk segments here,
+     * this can be done correctly for hair later. */
+    BPH_mass_spring_force_spring_bending_hair(data, s->ij, s->kl, s->mn, s->target, kb, cb);
+
+#  if 0
+    {
+      float x_kl[3], x_mn[3], v[3], d[3];
+
+      BPH_mass_spring_get_motion_state(data, s->kl, x_kl, v);
+      BPH_mass_spring_get_motion_state(data, s->mn, x_mn, v);
+
+      BKE_sim_debug_data_add_dot(clmd->debug_data, x_kl, 0.9, 0.9, 0.9, "target", 7980, s->kl);
+      BKE_sim_debug_data_add_line(
+          clmd->debug_data, x_kl, x_mn, 0.8, 0.8, 0.8, "target", 7981, s->kl);
+
+      copy_v3_v3(d, s->target);
+      BKE_sim_debug_data_add_vector(
+          clmd->debug_data, x_kl, d, 0.8, 0.8, 0.2, "target", 7982, s->kl);
+
+      // copy_v3_v3(d, s->target_ij);
+      // BKE_sim_debug_data_add_vector(clmd->debug_data, x, d, 1, 0.4, 0.4, "target", 7983, s->kl);
+    }
+#  endif
+#endif
+  }
+}
+
+static void hair_get_boundbox(ClothModifierData *clmd, float gmin[3], float gmax[3])
+{
+  Cloth *cloth = clmd->clothObject;
+  Implicit_Data *data = cloth->implicit;
+  unsigned int mvert_num = cloth->mvert_num;
+  int i;
+
+  INIT_MINMAX(gmin, gmax);
+  for (i = 0; i < mvert_num; i++) {
+    float x[3];
+    BPH_mass_spring_get_motion_state(data, i, x, NULL);
+    DO_MINMAX(x, gmin, gmax);
+  }
+}
+
+static void cloth_calc_force(
+    Scene *scene, ClothModifierData *clmd, float UNUSED(frame), ListBase *effectors, float time)
+{
+  /* Collect forces and derivatives:  F, dFdX, dFdV */
+  Cloth *cloth = clmd->clothObject;
+  ClothSimSettings *parms = clmd->sim_parms;
+  Implicit_Data *data = cloth->implicit;
+  unsigned int i = 0;
+  float drag = clmd->sim_parms->Cvi * 0.01f; /* viscosity of air scaled in percent */
+  float gravity[3] = {0.0f, 0.0f, 0.0f};
+  const MVertTri *tri = cloth->tri;
+  unsigned int mvert_num = cloth->mvert_num;
+  ClothVertex *vert;
+
+#ifdef CLOTH_FORCE_GRAVITY
+  /* global acceleration (gravitation) */
+  if (scene->physics_settings.flag & PHYS_GLOBAL_GRAVITY) {
+    /* scale gravity force */
+    mul_v3_v3fl(gravity,
+                scene->physics_settings.gravity,
+                0.001f * clmd->sim_parms->effector_weights->global_gravity);
+  }
+
+  vert = cloth->verts;
+  for (i = 0; i < cloth->mvert_num; i++, vert++) {
+    BPH_mass_spring_force_gravity(data, i, vert->mass, gravity);
+
+    /* Vertex goal springs */
+    if ((!(vert->flags & CLOTH_VERT_FLAG_PINNED)) && (vert->goal > FLT_EPSILON)) {
+      float goal_x[3], goal_v[3];
+      float k;
+
+      /* divide by time_scale to prevent goal vertices' delta locations from being multiplied */
+      interp_v3_v3v3(goal_x, vert->xold, vert->xconst, time / clmd->sim_parms->time_scale);
+      sub_v3_v3v3(goal_v, vert->xconst, vert->xold); /* distance covered over dt==1 */
+
+      k = vert->goal * clmd->sim_parms->goalspring /
+          (clmd->sim_parms->avg_spring_len + FLT_EPSILON);
+
+      BPH_mass_spring_force_spring_goal(
+          data, i, goal_x, goal_v, k, clmd->sim_parms->goalfrict * 0.01f);
+    }
+  }
+#endif
+
+  /* cloth_calc_volume_force(clmd); */
+
+#ifdef CLOTH_FORCE_DRAG
+  BPH_mass_spring_force_drag(data, drag);
+#endif
+  /* handle pressure forces (making sure that this never gets computed for hair). */
+  if ((parms->flags & CLOTH_SIMSETTINGS_FLAG_PRESSURE) && (clmd->hairdata == NULL)) {
+    /* The difference in pressure between the inside and outside of the mesh.*/
+    float pressure_difference = 0.0f;
+    float volume_factor = 1.0f;
+
+    float init_vol;
+    if (parms->flags & CLOTH_SIMSETTINGS_FLAG_PRESSURE_VOL) {
+      init_vol = clmd->sim_parms->target_volume;
+    }
+    else {
+      init_vol = cloth->initial_mesh_volume;
+    }
+
+    /* Check if we need to calculate the volume of the mesh. */
+    if (init_vol > 1E-6f) {
+      float f;
+      float vol = cloth_calc_volume(clmd);
+
+      /* If the volume is the same don't apply any pressure. */
+      volume_factor = init_vol / vol;
+      pressure_difference = volume_factor - 1;
+
+      /* Calculate an artificial maximum value for cloth pressure. */
+      f = fabs(clmd->sim_parms->uniform_pressure_force) + 200.0f;
+
+      /* Clamp the cloth pressure to the calculated maximum value. */
+      CLAMP_MAX(pressure_difference, f);
+    }
+
+    pressure_difference += clmd->sim_parms->uniform_pressure_force;
+    pressure_difference *= clmd->sim_parms->pressure_factor;
+
+    /* Compute the hydrostatic pressure gradient if enabled. */
+    float fluid_density = clmd->sim_parms->fluid_density * 1000; /* kg/l -> kg/m3 */
+    float *hydrostatic_pressure = NULL;
+
+    if (fabs(fluid_density) > 1e-6f) {
+      float hydrostatic_vector[3];
+      copy_v3_v3(hydrostatic_vector, gravity);
+
+      /* When the fluid is inside the object, factor in the acceleration of
+       * the object into the pressure field, as gravity is indistinguishable
+       * from acceleration from the inside. */
+      if (fluid_density > 0) {
+        sub_v3_v3(hydrostatic_vector, cloth->average_acceleration);
+
+        /* Preserve the total mass by scaling density to match the change in volume. */
+        fluid_density *= volume_factor;
+      }
+
+      mul_v3_fl(hydrostatic_vector, fluid_density);
+
+      /* Compute an array of per-vertex hydrostatic pressure, and subtract the average. */
+      hydrostatic_pressure = (float *)MEM_mallocN(sizeof(float) * mvert_num,
+                                                  "hydrostatic pressure gradient");
+
+      cloth_calc_pressure_gradient(clmd, hydrostatic_vector, hydrostatic_pressure);
+
+      pressure_difference -= cloth_calc_average_pressure(clmd, hydrostatic_pressure);
+    }
+
+    /* Apply pressure. */
+    if (hydrostatic_pressure || fabs(pressure_difference) > 1E-6f) {
+      float weights[3] = {1.0f, 1.0f, 1.0f};
+
+      for (i = 0; i < cloth->primitive_num; i++) {
+        const MVertTri *vt = &tri[i];
+
+        if (cloth_get_pressure_weights(clmd, vt, weights)) {
+          BPH_mass_spring_force_pressure(data,
+                                         vt->tri[0],
+                                         vt->tri[1],
+                                         vt->tri[2],
+                                         pressure_difference,
+                                         hydrostatic_pressure,
+                                         weights);
+        }
+      }
+    }
+
+    if (hydrostatic_pressure) {
+      MEM_freeN(hydrostatic_pressure);
+    }
+  }
+
+  /* handle external forces like wind */
+  if (effectors) {
+    bool is_not_hair = (clmd->hairdata == NULL) && (cloth->primitive_num > 0);
+    bool has_wind = false, has_force = false;
+
+    /* cache per-vertex forces to avoid redundant calculation */
+    float(*winvec)[3] = (float(*)[3])MEM_callocN(sizeof(float[3]) * mvert_num * 2,
+                                                 "effector forces");
+    float(*forcevec)[3] = is_not_hair ? winvec + mvert_num : winvec;
+
+    for (i = 0; i < cloth->mvert_num; i++) {
+      float x[3], v[3];
+      EffectedPoint epoint;
+
+      BPH_mass_spring_get_motion_state(data, i, x, v);
+      pd_point_from_loc(scene, x, v, i, &epoint);
+      BKE_effectors_apply(effectors,
+                          NULL,
+                          clmd->sim_parms->effector_weights,
+                          &epoint,
+                          forcevec[i],
+                          winvec[i],
+                          NULL);
+
+      has_wind = has_wind || !is_zero_v3(winvec[i]);
+      has_force = has_force || !is_zero_v3(forcevec[i]);
+    }
+
+    /* Hair has only edges. */
+    if (is_not_hair) {
+      for (i = 0; i < cloth->primitive_num; i++) {
+        const MVertTri *vt = &tri[i];
+        if (has_wind) {
+          BPH_mass_spring_force_face_wind(data, vt->tri[0], vt->tri[1], vt->tri[2], winvec);
+        }
+        if (has_force) {
+          BPH_mass_spring_force_face_extern(data, vt->tri[0], vt->tri[1], vt->tri[2], forcevec);
+        }
+      }
+    }
+    else {
+#if 0
+      ClothHairData *hairdata = clmd->hairdata;
+      ClothHairData *hair_ij, *hair_kl;
+
+      for (LinkNode *link = cloth->springs; link; link = link->next) {
+        ClothSpring *spring = (ClothSpring *)link->link;
+        if (spring->type == CLOTH_SPRING_TYPE_STRUCTURAL) {
+          if (hairdata) {
+            hair_ij = &hairdata[spring->ij];
+            hair_kl = &hairdata[spring->kl];
+            BPH_mass_spring_force_edge_wind(
+                data, spring->ij, spring->kl, hair_ij->radius, hair_kl->radius, winvec);
+          }
+          else {
+            BPH_mass_spring_force_edge_wind(data, spring->ij, spring->kl, 1.0f, 1.0f, winvec);
+          }
+        }
+      }
+#else
+      ClothHairData *hairdata = clmd->hairdata;
+
+      vert = cloth->verts;
+      for (i = 0; i < cloth->mvert_num; i++, vert++) {
+        if (hairdata) {
+          ClothHairData *hair = &hairdata[i];
+          BPH_mass_spring_force_vertex_wind(data, i, hair->radius, winvec);
+        }
+        else {
+          BPH_mass_spring_force_vertex_wind(data, i, 1.0f, winvec);
+        }
+      }
+#endif
+    }
+
+    MEM_freeN(winvec);
+  }
+
+  // calculate spring forces
+  for (LinkNode *link = cloth->springs; link; link = link->next) {
+    ClothSpring *spring = (ClothSpring *)link->link;
+    // only handle active springs
+    if (!(spring->flags & CLOTH_SPRING_FLAG_DEACTIVATE)) {
+      cloth_calc_spring_force(clmd, spring);
+    }
+  }
+}
+
+/* returns vertexes' motion state */
+BLI_INLINE void cloth_get_grid_location(Implicit_Data *data,
+                                        float cell_scale,
+                                        const float cell_offset[3],
+                                        int index,
+                                        float x[3],
+                                        float v[3])
+{
+  BPH_mass_spring_get_position(data, index, x);
+  BPH_mass_spring_get_new_velocity(data, index, v);
+
+  mul_v3_fl(x, cell_scale);
+  add_v3_v3(x, cell_offset);
+}
+
+/* returns next spring forming a continuous hair sequence */
+BLI_INLINE LinkNode *hair_spring_next(LinkNode *spring_link)
+{
+  ClothSpring *spring = (ClothSpring *)spring_link->link;
+  LinkNode *next = spring_link->next;
+  if (next) {
+    ClothSpring *next_spring = (ClothSpring *)next->link;
+    if (next_spring->type == CLOTH_SPRING_TYPE_STRUCTURAL && next_spring->kl == spring->ij) {
+      return next;
+    }
+  }
+  return NULL;
+}
+
+/* XXX this is nasty: cloth meshes do not explicitly store
+ * the order of hair segments!
+ * We have to rely on the spring build function for now,
+ * which adds structural springs in reverse order:
+ *   (3,4), (2,3), (1,2)
+ * This is currently the only way to figure out hair geometry inside this code ...
+ */
+static LinkNode *cloth_continuum_add_hair_segments(HairGrid *grid,
+                                                   const float cell_scale,
+                                                   const float cell_offset[3],
+                                                   Cloth *cloth,
+                                                   LinkNode *spring_link)
+{
+  Implicit_Data *data = cloth->implicit;
+  LinkNode *next_spring_link = NULL; /* return value */
+  ClothSpring *spring1, *spring2, *spring3;
+  // ClothVertex *verts = cloth->verts;
+  // ClothVertex *vert3, *vert4;
+  float x1[3], v1[3], x2[3], v2[3], x3[3], v3[3], x4[3], v4[3];
+  float dir1[3], dir2[3], dir3[3];
+
+  spring1 = NULL;
+  spring2 = NULL;
+  spring3 = (ClothSpring *)spring_link->link;
+
+  zero_v3(x1);
+  zero_v3(v1);
+  zero_v3(dir1);
+  zero_v3(x2);
+  zero_v3(v2);
+  zero_v3(dir2);
+
+  // vert3 = &verts[spring3->kl];
+  cloth_get_grid_location(data, cell_scale, cell_offset, spring3->kl, x3, v3);
+  // vert4 = &verts[spring3->ij];
+  cloth_get_grid_location(data, cell_scale, cell_offset, spring3->ij, x4, v4);
+  sub_v3_v3v3(dir3, x4, x3);
+  normalize_v3(dir3);
+
+  while (spring_link) {
+    /* move on */
+    spring1 = spring2;
+    spring2 = spring3;
+
+    // vert3 = vert4;
+
+    copy_v3_v3(x1, x2);
+    copy_v3_v3(v1, v2);
+    copy_v3_v3(x2, x3);
+    copy_v3_v3(v2, v3);
+    copy_v3_v3(x3, x4);
+    copy_v3_v3(v3, v4);
+
+    copy_v3_v3(dir1, dir2);
+    copy_v3_v3(dir2, dir3);
+
+    /* read next segment */
+    next_spring_link = spring_link->next;
+    spring_link = hair_spring_next(spring_link);
+
+    if (spring_link) {
+      spring3 = (ClothSpring *)spring_link->link;
+      // vert4 = &verts[spring3->ij];
+      cloth_get_grid_location(data, cell_scale, cell_offset, spring3->ij, x4, v4);
+      sub_v3_v3v3(dir3, x4, x3);
+      normalize_v3(dir3);
+    }
+    else {
+      spring3 = NULL;
+      // vert4 = NULL;
+      zero_v3(x4);
+      zero_v3(v4);
+      zero_v3(dir3);
+    }
+
+    BPH_hair_volume_add_segment(
+        grid, x1, v1, x2, v2, x3, v3, x4, v4, spring1 ? dir1 : NULL, dir2, spring3 ? dir3 : NULL);
+  }
+
+  return next_spring_link;
+}
+
+static void cloth_continuum_fill_grid(HairGrid *grid, Cloth *cloth)
+{
+#if 0
+  Implicit_Data *data = cloth->implicit;
+  int mvert_num = cloth->mvert_num;
+  ClothVertex *vert;
+  int i;
+
+  for (i = 0, vert = cloth->verts; i < mvert_num; i++, vert++) {
+    float x[3], v[3];
+
+    cloth_get_vertex_motion_state(data, vert, x, v);
+    BPH_hair_volume_add_vertex(grid, x, v);
+  }
+#else
+  LinkNode *link;
+  float cellsize, gmin[3], cell_scale, cell_offset[3];
+
+  /* scale and offset for transforming vertex locations into grid space
+   * (cell size is 0..1, gmin becomes origin)
+   */
+  BPH_hair_volume_grid_geometry(grid, &cellsize, NULL, gmin, NULL);
+  cell_scale = cellsize > 0.0f ? 1.0f / cellsize : 0.0f;
+  mul_v3_v3fl(cell_offset, gmin, cell_scale);
+  negate_v3(cell_offset);
+
+  link = cloth->springs;
+  while (link) {
+    ClothSpring *spring = (ClothSpring *)link->link;
+    if (spring->type == CLOTH_SPRING_TYPE_STRUCTURAL) {
+      link = cloth_continuum_add_hair_segments(grid, cell_scale, cell_offset, cloth, link);
+    }
+    else {
+      link = link->next;
+    }
+  }
+#endif
+  BPH_hair_volume_normalize_vertex_grid(grid);
+}
+
+static void cloth_continuum_step(ClothModifierData *clmd, float dt)
+{
+  ClothSimSettings *parms = clmd->sim_parms;
+  Cloth *cloth = clmd->clothObject;
+  Implicit_Data *data = cloth->implicit;
+  int mvert_num = cloth->mvert_num;
+  ClothVertex *vert;
+
+  const float fluid_factor = 0.95f; /* blend between PIC and FLIP methods */
+  float smoothfac = parms->velocity_smooth;
+  /* XXX FIXME arbitrary factor!!! this should be based on some intuitive value instead,
+   * like number of hairs per cell and time decay instead of "strength"
+   */
+  float density_target = parms->density_target;
+  float density_strength = parms->density_strength;
+  float gmin[3], gmax[3];
+  int i;
+
+  /* clear grid info */
+  zero_v3_int(clmd->hair_grid_res);
+  zero_v3(clmd->hair_grid_min);
+  zero_v3(clmd->hair_grid_max);
+  clmd->hair_grid_cellsize = 0.0f;
+
+  hair_get_boundbox(clmd, gmin, gmax);
+
+  /* gather velocities & density */
+  if (smoothfac > 0.0f || density_strength > 0.0f) {
+    HairGrid *grid = BPH_hair_volume_create_vertex_grid(
+        clmd->sim_parms->voxel_cell_size, gmin, gmax);
+
+    cloth_continuum_fill_grid(grid, cloth);
+
+    /* main hair continuum solver */
+    BPH_hair_volume_solve_divergence(grid, dt, density_target, density_strength);
+
+    for (i = 0, vert = cloth->verts; i < mvert_num; i++, vert++) {
+      float x[3], v[3], nv[3];
+
+      /* calculate volumetric velocity influence */
+      BPH_mass_spring_get_position(data, i, x);
+      BPH_mass_spring_get_new_velocity(data, i, v);
+
+      BPH_hair_volume_grid_velocity(grid, x, v, fluid_factor, nv);
+
+      interp_v3_v3v3(nv, v, nv, smoothfac);
+
+      /* apply on hair data */
+      BPH_mass_spring_set_new_velocity(data, i, nv);
+    }
+
+    /* store basic grid info in the modifier data */
+    BPH_hair_volume_grid_geometry(grid,
+                                  &clmd->hair_grid_cellsize,
+                                  clmd->hair_grid_res,
+                                  clmd->hair_grid_min,
+                                  clmd->hair_grid_max);
+
+#if 0 /* DEBUG hair velocity vector field */
+    {
+      const int size = 64;
+      int i, j;
+      float offset[3], a[3], b[3];
+      const int axis = 0;
+      const float shift = 0.0f;
+
+      copy_v3_v3(offset, clmd->hair_grid_min);
+      zero_v3(a);
+      zero_v3(b);
+
+      offset[axis] = shift * clmd->hair_grid_cellsize;
+      a[(axis + 1) % 3] = clmd->hair_grid_max[(axis + 1) % 3] -
+                          clmd->hair_grid_min[(axis + 1) % 3];
+      b[(axis + 2) % 3] = clmd->hair_grid_max[(axis + 2) % 3] -
+                          clmd->hair_grid_min[(axis + 2) % 3];
+
+      BKE_sim_debug_data_clear_category(clmd->debug_data, "grid velocity");
+      for (j = 0; j < size; j++) {
+        for (i = 0; i < size; i++) {
+          float x[3], v[3], gvel[3], gvel_smooth[3], gdensity;
+
+          madd_v3_v3v3fl(x, offset, a, (float)i / (float)(size - 1));
+          madd_v3_v3fl(x, b, (float)j / (float)(size - 1));
+          zero_v3(v);
+
+          BPH_hair_volume_grid_interpolate(grid, x, &gdensity, gvel, gvel_smooth, NULL, NULL);
+
+          // BKE_sim_debug_data_add_circle(
+          //     clmd->debug_data, x, gdensity, 0.7, 0.3, 1,
+          //     "grid density", i, j, 3111);
+          if (!is_zero_v3(gvel) || !is_zero_v3(gvel_smooth)) {
+            float dvel[3];
+            sub_v3_v3v3(dvel, gvel_smooth, gvel);
+            // BKE_sim_debug_data_add_vector(
+            //     clmd->debug_data, x, gvel, 0.4, 0, 1,
+            //     "grid velocity", i, j, 3112);
+            // BKE_sim_debug_data_add_vector(
+            //     clmd->debug_data, x, gvel_smooth, 0.6, 1, 1,
+            //     "grid velocity", i, j, 3113);
+            BKE_sim_debug_data_add_vector(
+                clmd->debug_data, x, dvel, 0.4, 1, 0.7, "grid velocity", i, j, 3114);
+#  if 0
+            if (gdensity > 0.0f) {
+              float col0[3] = {0.0, 0.0, 0.0};
+              float col1[3] = {0.0, 1.0, 0.0};
+              float col[3];
+
+              interp_v3_v3v3(col, col0, col1,
+                             CLAMPIS(gdensity * clmd->sim_parms->density_strength, 0.0, 1.0));
+              // BKE_sim_debug_data_add_circle(
+              //     clmd->debug_data, x, gdensity * clmd->sim_parms->density_strength, 0, 1, 0.4,
+              //     "grid velocity", i, j, 3115);
+              // BKE_sim_debug_data_add_dot(
+              //     clmd->debug_data, x, col[0], col[1], col[2],
+              //     "grid velocity", i, j, 3115);
+              BKE_sim_debug_data_add_circle(
+                  clmd->debug_data, x, 0.01f, col[0], col[1], col[2], "grid velocity", i, j, 3115);
+            }
+#  endif
+          }
+        }
+      }
+    }
+#endif
+
+    BPH_hair_volume_free_vertex_grid(grid);
+  }
+}
+
+#if 0
+static void cloth_calc_volume_force(ClothModifierData *clmd)
+{
+  ClothSimSettings *parms = clmd->sim_parms;
+  Cloth *cloth = clmd->clothObject;
+  Implicit_Data *data = cloth->implicit;
+  int mvert_num = cloth->mvert_num;
+  ClothVertex *vert;
+
+  /* 2.0f is an experimental value that seems to give good results */
+  float smoothfac = 2.0f * parms->velocity_smooth;
+  float collfac = 2.0f * parms->collider_friction;
+  float pressfac = parms->pressure;
+  float minpress = parms->pressure_threshold;
+  float gmin[3], gmax[3];
+  int i;
+
+  hair_get_boundbox(clmd, gmin, gmax);
+
+  /* gather velocities & density */
+  if (smoothfac > 0.0f || pressfac > 0.0f) {
+    HairVertexGrid *vertex_grid = BPH_hair_volume_create_vertex_grid(
+        clmd->sim_parms->voxel_res, gmin, gmax);
+
+    vert = cloth->verts;
+    for (i = 0; i < mvert_num; i++, vert++) {
+      float x[3], v[3];
+
+      if (vert->solver_index < 0) {
+        copy_v3_v3(x, vert->x);
+        copy_v3_v3(v, vert->v);
+      }
+      else {
+        BPH_mass_spring_get_motion_state(data, vert->solver_index, x, v);
+      }
+      BPH_hair_volume_add_vertex(vertex_grid, x, v);
+    }
+    BPH_hair_volume_normalize_vertex_grid(vertex_grid);
+
+    vert = cloth->verts;
+    for (i = 0; i < mvert_num; i++, vert++) {
+      float x[3], v[3], f[3], dfdx[3][3], dfdv[3][3];
+
+      if (vert->solver_index < 0) {
+        continue;
+      }
+
+      /* calculate volumetric forces */
+      BPH_mass_spring_get_motion_state(data, vert->solver_index, x, v);
+      BPH_hair_volume_vertex_grid_forces(
+          vertex_grid, x, v, smoothfac, pressfac, minpress, f, dfdx, dfdv);
+      /* apply on hair data */
+      BPH_mass_spring_force_extern(data, vert->solver_index, f, dfdx, dfdv);
+    }
+
+    BPH_hair_volume_free_vertex_grid(vertex_grid);
+  }
+}
+#endif
+
+static void cloth_calc_average_acceleration(ClothModifierData *clmd, float dt)
+{
+  Cloth *cloth = clmd->clothObject;
+  Implicit_Data *data = cloth->implicit;
+  int i, mvert_num = cloth->mvert_num;
+  float total[3] = {0.0f, 0.0f, 0.0f};
+
+  for (i = 0; i < mvert_num; i++) {
+    float v[3], nv[3];
+
+    BPH_mass_spring_get_velocity(data, i, v);
+    BPH_mass_spring_get_new_velocity(data, i, nv);
+
+    sub_v3_v3(nv, v);
+    add_v3_v3(total, nv);
+  }
+
+  mul_v3_fl(total, 1.0f / dt / mvert_num);
+
+  /* Smooth the data using a running average to prevent instability.
+   * This is effectively an abstraction of the wave propagation speed in fluid. */
+  interp_v3_v3v3(cloth->average_acceleration, total, cloth->average_acceleration, powf(0.25f, dt));
+}
+
+static void cloth_solve_collisions(
+    Depsgraph *depsgraph, Object *ob, ClothModifierData *clmd, float step, float dt)
+{
+  Cloth *cloth = clmd->clothObject;
+  Implicit_Data *id = cloth->implicit;
+  ClothVertex *verts = cloth->verts;
+  int mvert_num = cloth->mvert_num;
+  const float time_multiplier = 1.0f / (clmd->sim_parms->dt * clmd->sim_parms->timescale);
+  int i;
+
+  if (!(clmd->coll_parms->flags &
+        (CLOTH_COLLSETTINGS_FLAG_ENABLED | CLOTH_COLLSETTINGS_FLAG_SELF))) {
+    return;
+  }
+
+  if (!clmd->clothObject->bvhtree) {
+    return;
+  }
+
+  BPH_mass_spring_solve_positions(id, dt);
+
+  /* Update verts to current positions. */
+  for (i = 0; i < mvert_num; i++) {
+    BPH_mass_spring_get_new_position(id, i, verts[i].tx);
+
+    sub_v3_v3v3(verts[i].tv, verts[i].tx, verts[i].txold);
+    zero_v3(verts[i].dcvel);
+  }
+
+  if (cloth_bvh_collision(depsgraph,
+                          ob,
+                          clmd,
+                          step / clmd->sim_parms->timescale,
+                          dt / clmd->sim_parms->timescale)) {
+    for (i = 0; i < mvert_num; i++) {
+      if ((clmd->sim_parms->vgroup_mass > 0) && (verts[i].flags & CLOTH_VERT_FLAG_PINNED)) {
+        continue;
+      }
+
+      BPH_mass_spring_get_new_velocity(id, i, verts[i].tv);
+      madd_v3_v3fl(verts[i].tv, verts[i].dcvel, time_multiplier);
+      BPH_mass_spring_set_new_velocity(id, i, verts[i].tv);
+    }
+  }
+}
+
+static void cloth_clear_result(ClothModifierData *clmd)
+{
+  ClothSolverResult *sres = clmd->solver_result;
+
+  sres->status = 0;
+  sres->max_error = sres->min_error = sres->avg_error = 0.0f;
+  sres->max_iterations = sres->min_iterations = 0;
+  sres->avg_iterations = 0.0f;
+}
+
+static void cloth_record_result(ClothModifierData *clmd, ImplicitSolverResult *result, float dt)
+{
+  ClothSolverResult *sres = clmd->solver_result;
+
+  if (sres->status) { /* already initialized ? */
+    /* error only makes sense for successful iterations */
+    if (result->status == BPH_SOLVER_SUCCESS) {
+      sres->min_error = min_ff(sres->min_error, result->error);
+      sres->max_error = max_ff(sres->max_error, result->error);
+      sres->avg_error += result->error * dt;
+    }
+
+    sres->min_iterations = min_ii(sres->min_iterations, result->iterations);
+    sres->max_iterations = max_ii(sres->max_iterations, result->iterations);
+    sres->avg_iterations += (float)result->iterations * dt;
+  }
+  else {
+    /* error only makes sense for successful iterations */
+    if (result->status == BPH_SOLVER_SUCCESS) {
+      sres->min_error = sres->max_error = result->error;
+      sres->avg_error += result->error * dt;
+    }
+
+    sres->min_iterations = sres->max_iterations = result->iterations;
+    sres->avg_iterations += (float)result->iterations * dt;
+  }
+
+  sres->status |= result->status;
+}
+
+int BPH_cloth_solve(
+    Depsgraph *depsgraph, Object *ob, float frame, ClothModifierData *clmd, ListBase *effectors)
+{
+  /* Hair currently is a cloth sim in disguise ...
+   * Collision detection and volumetrics work differently then.
+   * Bad design, TODO
+   */
+  Scene *scene = DEG_get_evaluated_scene(depsgraph);
+  const bool is_hair = (clmd->hairdata != NULL);
+
+  unsigned int i = 0;
+  float step = 0.0f, tf = clmd->sim_parms->timescale;
+  Cloth *cloth = clmd->clothObject;
+  ClothVertex *verts = cloth->verts /*, *cv*/;
+  unsigned int mvert_num = cloth->mvert_num;
+  float dt = clmd->sim_parms->dt * clmd->sim_parms->timescale;
+  Implicit_Data *id = cloth->implicit;
+
+  /* Hydrostatic pressure gradient of the fluid inside the object is affected by acceleration. */
+  bool use_acceleration = (clmd->sim_parms->flags & CLOTH_SIMSETTINGS_FLAG_PRESSURE) &&
+                          (clmd->sim_parms->fluid_density > 0);
+
+  BKE_sim_debug_data_clear_category("collision");
+
+  if (!clmd->solver_result) {
+    clmd->solver_result = (ClothSolverResult *)MEM_callocN(sizeof(ClothSolverResult),
+                                                           "cloth solver result");
+  }
+  cloth_clear_result(clmd);
+
+  if (clmd->sim_parms->vgroup_mass > 0) { /* Do goal stuff. */
+    for (i = 0; i < mvert_num; i++) {
+      // update velocities with constrained velocities from pinned verts
+      if (verts[i].flags & CLOTH_VERT_FLAG_PINNED) {
+        float v[3];
+        sub_v3_v3v3(v, verts[i].xconst, verts[i].xold);
+        // mul_v3_fl(v, clmd->sim_parms->stepsPerFrame);
+        /* divide by time_scale to prevent constrained velocities from being multiplied */
+        mul_v3_fl(v, 1.0f / clmd->sim_parms->time_scale);
+        BPH_mass_spring_set_velocity(id, i, v);
+      }
+    }
+  }
+
+  if (!use_acceleration) {
+    zero_v3(cloth->average_acceleration);
+  }
+
+  while (step < tf) {
+    ImplicitSolverResult result;
+
+    /* setup vertex constraints for pinned vertices */
+    cloth_setup_constraints(clmd);
+
+    /* initialize forces to zero */
+    BPH_mass_spring_clear_forces(id);
+
+    // calculate forces
+    cloth_calc_force(scene, clmd, frame, effectors, step);
+
+    // calculate new velocity and position
+    BPH_mass_spring_solve_velocities(id, dt, &result);
+    cloth_record_result(clmd, &result, dt);
+
+    /* Calculate collision impulses. */
+    cloth_solve_collisions(depsgraph, ob, clmd, step, dt);
+
+    if (is_hair) {
+      cloth_continuum_step(clmd, dt);
+    }
+
+    if (use_acceleration) {
+      cloth_calc_average_acceleration(clmd, dt);
+    }
+
+    BPH_mass_spring_solve_positions(id, dt);
+    BPH_mass_spring_apply_result(id);
+
+    /* move pinned verts to correct position */
+    for (i = 0; i < mvert_num; i++) {
+      if (clmd->sim_parms->vgroup_mass > 0) {
+        if (verts[i].flags & CLOTH_VERT_FLAG_PINNED) {
+          float x[3];
+          /* divide by time_scale to prevent pinned vertices'
+           * delta locations from being multiplied */
+          interp_v3_v3v3(
+              x, verts[i].xold, verts[i].xconst, (step + dt) / clmd->sim_parms->time_scale);
+          BPH_mass_spring_set_position(id, i, x);
+        }
+      }
+
+      BPH_mass_spring_get_motion_state(id, i, verts[i].txold, NULL);
+    }
+
+    step += dt;
+  }
+
+  /* copy results back to cloth data */
+  for (i = 0; i < mvert_num; i++) {
+    BPH_mass_spring_get_motion_state(id, i, verts[i].x, verts[i].v);
+    copy_v3_v3(verts[i].txold, verts[i].x);
+  }
+
+  return 1;
+}
diff --git a/source/blender/simulation/intern/ConstrainedConjugateGradient.h b/source/blender/simulation/intern/ConstrainedConjugateGradient.h
new file mode 100644
index 00000000000..c924490f97d
--- /dev/null
+++ b/source/blender/simulation/intern/ConstrainedConjugateGradient.h
@@ -0,0 +1,335 @@
+/*
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ * The Original Code is Copyright (C) Blender Foundation
+ * All rights reserved.
+ */
+
+#ifndef __CONSTRAINEDCONJUGATEGRADIENT_H__
+#define __CONSTRAINEDCONJUGATEGRADIENT_H__
+
+#include <Eigen/Core>
+
+namespace Eigen {
+
+namespace internal {
+
+/** \internal Low-level conjugate gradient algorithm
+ * \param mat: The matrix A
+ * \param rhs: The right hand side vector b
+ * \param x: On input and initial solution, on output the computed solution.
+ * \param precond: A preconditioner being able to efficiently solve for an
+ * approximation of Ax=b (regardless of b)
+ * \param iters: On input the max number of iteration,
+ * on output the number of performed iterations.
+ * \param tol_error: On input the tolerance error,
+ * on output an estimation of the relative error.
+ */
+template<typename MatrixType,
+         typename Rhs,
+         typename Dest,
+         typename FilterMatrixType,
+         typename Preconditioner>
+EIGEN_DONT_INLINE void constrained_conjugate_gradient(const MatrixType &mat,
+                                                      const Rhs &rhs,
+                                                      Dest &x,
+                                                      const FilterMatrixType &filter,
+                                                      const Preconditioner &precond,
+                                                      int &iters,
+                                                      typename Dest::RealScalar &tol_error)
+{
+  using std::abs;
+  using std::sqrt;
+  typedef typename Dest::RealScalar RealScalar;
+  typedef typename Dest::Scalar Scalar;
+  typedef Matrix<Scalar, Dynamic, 1> VectorType;
+
+  RealScalar tol = tol_error;
+  int maxIters = iters;
+
+  int n = mat.cols();
+
+  VectorType residual = filter * (rhs - mat * x);  // initial residual
+
+  RealScalar rhsNorm2 = (filter * rhs).squaredNorm();
+  if (rhsNorm2 == 0) {
+    /* XXX TODO set constrained result here */
+    x.setZero();
+    iters = 0;
+    tol_error = 0;
+    return;
+  }
+  RealScalar threshold = tol * tol * rhsNorm2;
+  RealScalar residualNorm2 = residual.squaredNorm();
+  if (residualNorm2 < threshold) {
+    iters = 0;
+    tol_error = sqrt(residualNorm2 / rhsNorm2);
+    return;
+  }
+
+  VectorType p(n);
+  p = filter * precond.solve(residual);  // initial search direction
+
+  VectorType z(n), tmp(n);
+  RealScalar absNew = numext::real(
+      residual.dot(p));  // the square of the absolute value of r scaled by invM
+  int i = 0;
+  while (i < maxIters) {
+    tmp.noalias() = filter * (mat * p);  // the bottleneck of the algorithm
+
+    Scalar alpha = absNew / p.dot(tmp);  // the amount we travel on dir
+    x += alpha * p;                      // update solution
+    residual -= alpha * tmp;             // update residue
+
+    residualNorm2 = residual.squaredNorm();
+    if (residualNorm2 < threshold) {
+      break;
+    }
+
+    z = precond.solve(residual);  // approximately solve for "A z = residual"
+
+    RealScalar absOld = absNew;
+    absNew = numext::real(residual.dot(z));  // update the absolute value of r
+    RealScalar beta =
+        absNew /
+        absOld;  // calculate the Gram-Schmidt value used to create the new search direction
+    p = filter * (z + beta * p);  // update search direction
+    i++;
+  }
+  tol_error = sqrt(residualNorm2 / rhsNorm2);
+  iters = i;
+}
+
+}  // namespace internal
+
+#if 0 /* unused */
+template<typename MatrixType> struct MatrixFilter {
+  MatrixFilter() : m_cmat(NULL)
+  {
+  }
+
+  MatrixFilter(const MatrixType &cmat) : m_cmat(&cmat)
+  {
+  }
+
+  void setMatrix(const MatrixType &cmat)
+  {
+    m_cmat = &cmat;
+  }
+
+  template<typename VectorType> void apply(VectorType v) const
+  {
+    v = (*m_cmat) * v;
+  }
+
+ protected:
+  const MatrixType *m_cmat;
+};
+#endif
+
+template<typename _MatrixType,
+         int _UpLo = Lower,
+         typename _FilterMatrixType = _MatrixType,
+         typename _Preconditioner = DiagonalPreconditioner<typename _MatrixType::Scalar>>
+class ConstrainedConjugateGradient;
+
+namespace internal {
+
+template<typename _MatrixType, int _UpLo, typename _FilterMatrixType, typename _Preconditioner>
+struct traits<
+    ConstrainedConjugateGradient<_MatrixType, _UpLo, _FilterMatrixType, _Preconditioner>> {
+  typedef _MatrixType MatrixType;
+  typedef _FilterMatrixType FilterMatrixType;
+  typedef _Preconditioner Preconditioner;
+};
+
+}  // namespace internal
+
+/** \ingroup IterativeLinearSolvers_Module
+ * \brief A conjugate gradient solver for sparse self-adjoint problems with additional constraints
+ *
+ * This class allows to solve for A.x = b sparse linear problems using a conjugate gradient
+ * algorithm. The sparse matrix A must be selfadjoint. The vectors x and b can be either dense or
+ * sparse.
+ *
+ * \tparam _MatrixType the type of the sparse matrix A, can be a dense or a sparse matrix.
+ * \tparam _UpLo the triangular part that will be used for the computations. It can be Lower
+ *               or Upper. Default is Lower.
+ * \tparam _Preconditioner the type of the preconditioner. Default is DiagonalPreconditioner
+ *
+ * The maximal number of iterations and tolerance value can be controlled via the
+ * setMaxIterations() and setTolerance() methods. The defaults are the size of the problem for the
+ * maximal number of iterations and NumTraits<Scalar>::epsilon() for the tolerance.
+ *
+ * This class can be used as the direct solver classes. Here is a typical usage example:
+ * \code
+ * int n = 10000;
+ * VectorXd x(n), b(n);
+ * SparseMatrix<double> A(n,n);
+ * // fill A and b
+ * ConjugateGradient<SparseMatrix<double> > cg;
+ * cg.compute(A);
+ * x = cg.solve(b);
+ * std::cout << "#iterations:     " << cg.iterations() << std::endl;
+ * std::cout << "estimated error: " << cg.error()      << std::endl;
+ * // update b, and solve again
+ * x = cg.solve(b);
+ * \endcode
+ *
+ * By default the iterations start with x=0 as an initial guess of the solution.
+ * One can control the start using the solveWithGuess() method. Here is a step by
+ * step execution example starting with a random guess and printing the evolution
+ * of the estimated error:
+ * * \code
+ * x = VectorXd::Random(n);
+ * cg.setMaxIterations(1);
+ * int i = 0;
+ * do {
+ *   x = cg.solveWithGuess(b,x);
+ *   std::cout << i << " : " << cg.error() << std::endl;
+ *   ++i;
+ * } while (cg.info()!=Success && i<100);
+ * \endcode
+ * Note that such a step by step execution is slightly slower.
+ *
+ * \sa class SimplicialCholesky, DiagonalPreconditioner, IdentityPreconditioner
+ */
+template<typename _MatrixType, int _UpLo, typename _FilterMatrixType, typename _Preconditioner>
+class ConstrainedConjugateGradient
+    : public IterativeSolverBase<
+          ConstrainedConjugateGradient<_MatrixType, _UpLo, _FilterMatrixType, _Preconditioner>> {
+  typedef IterativeSolverBase<ConstrainedConjugateGradient> Base;
+  using Base::m_error;
+  using Base::m_info;
+  using Base::m_isInitialized;
+  using Base::m_iterations;
+  using Base::mp_matrix;
+
+ public:
+  typedef _MatrixType MatrixType;
+  typedef typename MatrixType::Scalar Scalar;
+  typedef typename MatrixType::Index Index;
+  typedef typename MatrixType::RealScalar RealScalar;
+  typedef _FilterMatrixType FilterMatrixType;
+  typedef _Preconditioner Preconditioner;
+
+  enum { UpLo = _UpLo };
+
+ public:
+  /** Default constructor. */
+  ConstrainedConjugateGradient() : Base()
+  {
+  }
+
+  /** Initialize the solver with matrix \a A for further \c Ax=b solving.
+   *
+   * This constructor is a shortcut for the default constructor followed
+   * by a call to compute().
+   *
+   * \warning this class stores a reference to the matrix A as well as some
+   * precomputed values that depend on it. Therefore, if \a A is changed
+   * this class becomes invalid. Call compute() to update it with the new
+   * matrix A, or modify a copy of A.
+   */
+  ConstrainedConjugateGradient(const MatrixType &A) : Base(A)
+  {
+  }
+
+  ~ConstrainedConjugateGradient()
+  {
+  }
+
+  FilterMatrixType &filter()
+  {
+    return m_filter;
+  }
+  const FilterMatrixType &filter() const
+  {
+    return m_filter;
+  }
+
+  /** \returns the solution x of \f$ A x = b \f$ using the current decomposition of A
+   * \a x0 as an initial solution.
+   *
+   * \sa compute()
+   */
+  template<typename Rhs, typename Guess>
+  inline const internal::solve_retval_with_guess<ConstrainedConjugateGradient, Rhs, Guess>
+  solveWithGuess(const MatrixBase<Rhs> &b, const Guess &x0) const
+  {
+    eigen_assert(m_isInitialized && "ConjugateGradient is not initialized.");
+    eigen_assert(
+        Base::rows() == b.rows() &&
+        "ConjugateGradient::solve(): invalid number of rows of the right hand side matrix b");
+    return internal::solve_retval_with_guess<ConstrainedConjugateGradient, Rhs, Guess>(
+        *this, b.derived(), x0);
+  }
+
+  /** \internal */
+  template<typename Rhs, typename Dest> void _solveWithGuess(const Rhs &b, Dest &x) const
+  {
+    m_iterations = Base::maxIterations();
+    m_error = Base::m_tolerance;
+
+    for (int j = 0; j < b.cols(); j++) {
+      m_iterations = Base::maxIterations();
+      m_error = Base::m_tolerance;
+
+      typename Dest::ColXpr xj(x, j);
+      internal::constrained_conjugate_gradient(mp_matrix->template selfadjointView<UpLo>(),
+                                               b.col(j),
+                                               xj,
+                                               m_filter,
+                                               Base::m_preconditioner,
+                                               m_iterations,
+                                               m_error);
+    }
+
+    m_isInitialized = true;
+    m_info = m_error <= Base::m_tolerance ? Success : NoConvergence;
+  }
+
+  /** \internal */
+  template<typename Rhs, typename Dest> void _solve(const Rhs &b, Dest &x) const
+  {
+    x.setOnes();
+    _solveWithGuess(b, x);
+  }
+
+ protected:
+  FilterMatrixType m_filter;
+};
+
+namespace internal {
+
+template<typename _MatrixType, int _UpLo, typename _Filter, typename _Preconditioner, typename Rhs>
+struct solve_retval<ConstrainedConjugateGradient<_MatrixType, _UpLo, _Filter, _Preconditioner>,
+                    Rhs>
+    : solve_retval_base<ConstrainedConjugateGradient<_MatrixType, _UpLo, _Filter, _Preconditioner>,
+                        Rhs> {
+  typedef ConstrainedConjugateGradient<_MatrixType, _UpLo, _Filter, _Preconditioner> Dec;
+  EIGEN_MAKE_SOLVE_HELPERS(Dec, Rhs)
+
+  template<typename Dest> void evalTo(Dest &dst) const
+  {
+    dec()._solve(rhs(), dst);
+  }
+};
+
+}  // end namespace internal
+
+}  // end namespace Eigen
+
+#endif  // __CONSTRAINEDCONJUGATEGRADIENT_H__
diff --git a/source/blender/simulation/intern/eigen_utils.h b/source/blender/simulation/intern/eigen_utils.h
new file mode 100644
index 00000000000..c186cf567df
--- /dev/null
+++ b/source/blender/simulation/intern/eigen_utils.h
@@ -0,0 +1,236 @@
+/*
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ * The Original Code is Copyright (C) Blender Foundation
+ * All rights reserved.
+ */
+
+#ifndef __EIGEN_UTILS_H__
+#define __EIGEN_UTILS_H__
+
+/** \file
+ * \ingroup bph
+ */
+
+#if defined(__GNUC__) && !defined(__clang__)
+#  pragma GCC diagnostic push
+/* XXX suppress verbose warnings in eigen */
+#  pragma GCC diagnostic ignored "-Wlogical-op"
+#endif
+
+#include <Eigen/Sparse>
+#include <Eigen/src/Core/util/DisableStupidWarnings.h>
+
+#ifdef __GNUC__
+#  pragma GCC diagnostic pop
+#endif
+
+#include "BLI_utildefines.h"
+#include "implicit.h"
+
+typedef float Scalar;
+
+/* slightly extended Eigen vector class
+ * with conversion to/from plain C float array
+ */
+class Vector3 : public Eigen::Vector3f {
+ public:
+  typedef float *ctype;
+
+  Vector3()
+  {
+  }
+
+  Vector3(const ctype &v)
+  {
+    for (int k = 0; k < 3; k++) {
+      coeffRef(k) = v[k];
+    }
+  }
+
+  Vector3 &operator=(const ctype &v)
+  {
+    for (int k = 0; k < 3; k++) {
+      coeffRef(k) = v[k];
+    }
+    return *this;
+  }
+
+  operator ctype()
+  {
+    return data();
+  }
+};
+
+/* slightly extended Eigen matrix class
+ * with conversion to/from plain C float array
+ */
+class Matrix3 : public Eigen::Matrix3f {
+ public:
+  typedef float (*ctype)[3];
+
+  Matrix3()
+  {
+  }
+
+  Matrix3(const ctype &v)
+  {
+    for (int k = 0; k < 3; k++) {
+      for (int l = 0; l < 3; l++) {
+        coeffRef(l, k) = v[k][l];
+      }
+    }
+  }
+
+  Matrix3 &operator=(const ctype &v)
+  {
+    for (int k = 0; k < 3; k++) {
+      for (int l = 0; l < 3; l++) {
+        coeffRef(l, k) = v[k][l];
+      }
+    }
+    return *this;
+  }
+
+  operator ctype()
+  {
+    return (ctype)data();
+  }
+};
+
+typedef Eigen::VectorXf lVector;
+
+/* Extension of dense Eigen vectors,
+ * providing 3-float block access for blenlib math functions
+ */
+class lVector3f : public Eigen::VectorXf {
+ public:
+  typedef Eigen::VectorXf base_t;
+
+  lVector3f()
+  {
+  }
+
+  template<typename T> lVector3f &operator=(T rhs)
+  {
+    base_t::operator=(rhs);
+    return *this;
+  }
+
+  float *v3(int vertex)
+  {
+    return &coeffRef(3 * vertex);
+  }
+
+  const float *v3(int vertex) const
+  {
+    return &coeffRef(3 * vertex);
+  }
+};
+
+typedef Eigen::Triplet<Scalar> Triplet;
+typedef std::vector<Triplet> TripletList;
+
+typedef Eigen::SparseMatrix<Scalar> lMatrix;
+
+/* Constructor type that provides more convenient handling of Eigen triplets
+ * for efficient construction of sparse 3x3 block matrices.
+ * This should be used for building lMatrix instead of writing to such lMatrix directly (which is
+ * very inefficient). After all elements have been defined using the set() method, the actual
+ * matrix can be filled using construct().
+ */
+struct lMatrix3fCtor {
+  lMatrix3fCtor()
+  {
+  }
+
+  void reset()
+  {
+    m_trips.clear();
+  }
+
+  void reserve(int numverts)
+  {
+    /* reserve for diagonal entries */
+    m_trips.reserve(numverts * 9);
+  }
+
+  void add(int i, int j, const Matrix3 &m)
+  {
+    i *= 3;
+    j *= 3;
+    for (int k = 0; k < 3; k++) {
+      for (int l = 0; l < 3; l++) {
+        m_trips.push_back(Triplet(i + k, j + l, m.coeff(l, k)));
+      }
+    }
+  }
+
+  void sub(int i, int j, const Matrix3 &m)
+  {
+    i *= 3;
+    j *= 3;
+    for (int k = 0; k < 3; k++) {
+      for (int l = 0; l < 3; l++) {
+        m_trips.push_back(Triplet(i + k, j + l, -m.coeff(l, k)));
+      }
+    }
+  }
+
+  inline void construct(lMatrix &m)
+  {
+    m.setFromTriplets(m_trips.begin(), m_trips.end());
+    m_trips.clear();
+  }
+
+ private:
+  TripletList m_trips;
+};
+
+typedef Eigen::ConjugateGradient<lMatrix, Eigen::Lower, Eigen::DiagonalPreconditioner<Scalar>>
+    ConjugateGradient;
+
+using Eigen::ComputationInfo;
+
+BLI_INLINE void print_lvector(const lVector3f &v)
+{
+  for (int i = 0; i < v.rows(); i++) {
+    if (i > 0 && i % 3 == 0) {
+      printf("\n");
+    }
+
+    printf("%f,\n", v[i]);
+  }
+}
+
+BLI_INLINE void print_lmatrix(const lMatrix &m)
+{
+  for (int j = 0; j < m.rows(); j++) {
+    if (j > 0 && j % 3 == 0) {
+      printf("\n");
+    }
+
+    for (int i = 0; i < m.cols(); i++) {
+      if (i > 0 && i % 3 == 0) {
+        printf("  ");
+      }
+
+      implicit_print_matrix_elem(m.coeff(j, i));
+    }
+    printf("\n");
+  }
+}
+
+#endif
diff --git a/source/blender/simulation/intern/hair_volume.cpp b/source/blender/simulation/intern/hair_volume.cpp
new file mode 100644
index 00000000000..1764d0a910c
--- /dev/null
+++ b/source/blender/simulation/intern/hair_volume.cpp
@@ -0,0 +1,1274 @@
+/*
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ * The Original Code is Copyright (C) Blender Foundation
+ * All rights reserved.
+ */
+
+/** \file
+ * \ingroup bph
+ */
+
+#include "MEM_guardedalloc.h"
+
+#include "BLI_math.h"
+#include "BLI_utildefines.h"
+
+#include "DNA_texture_types.h"
+
+#include "BKE_effect.h"
+
+#include "eigen_utils.h"
+#include "implicit.h"
+
+/* ================ Volumetric Hair Interaction ================
+ * adapted from
+ *
+ * Volumetric Methods for Simulation and Rendering of Hair
+ *     (Petrovic, Henne, Anderson, Pixar Technical Memo #06-08, Pixar Animation Studios)
+ *
+ * as well as
+ *
+ * "Detail Preserving Continuum Simulation of Straight Hair"
+ *     (McAdams, Selle 2009)
+ */
+
+/* Note about array indexing:
+ * Generally the arrays here are one-dimensional.
+ * The relation between 3D indices and the array offset is
+ *   offset = x + res_x * y + res_x * res_y * z
+ */
+
+static float I[3][3] = {{1, 0, 0}, {0, 1, 0}, {0, 0, 1}};
+
+BLI_INLINE int floor_int(float value)
+{
+  return value > 0.0f ? (int)value : ((int)value) - 1;
+}
+
+BLI_INLINE float floor_mod(float value)
+{
+  return value - floorf(value);
+}
+
+BLI_INLINE int hair_grid_size(const int res[3])
+{
+  return res[0] * res[1] * res[2];
+}
+
+typedef struct HairGridVert {
+  int samples;
+  float velocity[3];
+  float density;
+
+  float velocity_smooth[3];
+} HairGridVert;
+
+typedef struct HairGrid {
+  HairGridVert *verts;
+  int res[3];
+  float gmin[3], gmax[3];
+  float cellsize, inv_cellsize;
+} HairGrid;
+
+#define HAIR_GRID_INDEX_AXIS(vec, res, gmin, scale, axis) \
+  (min_ii(max_ii((int)((vec[axis] - gmin[axis]) * scale), 0), res[axis] - 2))
+
+BLI_INLINE int hair_grid_offset(const float vec[3],
+                                const int res[3],
+                                const float gmin[3],
+                                float scale)
+{
+  int i, j, k;
+  i = HAIR_GRID_INDEX_AXIS(vec, res, gmin, scale, 0);
+  j = HAIR_GRID_INDEX_AXIS(vec, res, gmin, scale, 1);
+  k = HAIR_GRID_INDEX_AXIS(vec, res, gmin, scale, 2);
+  return i + (j + k * res[1]) * res[0];
+}
+
+BLI_INLINE int hair_grid_interp_weights(
+    const int res[3], const float gmin[3], float scale, const float vec[3], float uvw[3])
+{
+  int i, j, k, offset;
+
+  i = HAIR_GRID_INDEX_AXIS(vec, res, gmin, scale, 0);
+  j = HAIR_GRID_INDEX_AXIS(vec, res, gmin, scale, 1);
+  k = HAIR_GRID_INDEX_AXIS(vec, res, gmin, scale, 2);
+  offset = i + (j + k * res[1]) * res[0];
+
+  uvw[0] = (vec[0] - gmin[0]) * scale - (float)i;
+  uvw[1] = (vec[1] - gmin[1]) * scale - (float)j;
+  uvw[2] = (vec[2] - gmin[2]) * scale - (float)k;
+
+  //  BLI_assert(0.0f <= uvw[0] && uvw[0] <= 1.0001f);
+  //  BLI_assert(0.0f <= uvw[1] && uvw[1] <= 1.0001f);
+  //  BLI_assert(0.0f <= uvw[2] && uvw[2] <= 1.0001f);
+
+  return offset;
+}
+
+BLI_INLINE void hair_grid_interpolate(const HairGridVert *grid,
+                                      const int res[3],
+                                      const float gmin[3],
+                                      float scale,
+                                      const float vec[3],
+                                      float *density,
+                                      float velocity[3],
+                                      float vel_smooth[3],
+                                      float density_gradient[3],
+                                      float velocity_gradient[3][3])
+{
+  HairGridVert data[8];
+  float uvw[3], muvw[3];
+  int res2 = res[1] * res[0];
+  int offset;
+
+  offset = hair_grid_interp_weights(res, gmin, scale, vec, uvw);
+  muvw[0] = 1.0f - uvw[0];
+  muvw[1] = 1.0f - uvw[1];
+  muvw[2] = 1.0f - uvw[2];
+
+  data[0] = grid[offset];
+  data[1] = grid[offset + 1];
+  data[2] = grid[offset + res[0]];
+  data[3] = grid[offset + res[0] + 1];
+  data[4] = grid[offset + res2];
+  data[5] = grid[offset + res2 + 1];
+  data[6] = grid[offset + res2 + res[0]];
+  data[7] = grid[offset + res2 + res[0] + 1];
+
+  if (density) {
+    *density = muvw[2] * (muvw[1] * (muvw[0] * data[0].density + uvw[0] * data[1].density) +
+                          uvw[1] * (muvw[0] * data[2].density + uvw[0] * data[3].density)) +
+               uvw[2] * (muvw[1] * (muvw[0] * data[4].density + uvw[0] * data[5].density) +
+                         uvw[1] * (muvw[0] * data[6].density + uvw[0] * data[7].density));
+  }
+
+  if (velocity) {
+    int k;
+    for (k = 0; k < 3; k++) {
+      velocity[k] = muvw[2] *
+                        (muvw[1] * (muvw[0] * data[0].velocity[k] + uvw[0] * data[1].velocity[k]) +
+                         uvw[1] * (muvw[0] * data[2].velocity[k] + uvw[0] * data[3].velocity[k])) +
+                    uvw[2] *
+                        (muvw[1] * (muvw[0] * data[4].velocity[k] + uvw[0] * data[5].velocity[k]) +
+                         uvw[1] * (muvw[0] * data[6].velocity[k] + uvw[0] * data[7].velocity[k]));
+    }
+  }
+
+  if (vel_smooth) {
+    int k;
+    for (k = 0; k < 3; k++) {
+      vel_smooth[k] = muvw[2] * (muvw[1] * (muvw[0] * data[0].velocity_smooth[k] +
+                                            uvw[0] * data[1].velocity_smooth[k]) +
+                                 uvw[1] * (muvw[0] * data[2].velocity_smooth[k] +
+                                           uvw[0] * data[3].velocity_smooth[k])) +
+                      uvw[2] * (muvw[1] * (muvw[0] * data[4].velocity_smooth[k] +
+                                           uvw[0] * data[5].velocity_smooth[k]) +
+                                uvw[1] * (muvw[0] * data[6].velocity_smooth[k] +
+                                          uvw[0] * data[7].velocity_smooth[k]));
+    }
+  }
+
+  if (density_gradient) {
+    density_gradient[0] = muvw[1] * muvw[2] * (data[0].density - data[1].density) +
+                          uvw[1] * muvw[2] * (data[2].density - data[3].density) +
+                          muvw[1] * uvw[2] * (data[4].density - data[5].density) +
+                          uvw[1] * uvw[2] * (data[6].density - data[7].density);
+
+    density_gradient[1] = muvw[2] * muvw[0] * (data[0].density - data[2].density) +
+                          uvw[2] * muvw[0] * (data[4].density - data[6].density) +
+                          muvw[2] * uvw[0] * (data[1].density - data[3].density) +
+                          uvw[2] * uvw[0] * (data[5].density - data[7].density);
+
+    density_gradient[2] = muvw[2] * muvw[0] * (data[0].density - data[4].density) +
+                          uvw[2] * muvw[0] * (data[1].density - data[5].density) +
+                          muvw[2] * uvw[0] * (data[2].density - data[6].density) +
+                          uvw[2] * uvw[0] * (data[3].density - data[7].density);
+  }
+
+  if (velocity_gradient) {
+    /* XXX TODO */
+    zero_m3(velocity_gradient);
+  }
+}
+
+void BPH_hair_volume_vertex_grid_forces(HairGrid *grid,
+                                        const float x[3],
+                                        const float v[3],
+                                        float smoothfac,
+                                        float pressurefac,
+                                        float minpressure,
+                                        float f[3],
+                                        float dfdx[3][3],
+                                        float dfdv[3][3])
+{
+  float gdensity, gvelocity[3], ggrad[3], gvelgrad[3][3], gradlen;
+
+  hair_grid_interpolate(grid->verts,
+                        grid->res,
+                        grid->gmin,
+                        grid->inv_cellsize,
+                        x,
+                        &gdensity,
+                        gvelocity,
+                        NULL,
+                        ggrad,
+                        gvelgrad);
+
+  zero_v3(f);
+  sub_v3_v3(gvelocity, v);
+  mul_v3_v3fl(f, gvelocity, smoothfac);
+
+  gradlen = normalize_v3(ggrad) - minpressure;
+  if (gradlen > 0.0f) {
+    mul_v3_fl(ggrad, gradlen);
+    madd_v3_v3fl(f, ggrad, pressurefac);
+  }
+
+  zero_m3(dfdx);
+
+  sub_m3_m3m3(dfdv, gvelgrad, I);
+  mul_m3_fl(dfdv, smoothfac);
+}
+
+void BPH_hair_volume_grid_interpolate(HairGrid *grid,
+                                      const float x[3],
+                                      float *density,
+                                      float velocity[3],
+                                      float velocity_smooth[3],
+                                      float density_gradient[3],
+                                      float velocity_gradient[3][3])
+{
+  hair_grid_interpolate(grid->verts,
+                        grid->res,
+                        grid->gmin,
+                        grid->inv_cellsize,
+                        x,
+                        density,
+                        velocity,
+                        velocity_smooth,
+                        density_gradient,
+                        velocity_gradient);
+}
+
+void BPH_hair_volume_grid_velocity(
+    HairGrid *grid, const float x[3], const float v[3], float fluid_factor, float r_v[3])
+{
+  float gdensity, gvelocity[3], gvel_smooth[3], ggrad[3], gvelgrad[3][3];
+  float v_pic[3], v_flip[3];
+
+  hair_grid_interpolate(grid->verts,
+                        grid->res,
+                        grid->gmin,
+                        grid->inv_cellsize,
+                        x,
+                        &gdensity,
+                        gvelocity,
+                        gvel_smooth,
+                        ggrad,
+                        gvelgrad);
+
+  /* velocity according to PIC method (Particle-in-Cell) */
+  copy_v3_v3(v_pic, gvel_smooth);
+
+  /* velocity according to FLIP method (Fluid-Implicit-Particle) */
+  sub_v3_v3v3(v_flip, gvel_smooth, gvelocity);
+  add_v3_v3(v_flip, v);
+
+  interp_v3_v3v3(r_v, v_pic, v_flip, fluid_factor);
+}
+
+void BPH_hair_volume_grid_clear(HairGrid *grid)
+{
+  const int size = hair_grid_size(grid->res);
+  int i;
+  for (i = 0; i < size; i++) {
+    zero_v3(grid->verts[i].velocity);
+    zero_v3(grid->verts[i].velocity_smooth);
+    grid->verts[i].density = 0.0f;
+    grid->verts[i].samples = 0;
+  }
+}
+
+BLI_INLINE bool hair_grid_point_valid(const float vec[3], const float gmin[3], const float gmax[3])
+{
+  return !(vec[0] < gmin[0] || vec[1] < gmin[1] || vec[2] < gmin[2] || vec[0] > gmax[0] ||
+           vec[1] > gmax[1] || vec[2] > gmax[2]);
+}
+
+BLI_INLINE float dist_tent_v3f3(const float a[3], float x, float y, float z)
+{
+  float w = (1.0f - fabsf(a[0] - x)) * (1.0f - fabsf(a[1] - y)) * (1.0f - fabsf(a[2] - z));
+  return w;
+}
+
+BLI_INLINE float weights_sum(const float weights[8])
+{
+  float totweight = 0.0f;
+  int i;
+  for (i = 0; i < 8; i++) {
+    totweight += weights[i];
+  }
+  return totweight;
+}
+
+/* returns the grid array offset as well to avoid redundant calculation */
+BLI_INLINE int hair_grid_weights(
+    const int res[3], const float gmin[3], float scale, const float vec[3], float weights[8])
+{
+  int i, j, k, offset;
+  float uvw[3];
+
+  i = HAIR_GRID_INDEX_AXIS(vec, res, gmin, scale, 0);
+  j = HAIR_GRID_INDEX_AXIS(vec, res, gmin, scale, 1);
+  k = HAIR_GRID_INDEX_AXIS(vec, res, gmin, scale, 2);
+  offset = i + (j + k * res[1]) * res[0];
+
+  uvw[0] = (vec[0] - gmin[0]) * scale;
+  uvw[1] = (vec[1] - gmin[1]) * scale;
+  uvw[2] = (vec[2] - gmin[2]) * scale;
+
+  weights[0] = dist_tent_v3f3(uvw, (float)i, (float)j, (float)k);
+  weights[1] = dist_tent_v3f3(uvw, (float)(i + 1), (float)j, (float)k);
+  weights[2] = dist_tent_v3f3(uvw, (float)i, (float)(j + 1), (float)k);
+  weights[3] = dist_tent_v3f3(uvw, (float)(i + 1), (float)(j + 1), (float)k);
+  weights[4] = dist_tent_v3f3(uvw, (float)i, (float)j, (float)(k + 1));
+  weights[5] = dist_tent_v3f3(uvw, (float)(i + 1), (float)j, (float)(k + 1));
+  weights[6] = dist_tent_v3f3(uvw, (float)i, (float)(j + 1), (float)(k + 1));
+  weights[7] = dist_tent_v3f3(uvw, (float)(i + 1), (float)(j + 1), (float)(k + 1));
+
+  //  BLI_assert(fabsf(weights_sum(weights) - 1.0f) < 0.0001f);
+
+  return offset;
+}
+
+BLI_INLINE void grid_to_world(HairGrid *grid, float vecw[3], const float vec[3])
+{
+  copy_v3_v3(vecw, vec);
+  mul_v3_fl(vecw, grid->cellsize);
+  add_v3_v3(vecw, grid->gmin);
+}
+
+void BPH_hair_volume_add_vertex(HairGrid *grid, const float x[3], const float v[3])
+{
+  const int res[3] = {grid->res[0], grid->res[1], grid->res[2]};
+  float weights[8];
+  int di, dj, dk;
+  int offset;
+
+  if (!hair_grid_point_valid(x, grid->gmin, grid->gmax)) {
+    return;
+  }
+
+  offset = hair_grid_weights(res, grid->gmin, grid->inv_cellsize, x, weights);
+
+  for (di = 0; di < 2; di++) {
+    for (dj = 0; dj < 2; dj++) {
+      for (dk = 0; dk < 2; dk++) {
+        int voffset = offset + di + (dj + dk * res[1]) * res[0];
+        int iw = di + dj * 2 + dk * 4;
+
+        grid->verts[voffset].density += weights[iw];
+        madd_v3_v3fl(grid->verts[voffset].velocity, v, weights[iw]);
+      }
+    }
+  }
+}
+
+#if 0
+BLI_INLINE void hair_volume_eval_grid_vertex(HairGridVert *vert,
+                                             const float loc[3],
+                                             float radius,
+                                             float dist_scale,
+                                             const float x2[3],
+                                             const float v2[3],
+                                             const float x3[3],
+                                             const float v3[3])
+{
+  float closest[3], lambda, dist, weight;
+
+  lambda = closest_to_line_v3(closest, loc, x2, x3);
+  dist = len_v3v3(closest, loc);
+
+  weight = (radius - dist) * dist_scale;
+
+  if (weight > 0.0f) {
+    float vel[3];
+
+    interp_v3_v3v3(vel, v2, v3, lambda);
+    madd_v3_v3fl(vert->velocity, vel, weight);
+    vert->density += weight;
+    vert->samples += 1;
+  }
+}
+
+BLI_INLINE int major_axis_v3(const float v[3])
+{
+  const float a = fabsf(v[0]);
+  const float b = fabsf(v[1]);
+  const float c = fabsf(v[2]);
+  return a > b ? (a > c ? 0 : 2) : (b > c ? 1 : 2);
+}
+
+BLI_INLINE void hair_volume_add_segment_2D(HairGrid *grid,
+                                           const float UNUSED(x1[3]),
+                                           const float UNUSED(v1[3]),
+                                           const float x2[3],
+                                           const float v2[3],
+                                           const float x3[3],
+                                           const float v3[3],
+                                           const float UNUSED(x4[3]),
+                                           const float UNUSED(v4[3]),
+                                           const float UNUSED(dir1[3]),
+                                           const float dir2[3],
+                                           const float UNUSED(dir3[3]),
+                                           int resj,
+                                           int resk,
+                                           int jmin,
+                                           int jmax,
+                                           int kmin,
+                                           int kmax,
+                                           HairGridVert *vert,
+                                           int stride_j,
+                                           int stride_k,
+                                           const float loc[3],
+                                           int axis_j,
+                                           int axis_k,
+                                           int debug_i)
+{
+  const float radius = 1.5f;
+  const float dist_scale = grid->inv_cellsize;
+
+  int j, k;
+
+  /* boundary checks to be safe */
+  CLAMP_MIN(jmin, 0);
+  CLAMP_MAX(jmax, resj - 1);
+  CLAMP_MIN(kmin, 0);
+  CLAMP_MAX(kmax, resk - 1);
+
+  HairGridVert *vert_j = vert + jmin * stride_j;
+  float loc_j[3] = {loc[0], loc[1], loc[2]};
+  loc_j[axis_j] += (float)jmin;
+  for (j = jmin; j <= jmax; j++, vert_j += stride_j, loc_j[axis_j] += 1.0f) {
+
+    HairGridVert *vert_k = vert_j + kmin * stride_k;
+    float loc_k[3] = {loc_j[0], loc_j[1], loc_j[2]};
+    loc_k[axis_k] += (float)kmin;
+    for (k = kmin; k <= kmax; k++, vert_k += stride_k, loc_k[axis_k] += 1.0f) {
+
+      hair_volume_eval_grid_vertex(vert_k, loc_k, radius, dist_scale, x2, v2, x3, v3);
+
+#  if 0
+      {
+        float wloc[3], x2w[3], x3w[3];
+        grid_to_world(grid, wloc, loc_k);
+        grid_to_world(grid, x2w, x2);
+        grid_to_world(grid, x3w, x3);
+
+        if (vert_k->samples > 0) {
+          BKE_sim_debug_data_add_circle(wloc, 0.01f, 1.0, 1.0, 0.3, "grid", 2525, debug_i, j, k);
+        }
+
+        if (grid->debug_value) {
+          BKE_sim_debug_data_add_dot(wloc, 1, 0, 0, "grid", 93, debug_i, j, k);
+          BKE_sim_debug_data_add_dot(x2w, 0.1, 0.1, 0.7, "grid", 649, debug_i, j, k);
+          BKE_sim_debug_data_add_line(wloc, x2w, 0.3, 0.8, 0.3, "grid", 253, debug_i, j, k);
+          BKE_sim_debug_data_add_line(wloc, x3w, 0.8, 0.3, 0.3, "grid", 254, debug_i, j, k);
+          // BKE_sim_debug_data_add_circle(
+          //     x2w, len_v3v3(wloc, x2w), 0.2, 0.7, 0.2,
+          //     "grid", 255, i, j, k);
+        }
+      }
+#  endif
+    }
+  }
+}
+
+/* Uses a variation of Bresenham's algorithm for rasterizing a 3D grid with a line segment.
+ *
+ * The radius of influence around a segment is assumed to be at most 2*cellsize,
+ * i.e. only cells containing the segment and their direct neighbors are examined.
+ */
+void BPH_hair_volume_add_segment(HairGrid *grid,
+                                 const float x1[3],
+                                 const float v1[3],
+                                 const float x2[3],
+                                 const float v2[3],
+                                 const float x3[3],
+                                 const float v3[3],
+                                 const float x4[3],
+                                 const float v4[3],
+                                 const float dir1[3],
+                                 const float dir2[3],
+                                 const float dir3[3])
+{
+  const int res[3] = {grid->res[0], grid->res[1], grid->res[2]};
+
+  /* find the primary direction from the major axis of the direction vector */
+  const int axis0 = major_axis_v3(dir2);
+  const int axis1 = (axis0 + 1) % 3;
+  const int axis2 = (axis0 + 2) % 3;
+
+  /* vertex buffer offset factors along cardinal axes */
+  const int strides[3] = {1, res[0], res[0] * res[1]};
+  const int stride0 = strides[axis0];
+  const int stride1 = strides[axis1];
+  const int stride2 = strides[axis2];
+
+  /* increment of secondary directions per step in the primary direction
+   * note: we always go in the positive direction along axis0, so the sign can be inverted
+   */
+  const float inc1 = dir2[axis1] / dir2[axis0];
+  const float inc2 = dir2[axis2] / dir2[axis0];
+
+  /* start/end points, so increment along axis0 is always positive */
+  const float *start = x2[axis0] < x3[axis0] ? x2 : x3;
+  const float *end = x2[axis0] < x3[axis0] ? x3 : x2;
+  const float start0 = start[axis0], start1 = start[axis1], start2 = start[axis2];
+  const float end0 = end[axis0];
+
+  /* range along primary direction */
+  const int imin = max_ii(floor_int(start[axis0]) - 1, 0);
+  const int imax = min_ii(floor_int(end[axis0]) + 2, res[axis0] - 1);
+
+  float h = 0.0f;
+  HairGridVert *vert0;
+  float loc0[3];
+  int j0, k0, j0_prev, k0_prev;
+  int i;
+
+  for (i = imin; i <= imax; i++) {
+    float shift1, shift2; /* fraction of a full cell shift [0.0, 1.0) */
+    int jmin, jmax, kmin, kmax;
+
+    h = CLAMPIS((float)i, start0, end0);
+
+    shift1 = start1 + (h - start0) * inc1;
+    shift2 = start2 + (h - start0) * inc2;
+
+    j0_prev = j0;
+    j0 = floor_int(shift1);
+
+    k0_prev = k0;
+    k0 = floor_int(shift2);
+
+    if (i > imin) {
+      jmin = min_ii(j0, j0_prev);
+      jmax = max_ii(j0, j0_prev);
+      kmin = min_ii(k0, k0_prev);
+      kmax = max_ii(k0, k0_prev);
+    }
+    else {
+      jmin = jmax = j0;
+      kmin = kmax = k0;
+    }
+
+    vert0 = grid->verts + i * stride0;
+    loc0[axis0] = (float)i;
+    loc0[axis1] = 0.0f;
+    loc0[axis2] = 0.0f;
+
+    hair_volume_add_segment_2D(grid,
+                               x1,
+                               v1,
+                               x2,
+                               v2,
+                               x3,
+                               v3,
+                               x4,
+                               v4,
+                               dir1,
+                               dir2,
+                               dir3,
+                               res[axis1],
+                               res[axis2],
+                               jmin - 1,
+                               jmax + 2,
+                               kmin - 1,
+                               kmax + 2,
+                               vert0,
+                               stride1,
+                               stride2,
+                               loc0,
+                               axis1,
+                               axis2,
+                               i);
+  }
+}
+#else
+BLI_INLINE void hair_volume_eval_grid_vertex_sample(HairGridVert *vert,
+                                                    const float loc[3],
+                                                    float radius,
+                                                    float dist_scale,
+                                                    const float x[3],
+                                                    const float v[3])
+{
+  float dist, weight;
+
+  dist = len_v3v3(x, loc);
+
+  weight = (radius - dist) * dist_scale;
+
+  if (weight > 0.0f) {
+    madd_v3_v3fl(vert->velocity, v, weight);
+    vert->density += weight;
+    vert->samples += 1;
+  }
+}
+
+/* XXX simplified test implementation using a series of discrete sample along the segment,
+ * instead of finding the closest point for all affected grid vertices.
+ */
+void BPH_hair_volume_add_segment(HairGrid *grid,
+                                 const float UNUSED(x1[3]),
+                                 const float UNUSED(v1[3]),
+                                 const float x2[3],
+                                 const float v2[3],
+                                 const float x3[3],
+                                 const float v3[3],
+                                 const float UNUSED(x4[3]),
+                                 const float UNUSED(v4[3]),
+                                 const float UNUSED(dir1[3]),
+                                 const float UNUSED(dir2[3]),
+                                 const float UNUSED(dir3[3]))
+{
+  const float radius = 1.5f;
+  const float dist_scale = grid->inv_cellsize;
+
+  const int res[3] = {grid->res[0], grid->res[1], grid->res[2]};
+  const int stride[3] = {1, res[0], res[0] * res[1]};
+  const int num_samples = 10;
+
+  int s;
+
+  for (s = 0; s < num_samples; s++) {
+    float x[3], v[3];
+    int i, j, k;
+
+    float f = (float)s / (float)(num_samples - 1);
+    interp_v3_v3v3(x, x2, x3, f);
+    interp_v3_v3v3(v, v2, v3, f);
+
+    int imin = max_ii(floor_int(x[0]) - 2, 0);
+    int imax = min_ii(floor_int(x[0]) + 2, res[0] - 1);
+    int jmin = max_ii(floor_int(x[1]) - 2, 0);
+    int jmax = min_ii(floor_int(x[1]) + 2, res[1] - 1);
+    int kmin = max_ii(floor_int(x[2]) - 2, 0);
+    int kmax = min_ii(floor_int(x[2]) + 2, res[2] - 1);
+
+    for (k = kmin; k <= kmax; k++) {
+      for (j = jmin; j <= jmax; j++) {
+        for (i = imin; i <= imax; i++) {
+          float loc[3] = {(float)i, (float)j, (float)k};
+          HairGridVert *vert = grid->verts + i * stride[0] + j * stride[1] + k * stride[2];
+
+          hair_volume_eval_grid_vertex_sample(vert, loc, radius, dist_scale, x, v);
+        }
+      }
+    }
+  }
+}
+#endif
+
+void BPH_hair_volume_normalize_vertex_grid(HairGrid *grid)
+{
+  int i, size = hair_grid_size(grid->res);
+  /* divide velocity with density */
+  for (i = 0; i < size; i++) {
+    float density = grid->verts[i].density;
+    if (density > 0.0f) {
+      mul_v3_fl(grid->verts[i].velocity, 1.0f / density);
+    }
+  }
+}
+
+/* Cells with density below this are considered empty. */
+static const float density_threshold = 0.001f;
+
+/* Contribution of target density pressure to the laplacian in the pressure poisson equation.
+ * This is based on the model found in
+ * "Two-way Coupled SPH and Particle Level Set Fluid Simulation" (Losasso et al., 2008)
+ */
+BLI_INLINE float hair_volume_density_divergence(float density,
+                                                float target_density,
+                                                float strength)
+{
+  if (density > density_threshold && density > target_density) {
+    return strength * logf(target_density / density);
+  }
+  else {
+    return 0.0f;
+  }
+}
+
+bool BPH_hair_volume_solve_divergence(HairGrid *grid,
+                                      float /*dt*/,
+                                      float target_density,
+                                      float target_strength)
+{
+  const float flowfac = grid->cellsize;
+  const float inv_flowfac = 1.0f / grid->cellsize;
+
+  /*const int num_cells = hair_grid_size(grid->res);*/
+  const int res[3] = {grid->res[0], grid->res[1], grid->res[2]};
+  const int resA[3] = {grid->res[0] + 2, grid->res[1] + 2, grid->res[2] + 2};
+
+  const int stride0 = 1;
+  const int stride1 = grid->res[0];
+  const int stride2 = grid->res[1] * grid->res[0];
+  const int strideA0 = 1;
+  const int strideA1 = grid->res[0] + 2;
+  const int strideA2 = (grid->res[1] + 2) * (grid->res[0] + 2);
+
+  const int num_cells = res[0] * res[1] * res[2];
+  const int num_cellsA = (res[0] + 2) * (res[1] + 2) * (res[2] + 2);
+
+  HairGridVert *vert_start = grid->verts - (stride0 + stride1 + stride2);
+  HairGridVert *vert;
+  int i, j, k;
+
+#define MARGIN_i0 (i < 1)
+#define MARGIN_j0 (j < 1)
+#define MARGIN_k0 (k < 1)
+#define MARGIN_i1 (i >= resA[0] - 1)
+#define MARGIN_j1 (j >= resA[1] - 1)
+#define MARGIN_k1 (k >= resA[2] - 1)
+
+#define NEIGHBOR_MARGIN_i0 (i < 2)
+#define NEIGHBOR_MARGIN_j0 (j < 2)
+#define NEIGHBOR_MARGIN_k0 (k < 2)
+#define NEIGHBOR_MARGIN_i1 (i >= resA[0] - 2)
+#define NEIGHBOR_MARGIN_j1 (j >= resA[1] - 2)
+#define NEIGHBOR_MARGIN_k1 (k >= resA[2] - 2)
+
+  BLI_assert(num_cells >= 1);
+
+  /* Calculate divergence */
+  lVector B(num_cellsA);
+  for (k = 0; k < resA[2]; k++) {
+    for (j = 0; j < resA[1]; j++) {
+      for (i = 0; i < resA[0]; i++) {
+        int u = i * strideA0 + j * strideA1 + k * strideA2;
+        bool is_margin = MARGIN_i0 || MARGIN_i1 || MARGIN_j0 || MARGIN_j1 || MARGIN_k0 ||
+                         MARGIN_k1;
+
+        if (is_margin) {
+          B[u] = 0.0f;
+          continue;
+        }
+
+        vert = vert_start + i * stride0 + j * stride1 + k * stride2;
+
+        const float *v0 = vert->velocity;
+        float dx = 0.0f, dy = 0.0f, dz = 0.0f;
+        if (!NEIGHBOR_MARGIN_i0) {
+          dx += v0[0] - (vert - stride0)->velocity[0];
+        }
+        if (!NEIGHBOR_MARGIN_i1) {
+          dx += (vert + stride0)->velocity[0] - v0[0];
+        }
+        if (!NEIGHBOR_MARGIN_j0) {
+          dy += v0[1] - (vert - stride1)->velocity[1];
+        }
+        if (!NEIGHBOR_MARGIN_j1) {
+          dy += (vert + stride1)->velocity[1] - v0[1];
+        }
+        if (!NEIGHBOR_MARGIN_k0) {
+          dz += v0[2] - (vert - stride2)->velocity[2];
+        }
+        if (!NEIGHBOR_MARGIN_k1) {
+          dz += (vert + stride2)->velocity[2] - v0[2];
+        }
+
+        float divergence = -0.5f * flowfac * (dx + dy + dz);
+
+        /* adjustment term for target density */
+        float target = hair_volume_density_divergence(
+            vert->density, target_density, target_strength);
+
+        /* B vector contains the finite difference approximation of the velocity divergence.
+         * Note: according to the discretized Navier-Stokes equation the rhs vector
+         * and resulting pressure gradient should be multiplied by the (inverse) density;
+         * however, this is already included in the weighting of hair velocities on the grid!
+         */
+        B[u] = divergence - target;
+
+#if 0
+        {
+          float wloc[3], loc[3];
+          float col0[3] = {0.0, 0.0, 0.0};
+          float colp[3] = {0.0, 1.0, 1.0};
+          float coln[3] = {1.0, 0.0, 1.0};
+          float col[3];
+          float fac;
+
+          loc[0] = (float)(i - 1);
+          loc[1] = (float)(j - 1);
+          loc[2] = (float)(k - 1);
+          grid_to_world(grid, wloc, loc);
+
+          if (divergence > 0.0f) {
+            fac = CLAMPIS(divergence * target_strength, 0.0, 1.0);
+            interp_v3_v3v3(col, col0, colp, fac);
+          }
+          else {
+            fac = CLAMPIS(-divergence * target_strength, 0.0, 1.0);
+            interp_v3_v3v3(col, col0, coln, fac);
+          }
+          if (fac > 0.05f) {
+            BKE_sim_debug_data_add_circle(
+                grid->debug_data, wloc, 0.01f, col[0], col[1], col[2], "grid", 5522, i, j, k);
+          }
+        }
+#endif
+      }
+    }
+  }
+
+  /* Main Poisson equation system:
+   * This is derived from the discretezation of the Poisson equation
+   *   div(grad(p)) = div(v)
+   *
+   * The finite difference approximation yields the linear equation system described here:
+   * https://en.wikipedia.org/wiki/Discrete_Poisson_equation
+   */
+  lMatrix A(num_cellsA, num_cellsA);
+  /* Reserve space for the base equation system (without boundary conditions).
+   * Each column contains a factor 6 on the diagonal
+   * and up to 6 factors -1 on other places.
+   */
+  A.reserve(Eigen::VectorXi::Constant(num_cellsA, 7));
+
+  for (k = 0; k < resA[2]; k++) {
+    for (j = 0; j < resA[1]; j++) {
+      for (i = 0; i < resA[0]; i++) {
+        int u = i * strideA0 + j * strideA1 + k * strideA2;
+        bool is_margin = MARGIN_i0 || MARGIN_i1 || MARGIN_j0 || MARGIN_j1 || MARGIN_k0 ||
+                         MARGIN_k1;
+
+        vert = vert_start + i * stride0 + j * stride1 + k * stride2;
+        if (!is_margin && vert->density > density_threshold) {
+          int neighbors_lo = 0;
+          int neighbors_hi = 0;
+          int non_solid_neighbors = 0;
+          int neighbor_lo_index[3];
+          int neighbor_hi_index[3];
+          int n;
+
+          /* check for upper bounds in advance
+           * to get the correct number of neighbors,
+           * needed for the diagonal element
+           */
+          if (!NEIGHBOR_MARGIN_k0 && (vert - stride2)->density > density_threshold) {
+            neighbor_lo_index[neighbors_lo++] = u - strideA2;
+          }
+          if (!NEIGHBOR_MARGIN_j0 && (vert - stride1)->density > density_threshold) {
+            neighbor_lo_index[neighbors_lo++] = u - strideA1;
+          }
+          if (!NEIGHBOR_MARGIN_i0 && (vert - stride0)->density > density_threshold) {
+            neighbor_lo_index[neighbors_lo++] = u - strideA0;
+          }
+          if (!NEIGHBOR_MARGIN_i1 && (vert + stride0)->density > density_threshold) {
+            neighbor_hi_index[neighbors_hi++] = u + strideA0;
+          }
+          if (!NEIGHBOR_MARGIN_j1 && (vert + stride1)->density > density_threshold) {
+            neighbor_hi_index[neighbors_hi++] = u + strideA1;
+          }
+          if (!NEIGHBOR_MARGIN_k1 && (vert + stride2)->density > density_threshold) {
+            neighbor_hi_index[neighbors_hi++] = u + strideA2;
+          }
+
+          /*int liquid_neighbors = neighbors_lo + neighbors_hi;*/
+          non_solid_neighbors = 6;
+
+          for (n = 0; n < neighbors_lo; n++) {
+            A.insert(neighbor_lo_index[n], u) = -1.0f;
+          }
+          A.insert(u, u) = (float)non_solid_neighbors;
+          for (n = 0; n < neighbors_hi; n++) {
+            A.insert(neighbor_hi_index[n], u) = -1.0f;
+          }
+        }
+        else {
+          A.insert(u, u) = 1.0f;
+        }
+      }
+    }
+  }
+
+  ConjugateGradient cg;
+  cg.setMaxIterations(100);
+  cg.setTolerance(0.01f);
+
+  cg.compute(A);
+
+  lVector p = cg.solve(B);
+
+  if (cg.info() == Eigen::Success) {
+    /* Calculate velocity = grad(p) */
+    for (k = 0; k < resA[2]; k++) {
+      for (j = 0; j < resA[1]; j++) {
+        for (i = 0; i < resA[0]; i++) {
+          int u = i * strideA0 + j * strideA1 + k * strideA2;
+          bool is_margin = MARGIN_i0 || MARGIN_i1 || MARGIN_j0 || MARGIN_j1 || MARGIN_k0 ||
+                           MARGIN_k1;
+          if (is_margin) {
+            continue;
+          }
+
+          vert = vert_start + i * stride0 + j * stride1 + k * stride2;
+          if (vert->density > density_threshold) {
+            float p_left = p[u - strideA0];
+            float p_right = p[u + strideA0];
+            float p_down = p[u - strideA1];
+            float p_up = p[u + strideA1];
+            float p_bottom = p[u - strideA2];
+            float p_top = p[u + strideA2];
+
+            /* finite difference estimate of pressure gradient */
+            float dvel[3];
+            dvel[0] = p_right - p_left;
+            dvel[1] = p_up - p_down;
+            dvel[2] = p_top - p_bottom;
+            mul_v3_fl(dvel, -0.5f * inv_flowfac);
+
+            /* pressure gradient describes velocity delta */
+            add_v3_v3v3(vert->velocity_smooth, vert->velocity, dvel);
+          }
+          else {
+            zero_v3(vert->velocity_smooth);
+          }
+        }
+      }
+    }
+
+#if 0
+    {
+      int axis = 0;
+      float offset = 0.0f;
+
+      int slice = (offset - grid->gmin[axis]) / grid->cellsize;
+
+      for (k = 0; k < resA[2]; k++) {
+        for (j = 0; j < resA[1]; j++) {
+          for (i = 0; i < resA[0]; i++) {
+            int u = i * strideA0 + j * strideA1 + k * strideA2;
+            bool is_margin = MARGIN_i0 || MARGIN_i1 || MARGIN_j0 || MARGIN_j1 || MARGIN_k0 ||
+                             MARGIN_k1;
+            if (i != slice) {
+              continue;
+            }
+
+            vert = vert_start + i * stride0 + j * stride1 + k * stride2;
+
+            float wloc[3], loc[3];
+            float col0[3] = {0.0, 0.0, 0.0};
+            float colp[3] = {0.0, 1.0, 1.0};
+            float coln[3] = {1.0, 0.0, 1.0};
+            float col[3];
+            float fac;
+
+            loc[0] = (float)(i - 1);
+            loc[1] = (float)(j - 1);
+            loc[2] = (float)(k - 1);
+            grid_to_world(grid, wloc, loc);
+
+            float pressure = p[u];
+            if (pressure > 0.0f) {
+              fac = CLAMPIS(pressure * grid->debug1, 0.0, 1.0);
+              interp_v3_v3v3(col, col0, colp, fac);
+            }
+            else {
+              fac = CLAMPIS(-pressure * grid->debug1, 0.0, 1.0);
+              interp_v3_v3v3(col, col0, coln, fac);
+            }
+            if (fac > 0.05f) {
+              BKE_sim_debug_data_add_circle(
+                  grid->debug_data, wloc, 0.01f, col[0], col[1], col[2], "grid", 5533, i, j, k);
+            }
+
+            if (!is_margin) {
+              float dvel[3];
+              sub_v3_v3v3(dvel, vert->velocity_smooth, vert->velocity);
+              // BKE_sim_debug_data_add_vector(
+              //     grid->debug_data, wloc, dvel, 1, 1, 1,
+              //     "grid", 5566, i, j, k);
+            }
+
+            if (!is_margin) {
+              float d = CLAMPIS(vert->density * grid->debug2, 0.0f, 1.0f);
+              float col0[3] = {0.3, 0.3, 0.3};
+              float colp[3] = {0.0, 0.0, 1.0};
+              float col[3];
+
+              interp_v3_v3v3(col, col0, colp, d);
+              // if (d > 0.05f) {
+              // BKE_sim_debug_data_add_dot(
+              //     grid->debug_data, wloc, col[0], col[1], col[2],
+              //     "grid", 5544, i, j, k);
+              // }
+            }
+          }
+        }
+      }
+    }
+#endif
+
+    return true;
+  }
+  else {
+    /* Clear result in case of error */
+    for (i = 0, vert = grid->verts; i < num_cells; i++, vert++) {
+      zero_v3(vert->velocity_smooth);
+    }
+
+    return false;
+  }
+}
+
+#if 0 /* XXX weighting is incorrect, disabled for now */
+/* Velocity filter kernel
+ * See https://en.wikipedia.org/wiki/Filter_%28large_eddy_simulation%29
+ */
+
+BLI_INLINE void hair_volume_filter_box_convolute(
+    HairVertexGrid *grid, float invD, const int kernel_size[3], int i, int j, int k)
+{
+  int res = grid->res;
+  int p, q, r;
+  int minp = max_ii(i - kernel_size[0], 0), maxp = min_ii(i + kernel_size[0], res - 1);
+  int minq = max_ii(j - kernel_size[1], 0), maxq = min_ii(j + kernel_size[1], res - 1);
+  int minr = max_ii(k - kernel_size[2], 0), maxr = min_ii(k + kernel_size[2], res - 1);
+  int offset, kernel_offset, kernel_dq, kernel_dr;
+  HairGridVert *verts;
+  float *vel_smooth;
+
+  offset = i + (j + k * res) * res;
+  verts = grid->verts;
+  vel_smooth = verts[offset].velocity_smooth;
+
+  kernel_offset = minp + (minq + minr * res) * res;
+  kernel_dq = res;
+  kernel_dr = res * res;
+  for (r = minr; r <= maxr; r++) {
+    for (q = minq; q <= maxq; q++) {
+      for (p = minp; p <= maxp; p++) {
+
+        madd_v3_v3fl(vel_smooth, verts[kernel_offset].velocity, invD);
+
+        kernel_offset += 1;
+      }
+      kernel_offset += kernel_dq;
+    }
+    kernel_offset += kernel_dr;
+  }
+}
+
+void BPH_hair_volume_vertex_grid_filter_box(HairVertexGrid *grid, int kernel_size)
+{
+  int size = hair_grid_size(grid->res);
+  int kernel_sizev[3] = {kernel_size, kernel_size, kernel_size};
+  int tot;
+  float invD;
+  int i, j, k;
+
+  if (kernel_size <= 0) {
+    return;
+  }
+
+  tot = kernel_size * 2 + 1;
+  invD = 1.0f / (float)(tot * tot * tot);
+
+  /* clear values for convolution */
+  for (i = 0; i < size; i++) {
+    zero_v3(grid->verts[i].velocity_smooth);
+  }
+
+  for (i = 0; i < grid->res; i++) {
+    for (j = 0; j < grid->res; j++) {
+      for (k = 0; k < grid->res; k++) {
+        hair_volume_filter_box_convolute(grid, invD, kernel_sizev, i, j, k);
+      }
+    }
+  }
+
+  /* apply as new velocity */
+  for (i = 0; i < size; i++) {
+    copy_v3_v3(grid->verts[i].velocity, grid->verts[i].velocity_smooth);
+  }
+}
+#endif
+
+HairGrid *BPH_hair_volume_create_vertex_grid(float cellsize,
+                                             const float gmin[3],
+                                             const float gmax[3])
+{
+  float scale;
+  float extent[3];
+  int resmin[3], resmax[3], res[3];
+  float gmin_margin[3], gmax_margin[3];
+  int size;
+  HairGrid *grid;
+  int i;
+
+  /* sanity check */
+  if (cellsize <= 0.0f) {
+    cellsize = 1.0f;
+  }
+  scale = 1.0f / cellsize;
+
+  sub_v3_v3v3(extent, gmax, gmin);
+  for (i = 0; i < 3; i++) {
+    resmin[i] = floor_int(gmin[i] * scale);
+    resmax[i] = floor_int(gmax[i] * scale) + 1;
+
+    /* add margin of 1 cell */
+    resmin[i] -= 1;
+    resmax[i] += 1;
+
+    res[i] = resmax[i] - resmin[i] + 1;
+    /* sanity check: avoid null-sized grid */
+    if (res[i] < 4) {
+      res[i] = 4;
+      resmax[i] = resmin[i] + 4;
+    }
+    /* sanity check: avoid too large grid size */
+    if (res[i] > MAX_HAIR_GRID_RES) {
+      res[i] = MAX_HAIR_GRID_RES;
+      resmax[i] = resmin[i] + MAX_HAIR_GRID_RES;
+    }
+
+    gmin_margin[i] = (float)resmin[i] * cellsize;
+    gmax_margin[i] = (float)resmax[i] * cellsize;
+  }
+  size = hair_grid_size(res);
+
+  grid = (HairGrid *)MEM_callocN(sizeof(HairGrid), "hair grid");
+  grid->res[0] = res[0];
+  grid->res[1] = res[1];
+  grid->res[2] = res[2];
+  copy_v3_v3(grid->gmin, gmin_margin);
+  copy_v3_v3(grid->gmax, gmax_margin);
+  grid->cellsize = cellsize;
+  grid->inv_cellsize = scale;
+  grid->verts = (HairGridVert *)MEM_callocN(sizeof(HairGridVert) * size, "hair voxel data");
+
+  return grid;
+}
+
+void BPH_hair_volume_free_vertex_grid(HairGrid *grid)
+{
+  if (grid) {
+    if (grid->verts) {
+      MEM_freeN(grid->verts);
+    }
+    MEM_freeN(grid);
+  }
+}
+
+void BPH_hair_volume_grid_geometry(
+    HairGrid *grid, float *cellsize, int res[3], float gmin[3], float gmax[3])
+{
+  if (cellsize) {
+    *cellsize = grid->cellsize;
+  }
+  if (res) {
+    copy_v3_v3_int(res, grid->res);
+  }
+  if (gmin) {
+    copy_v3_v3(gmin, grid->gmin);
+  }
+  if (gmax) {
+    copy_v3_v3(gmax, grid->gmax);
+  }
+}
+
+#if 0
+static HairGridVert *hair_volume_create_collision_grid(ClothModifierData *clmd,
+                                                       lfVector *lX,
+                                                       unsigned int numverts)
+{
+  int res = hair_grid_res;
+  int size = hair_grid_size(res);
+  HairGridVert *collgrid;
+  ListBase *colliders;
+  ColliderCache *col = NULL;
+  float gmin[3], gmax[3], scale[3];
+  /* 2.0f is an experimental value that seems to give good results */
+  float collfac = 2.0f * clmd->sim_parms->collider_friction;
+  unsigned int v = 0;
+  int i = 0;
+
+  hair_volume_get_boundbox(lX, numverts, gmin, gmax);
+  hair_grid_get_scale(res, gmin, gmax, scale);
+
+  collgrid = MEM_mallocN(sizeof(HairGridVert) * size, "hair collider voxel data");
+
+  /* initialize grid */
+  for (i = 0; i < size; i++) {
+    zero_v3(collgrid[i].velocity);
+    collgrid[i].density = 0.0f;
+  }
+
+  /* gather colliders */
+  colliders = BKE_collider_cache_create(depsgraph, NULL, NULL);
+  if (colliders && collfac > 0.0f) {
+    for (col = colliders->first; col; col = col->next) {
+      MVert *loc0 = col->collmd->x;
+      MVert *loc1 = col->collmd->xnew;
+      float vel[3];
+      float weights[8];
+      int di, dj, dk;
+
+      for (v = 0; v < col->collmd->numverts; v++, loc0++, loc1++) {
+        int offset;
+
+        if (!hair_grid_point_valid(loc1->co, gmin, gmax)) {
+          continue;
+        }
+
+        offset = hair_grid_weights(res, gmin, scale, lX[v], weights);
+
+        sub_v3_v3v3(vel, loc1->co, loc0->co);
+
+        for (di = 0; di < 2; di++) {
+          for (dj = 0; dj < 2; dj++) {
+            for (dk = 0; dk < 2; dk++) {
+              int voffset = offset + di + (dj + dk * res) * res;
+              int iw = di + dj * 2 + dk * 4;
+
+              collgrid[voffset].density += weights[iw];
+              madd_v3_v3fl(collgrid[voffset].velocity, vel, weights[iw]);
+            }
+          }
+        }
+      }
+    }
+  }
+  BKE_collider_cache_free(&colliders);
+
+  /* divide velocity with density */
+  for (i = 0; i < size; i++) {
+    float density = collgrid[i].density;
+    if (density > 0.0f) {
+      mul_v3_fl(collgrid[i].velocity, 1.0f / density);
+    }
+  }
+
+  return collgrid;
+}
+#endif
diff --git a/source/blender/simulation/intern/implicit.h b/source/blender/simulation/intern/implicit.h
new file mode 100644
index 00000000000..8bc09755180
--- /dev/null
+++ b/source/blender/simulation/intern/implicit.h
@@ -0,0 +1,272 @@
+/*
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ * The Original Code is Copyright (C) Blender Foundation
+ * All rights reserved.
+ */
+
+#ifndef __IMPLICIT_H__
+#define __IMPLICIT_H__
+
+/** \file
+ * \ingroup bph
+ */
+
+#include "stdio.h"
+
+#include "BLI_utildefines.h"
+
+#include "BKE_collision.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+//#define IMPLICIT_SOLVER_EIGEN
+#define IMPLICIT_SOLVER_BLENDER
+
+#define CLOTH_ROOT_FRAME /* enable use of root frame coordinate transform */
+
+#define CLOTH_FORCE_GRAVITY
+#define CLOTH_FORCE_DRAG
+#define CLOTH_FORCE_SPRING_STRUCTURAL
+#define CLOTH_FORCE_SPRING_SHEAR
+#define CLOTH_FORCE_SPRING_BEND
+#define CLOTH_FORCE_SPRING_GOAL
+#define CLOTH_FORCE_EFFECTORS
+
+//#define IMPLICIT_PRINT_SOLVER_INPUT_OUTPUT
+
+//#define IMPLICIT_ENABLE_EIGEN_DEBUG
+
+struct Implicit_Data;
+
+typedef struct ImplicitSolverResult {
+  int status;
+
+  int iterations;
+  float error;
+} ImplicitSolverResult;
+
+BLI_INLINE void implicit_print_matrix_elem(float v)
+{
+  printf("%-8.3f", v);
+}
+
+void BPH_mass_spring_set_vertex_mass(struct Implicit_Data *data, int index, float mass);
+void BPH_mass_spring_set_rest_transform(struct Implicit_Data *data, int index, float rot[3][3]);
+
+void BPH_mass_spring_set_motion_state(struct Implicit_Data *data,
+                                      int index,
+                                      const float x[3],
+                                      const float v[3]);
+void BPH_mass_spring_set_position(struct Implicit_Data *data, int index, const float x[3]);
+void BPH_mass_spring_set_velocity(struct Implicit_Data *data, int index, const float v[3]);
+void BPH_mass_spring_get_motion_state(struct Implicit_Data *data,
+                                      int index,
+                                      float x[3],
+                                      float v[3]);
+void BPH_mass_spring_get_position(struct Implicit_Data *data, int index, float x[3]);
+void BPH_mass_spring_get_velocity(struct Implicit_Data *data, int index, float v[3]);
+
+/* access to modified motion state during solver step */
+void BPH_mass_spring_get_new_position(struct Implicit_Data *data, int index, float x[3]);
+void BPH_mass_spring_set_new_position(struct Implicit_Data *data, int index, const float x[3]);
+void BPH_mass_spring_get_new_velocity(struct Implicit_Data *data, int index, float v[3]);
+void BPH_mass_spring_set_new_velocity(struct Implicit_Data *data, int index, const float v[3]);
+
+void BPH_mass_spring_clear_constraints(struct Implicit_Data *data);
+void BPH_mass_spring_add_constraint_ndof0(struct Implicit_Data *data,
+                                          int index,
+                                          const float dV[3]);
+void BPH_mass_spring_add_constraint_ndof1(struct Implicit_Data *data,
+                                          int index,
+                                          const float c1[3],
+                                          const float c2[3],
+                                          const float dV[3]);
+void BPH_mass_spring_add_constraint_ndof2(struct Implicit_Data *data,
+                                          int index,
+                                          const float c1[3],
+                                          const float dV[3]);
+
+bool BPH_mass_spring_solve_velocities(struct Implicit_Data *data,
+                                      float dt,
+                                      struct ImplicitSolverResult *result);
+bool BPH_mass_spring_solve_positions(struct Implicit_Data *data, float dt);
+void BPH_mass_spring_apply_result(struct Implicit_Data *data);
+
+/* Clear the force vector at the beginning of the time step */
+void BPH_mass_spring_clear_forces(struct Implicit_Data *data);
+/* Fictitious forces introduced by moving coordinate systems */
+void BPH_mass_spring_force_reference_frame(struct Implicit_Data *data,
+                                           int index,
+                                           const float acceleration[3],
+                                           const float omega[3],
+                                           const float domega_dt[3],
+                                           float mass);
+/* Simple uniform gravity force */
+void BPH_mass_spring_force_gravity(struct Implicit_Data *data,
+                                   int index,
+                                   float mass,
+                                   const float g[3]);
+/* Global drag force (velocity damping) */
+void BPH_mass_spring_force_drag(struct Implicit_Data *data, float drag);
+/* Custom external force */
+void BPH_mass_spring_force_extern(
+    struct Implicit_Data *data, int i, const float f[3], float dfdx[3][3], float dfdv[3][3]);
+/* Wind force, acting on a face (only generates pressure from the normal component) */
+void BPH_mass_spring_force_face_wind(
+    struct Implicit_Data *data, int v1, int v2, int v3, const float (*winvec)[3]);
+/* Arbitrary per-unit-area vector force field acting on a face. */
+void BPH_mass_spring_force_face_extern(
+    struct Implicit_Data *data, int v1, int v2, int v3, const float (*forcevec)[3]);
+/* Wind force, acting on an edge */
+void BPH_mass_spring_force_edge_wind(struct Implicit_Data *data,
+                                     int v1,
+                                     int v2,
+                                     float radius1,
+                                     float radius2,
+                                     const float (*winvec)[3]);
+/* Wind force, acting on a vertex */
+void BPH_mass_spring_force_vertex_wind(struct Implicit_Data *data,
+                                       int v,
+                                       float radius,
+                                       const float (*winvec)[3]);
+/* Linear spring force between two points */
+bool BPH_mass_spring_force_spring_linear(struct Implicit_Data *data,
+                                         int i,
+                                         int j,
+                                         float restlen,
+                                         float stiffness_tension,
+                                         float damping_tension,
+                                         float stiffness_compression,
+                                         float damping_compression,
+                                         bool resist_compress,
+                                         bool new_compress,
+                                         float clamp_force);
+/* Angular spring force between two polygons */
+bool BPH_mass_spring_force_spring_angular(struct Implicit_Data *data,
+                                          int i,
+                                          int j,
+                                          int *i_a,
+                                          int *i_b,
+                                          int len_a,
+                                          int len_b,
+                                          float restang,
+                                          float stiffness,
+                                          float damping);
+/* Bending force, forming a triangle at the base of two structural springs */
+bool BPH_mass_spring_force_spring_bending(
+    struct Implicit_Data *data, int i, int j, float restlen, float kb, float cb);
+/* Angular bending force based on local target vectors */
+bool BPH_mass_spring_force_spring_bending_hair(struct Implicit_Data *data,
+                                               int i,
+                                               int j,
+                                               int k,
+                                               const float target[3],
+                                               float stiffness,
+                                               float damping);
+/* Global goal spring */
+bool BPH_mass_spring_force_spring_goal(struct Implicit_Data *data,
+                                       int i,
+                                       const float goal_x[3],
+                                       const float goal_v[3],
+                                       float stiffness,
+                                       float damping);
+
+float BPH_tri_tetra_volume_signed_6x(struct Implicit_Data *data, int v1, int v2, int v3);
+float BPH_tri_area(struct Implicit_Data *data, int v1, int v2, int v3);
+
+void BPH_mass_spring_force_pressure(struct Implicit_Data *data,
+                                    int v1,
+                                    int v2,
+                                    int v3,
+                                    float common_pressure,
+                                    const float *vertex_pressure,
+                                    const float weights[3]);
+
+/* ======== Hair Volumetric Forces ======== */
+
+struct HairGrid;
+
+#define MAX_HAIR_GRID_RES 256
+
+struct HairGrid *BPH_hair_volume_create_vertex_grid(float cellsize,
+                                                    const float gmin[3],
+                                                    const float gmax[3]);
+void BPH_hair_volume_free_vertex_grid(struct HairGrid *grid);
+void BPH_hair_volume_grid_geometry(
+    struct HairGrid *grid, float *cellsize, int res[3], float gmin[3], float gmax[3]);
+
+void BPH_hair_volume_grid_clear(struct HairGrid *grid);
+void BPH_hair_volume_add_vertex(struct HairGrid *grid, const float x[3], const float v[3]);
+void BPH_hair_volume_add_segment(struct HairGrid *grid,
+                                 const float x1[3],
+                                 const float v1[3],
+                                 const float x2[3],
+                                 const float v2[3],
+                                 const float x3[3],
+                                 const float v3[3],
+                                 const float x4[3],
+                                 const float v4[3],
+                                 const float dir1[3],
+                                 const float dir2[3],
+                                 const float dir3[3]);
+
+void BPH_hair_volume_normalize_vertex_grid(struct HairGrid *grid);
+
+bool BPH_hair_volume_solve_divergence(struct HairGrid *grid,
+                                      float dt,
+                                      float target_density,
+                                      float target_strength);
+#if 0 /* XXX weighting is incorrect, disabled for now */
+void BPH_hair_volume_vertex_grid_filter_box(struct HairVertexGrid *grid, int kernel_size);
+#endif
+
+void BPH_hair_volume_grid_interpolate(struct HairGrid *grid,
+                                      const float x[3],
+                                      float *density,
+                                      float velocity[3],
+                                      float velocity_smooth[3],
+                                      float density_gradient[3],
+                                      float velocity_gradient[3][3]);
+
+/* Effect of fluid simulation grid on velocities.
+ * fluid_factor controls blending between PIC (Particle-in-Cell)
+ *     and FLIP (Fluid-Implicit-Particle) methods (0 = only PIC, 1 = only FLIP)
+ */
+void BPH_hair_volume_grid_velocity(
+    struct HairGrid *grid, const float x[3], const float v[3], float fluid_factor, float r_v[3]);
+/* XXX Warning: expressing grid effects on velocity as a force is not very stable,
+ * due to discontinuities in interpolated values!
+ * Better use hybrid approaches such as described in
+ * "Detail Preserving Continuum Simulation of Straight Hair"
+ * (McAdams, Selle 2009)
+ */
+void BPH_hair_volume_vertex_grid_forces(struct HairGrid *grid,
+                                        const float x[3],
+                                        const float v[3],
+                                        float smoothfac,
+                                        float pressurefac,
+                                        float minpressure,
+                                        float f[3],
+                                        float dfdx[3][3],
+                                        float dfdv[3][3]);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/source/blender/simulation/intern/implicit_blender.c b/source/blender/simulation/intern/implicit_blender.c
new file mode 100644
index 00000000000..54d38f3c10b
--- /dev/null
+++ b/source/blender/simulation/intern/implicit_blender.c
@@ -0,0 +1,2360 @@
+/*
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ * The Original Code is Copyright (C) Blender Foundation
+ * All rights reserved.
+ */
+
+/** \file
+ * \ingroup bph
+ */
+
+#include "implicit.h"
+
+#ifdef IMPLICIT_SOLVER_BLENDER
+
+#  include "MEM_guardedalloc.h"
+
+#  include "DNA_meshdata_types.h"
+#  include "DNA_object_force_types.h"
+#  include "DNA_object_types.h"
+#  include "DNA_scene_types.h"
+#  include "DNA_texture_types.h"
+
+#  include "BLI_math.h"
+#  include "BLI_utildefines.h"
+
+#  include "BKE_cloth.h"
+#  include "BKE_collision.h"
+#  include "BKE_effect.h"
+
+#  include "BPH_mass_spring.h"
+
+#  ifdef __GNUC__
+#    pragma GCC diagnostic ignored "-Wtype-limits"
+#  endif
+
+#  ifdef _OPENMP
+#    define CLOTH_OPENMP_LIMIT 512
+#  endif
+
+//#define DEBUG_TIME
+
+#  ifdef DEBUG_TIME
+#    include "PIL_time.h"
+#  endif
+
+static float I[3][3] = {{1, 0, 0}, {0, 1, 0}, {0, 0, 1}};
+static float ZERO[3][3] = {{0, 0, 0}, {0, 0, 0}, {0, 0, 0}};
+
+#  if 0
+#    define C99
+#    ifdef C99
+#      defineDO_INLINE inline
+#    else
+#      defineDO_INLINE static
+#    endif
+#  endif /* if 0 */
+
+struct Cloth;
+
+//////////////////////////////////////////
+/* fast vector / matrix library, enhancements are welcome :) -dg */
+/////////////////////////////////////////
+
+/* DEFINITIONS */
+typedef float lfVector[3];
+typedef struct fmatrix3x3 {
+  float m[3][3];     /* 3x3 matrix */
+  unsigned int c, r; /* column and row number */
+  /* int pinned; // is this vertex allowed to move? */
+  float n1, n2, n3;    /* three normal vectors for collision constrains */
+  unsigned int vcount; /* vertex count */
+  unsigned int scount; /* spring count */
+} fmatrix3x3;
+
+///////////////////////////
+// float[3] vector
+///////////////////////////
+/* simple vector code */
+/* STATUS: verified */
+DO_INLINE void mul_fvector_S(float to[3], const float from[3], float scalar)
+{
+  to[0] = from[0] * scalar;
+  to[1] = from[1] * scalar;
+  to[2] = from[2] * scalar;
+}
+/* simple v^T * v product ("outer product") */
+/* STATUS: HAS TO BE verified (*should* work) */
+DO_INLINE void mul_fvectorT_fvector(float to[3][3], float vectorA[3], float vectorB[3])
+{
+  mul_fvector_S(to[0], vectorB, vectorA[0]);
+  mul_fvector_S(to[1], vectorB, vectorA[1]);
+  mul_fvector_S(to[2], vectorB, vectorA[2]);
+}
+/* simple v^T * v product with scalar ("outer product") */
+/* STATUS: HAS TO BE verified (*should* work) */
+DO_INLINE void mul_fvectorT_fvectorS(float to[3][3], float vectorA[3], float vectorB[3], float aS)
+{
+  mul_fvectorT_fvector(to, vectorA, vectorB);
+
+  mul_fvector_S(to[0], to[0], aS);
+  mul_fvector_S(to[1], to[1], aS);
+  mul_fvector_S(to[2], to[2], aS);
+}
+
+#  if 0
+/* printf vector[3] on console: for debug output */
+static void print_fvector(float m3[3])
+{
+  printf("%f\n%f\n%f\n\n", m3[0], m3[1], m3[2]);
+}
+
+///////////////////////////
+// long float vector float (*)[3]
+///////////////////////////
+/* print long vector on console: for debug output */
+DO_INLINE void print_lfvector(float (*fLongVector)[3], unsigned int verts)
+{
+  unsigned int i = 0;
+  for (i = 0; i < verts; i++) {
+    print_fvector(fLongVector[i]);
+  }
+}
+#  endif
+
+/* create long vector */
+DO_INLINE lfVector *create_lfvector(unsigned int verts)
+{
+  /* TODO: check if memory allocation was successful */
+  return (lfVector *)MEM_callocN(verts * sizeof(lfVector), "cloth_implicit_alloc_vector");
+  // return (lfVector *)cloth_aligned_malloc(&MEMORY_BASE, verts * sizeof(lfVector));
+}
+/* delete long vector */
+DO_INLINE void del_lfvector(float (*fLongVector)[3])
+{
+  if (fLongVector != NULL) {
+    MEM_freeN(fLongVector);
+    // cloth_aligned_free(&MEMORY_BASE, fLongVector);
+  }
+}
+/* copy long vector */
+DO_INLINE void cp_lfvector(float (*to)[3], float (*from)[3], unsigned int verts)
+{
+  memcpy(to, from, verts * sizeof(lfVector));
+}
+/* init long vector with float[3] */
+DO_INLINE void init_lfvector(float (*fLongVector)[3], float vector[3], unsigned int verts)
+{
+  unsigned int i = 0;
+  for (i = 0; i < verts; i++) {
+    copy_v3_v3(fLongVector[i], vector);
+  }
+}
+/* zero long vector with float[3] */
+DO_INLINE void zero_lfvector(float (*to)[3], unsigned int verts)
+{
+  memset(to, 0.0f, verts * sizeof(lfVector));
+}
+/* multiply long vector with scalar*/
+DO_INLINE void mul_lfvectorS(float (*to)[3],
+                             float (*fLongVector)[3],
+                             float scalar,
+                             unsigned int verts)
+{
+  unsigned int i = 0;
+
+  for (i = 0; i < verts; i++) {
+    mul_fvector_S(to[i], fLongVector[i], scalar);
+  }
+}
+/* multiply long vector with scalar*/
+/* A -= B * float */
+DO_INLINE void submul_lfvectorS(float (*to)[3],
+                                float (*fLongVector)[3],
+                                float scalar,
+                                unsigned int verts)
+{
+  unsigned int i = 0;
+  for (i = 0; i < verts; i++) {
+    VECSUBMUL(to[i], fLongVector[i], scalar);
+  }
+}
+/* dot product for big vector */
+DO_INLINE float dot_lfvector(float (*fLongVectorA)[3],
+                             float (*fLongVectorB)[3],
+                             unsigned int verts)
+{
+  long i = 0;
+  float temp = 0.0;
+  // XXX brecht, disabled this for now (first schedule line was already disabled),
+  // due to non-commutative nature of floating point ops this makes the sim give
+  // different results each time you run it!
+  // schedule(guided, 2)
+  //#pragma omp parallel for reduction(+: temp) if (verts > CLOTH_OPENMP_LIMIT)
+  for (i = 0; i < (long)verts; i++) {
+    temp += dot_v3v3(fLongVectorA[i], fLongVectorB[i]);
+  }
+  return temp;
+}
+/* A = B + C  --> for big vector */
+DO_INLINE void add_lfvector_lfvector(float (*to)[3],
+                                     float (*fLongVectorA)[3],
+                                     float (*fLongVectorB)[3],
+                                     unsigned int verts)
+{
+  unsigned int i = 0;
+
+  for (i = 0; i < verts; i++) {
+    add_v3_v3v3(to[i], fLongVectorA[i], fLongVectorB[i]);
+  }
+}
+/* A = B + C * float --> for big vector */
+DO_INLINE void add_lfvector_lfvectorS(float (*to)[3],
+                                      float (*fLongVectorA)[3],
+                                      float (*fLongVectorB)[3],
+                                      float bS,
+                                      unsigned int verts)
+{
+  unsigned int i = 0;
+
+  for (i = 0; i < verts; i++) {
+    VECADDS(to[i], fLongVectorA[i], fLongVectorB[i], bS);
+  }
+}
+/* A = B * float + C * float --> for big vector */
+DO_INLINE void add_lfvectorS_lfvectorS(float (*to)[3],
+                                       float (*fLongVectorA)[3],
+                                       float aS,
+                                       float (*fLongVectorB)[3],
+                                       float bS,
+                                       unsigned int verts)
+{
+  unsigned int i = 0;
+
+  for (i = 0; i < verts; i++) {
+    VECADDSS(to[i], fLongVectorA[i], aS, fLongVectorB[i], bS);
+  }
+}
+/* A = B - C * float --> for big vector */
+DO_INLINE void sub_lfvector_lfvectorS(float (*to)[3],
+                                      float (*fLongVectorA)[3],
+                                      float (*fLongVectorB)[3],
+                                      float bS,
+                                      unsigned int verts)
+{
+  unsigned int i = 0;
+  for (i = 0; i < verts; i++) {
+    VECSUBS(to[i], fLongVectorA[i], fLongVectorB[i], bS);
+  }
+}
+/* A = B - C --> for big vector */
+DO_INLINE void sub_lfvector_lfvector(float (*to)[3],
+                                     float (*fLongVectorA)[3],
+                                     float (*fLongVectorB)[3],
+                                     unsigned int verts)
+{
+  unsigned int i = 0;
+
+  for (i = 0; i < verts; i++) {
+    sub_v3_v3v3(to[i], fLongVectorA[i], fLongVectorB[i]);
+  }
+}
+///////////////////////////
+// 3x3 matrix
+///////////////////////////
+#  if 0
+/* printf 3x3 matrix on console: for debug output */
+static void print_fmatrix(float m3[3][3])
+{
+  printf("%f\t%f\t%f\n", m3[0][0], m3[0][1], m3[0][2]);
+  printf("%f\t%f\t%f\n", m3[1][0], m3[1][1], m3[1][2]);
+  printf("%f\t%f\t%f\n\n", m3[2][0], m3[2][1], m3[2][2]);
+}
+
+static void print_sparse_matrix(fmatrix3x3 *m)
+{
+  if (m) {
+    unsigned int i;
+    for (i = 0; i < m[0].vcount + m[0].scount; i++) {
+      printf("%d:\n", i);
+      print_fmatrix(m[i].m);
+    }
+  }
+}
+#  endif
+
+#  if 0
+static void print_lvector(lfVector *v, int numverts)
+{
+  int i;
+  for (i = 0; i < numverts; i++) {
+    if (i > 0) {
+      printf("\n");
+    }
+
+    printf("%f,\n", v[i][0]);
+    printf("%f,\n", v[i][1]);
+    printf("%f,\n", v[i][2]);
+  }
+}
+#  endif
+
+#  if 0
+static void print_bfmatrix(fmatrix3x3 *m)
+{
+  int tot = m[0].vcount + m[0].scount;
+  int size = m[0].vcount * 3;
+  float *t = MEM_callocN(sizeof(float) * size * size, "bfmatrix");
+  int q, i, j;
+
+  for (q = 0; q < tot; q++) {
+    int k = 3 * m[q].r;
+    int l = 3 * m[q].c;
+
+    for (j = 0; j < 3; j++) {
+      for (i = 0; i < 3; i++) {
+        //              if (t[k + i + (l + j) * size] != 0.0f) {
+        //                  printf("warning: overwriting value at %d, %d\n", m[q].r, m[q].c);
+        //              }
+        if (k == l) {
+          t[k + i + (k + j) * size] += m[q].m[i][j];
+        }
+        else {
+          t[k + i + (l + j) * size] += m[q].m[i][j];
+          t[l + j + (k + i) * size] += m[q].m[j][i];
+        }
+      }
+    }
+  }
+
+  for (j = 0; j < size; j++) {
+    if (j > 0 && j % 3 == 0) {
+      printf("\n");
+    }
+
+    for (i = 0; i < size; i++) {
+      if (i > 0 && i % 3 == 0) {
+        printf("  ");
+      }
+
+      implicit_print_matrix_elem(t[i + j * size]);
+    }
+    printf("\n");
+  }
+
+  MEM_freeN(t);
+}
+#  endif
+
+/* copy 3x3 matrix */
+DO_INLINE void cp_fmatrix(float to[3][3], float from[3][3])
+{
+  // memcpy(to, from, sizeof (float) * 9);
+  copy_v3_v3(to[0], from[0]);
+  copy_v3_v3(to[1], from[1]);
+  copy_v3_v3(to[2], from[2]);
+}
+
+/* copy 3x3 matrix */
+DO_INLINE void initdiag_fmatrixS(float to[3][3], float aS)
+{
+  cp_fmatrix(to, ZERO);
+
+  to[0][0] = aS;
+  to[1][1] = aS;
+  to[2][2] = aS;
+}
+
+#  if 0
+/* calculate determinant of 3x3 matrix */
+DO_INLINE float det_fmatrix(float m[3][3])
+{
+  return m[0][0] * m[1][1] * m[2][2] + m[1][0] * m[2][1] * m[0][2] + m[0][1] * m[1][2] * m[2][0] -
+         m[0][0] * m[1][2] * m[2][1] - m[0][1] * m[1][0] * m[2][2] - m[2][0] * m[1][1] * m[0][2];
+}
+
+DO_INLINE void inverse_fmatrix(float to[3][3], float from[3][3])
+{
+  unsigned int i, j;
+  float d;
+
+  if ((d = det_fmatrix(from)) == 0) {
+    printf("can't build inverse");
+    exit(0);
+  }
+  for (i = 0; i < 3; i++) {
+    for (j = 0; j < 3; j++) {
+      int i1 = (i + 1) % 3;
+      int i2 = (i + 2) % 3;
+      int j1 = (j + 1) % 3;
+      int j2 = (j + 2) % 3;
+      /** Reverse indexes i&j to take transpose. */
+      to[j][i] = (from[i1][j1] * from[i2][j2] - from[i1][j2] * from[i2][j1]) / d;
+      /**
+       * <pre>
+       * if (i == j) {
+       *     to[i][j] = 1.0f / from[i][j];
+       * }
+       * else {
+       *     to[i][j] = 0;
+       * }
+       * </pre>
+       */
+    }
+  }
+}
+#  endif
+
+/* 3x3 matrix multiplied by a scalar */
+/* STATUS: verified */
+DO_INLINE void mul_fmatrix_S(float matrix[3][3], float scalar)
+{
+  mul_fvector_S(matrix[0], matrix[0], scalar);
+  mul_fvector_S(matrix[1], matrix[1], scalar);
+  mul_fvector_S(matrix[2], matrix[2], scalar);
+}
+
+/* a vector multiplied by a 3x3 matrix */
+/* STATUS: verified */
+DO_INLINE void mul_fvector_fmatrix(float *to, const float *from, float matrix[3][3])
+{
+  to[0] = matrix[0][0] * from[0] + matrix[1][0] * from[1] + matrix[2][0] * from[2];
+  to[1] = matrix[0][1] * from[0] + matrix[1][1] * from[1] + matrix[2][1] * from[2];
+  to[2] = matrix[0][2] * from[0] + matrix[1][2] * from[1] + matrix[2][2] * from[2];
+}
+
+/* 3x3 matrix multiplied by a vector */
+/* STATUS: verified */
+DO_INLINE void mul_fmatrix_fvector(float *to, float matrix[3][3], float from[3])
+{
+  to[0] = dot_v3v3(matrix[0], from);
+  to[1] = dot_v3v3(matrix[1], from);
+  to[2] = dot_v3v3(matrix[2], from);
+}
+/* 3x3 matrix addition with 3x3 matrix */
+DO_INLINE void add_fmatrix_fmatrix(float to[3][3], float matrixA[3][3], float matrixB[3][3])
+{
+  add_v3_v3v3(to[0], matrixA[0], matrixB[0]);
+  add_v3_v3v3(to[1], matrixA[1], matrixB[1]);
+  add_v3_v3v3(to[2], matrixA[2], matrixB[2]);
+}
+/* A -= B*x + C*y (3x3 matrix sub-addition with 3x3 matrix) */
+DO_INLINE void subadd_fmatrixS_fmatrixS(
+    float to[3][3], float matrixA[3][3], float aS, float matrixB[3][3], float bS)
+{
+  VECSUBADDSS(to[0], matrixA[0], aS, matrixB[0], bS);
+  VECSUBADDSS(to[1], matrixA[1], aS, matrixB[1], bS);
+  VECSUBADDSS(to[2], matrixA[2], aS, matrixB[2], bS);
+}
+/* A = B - C (3x3 matrix subtraction with 3x3 matrix) */
+DO_INLINE void sub_fmatrix_fmatrix(float to[3][3], float matrixA[3][3], float matrixB[3][3])
+{
+  sub_v3_v3v3(to[0], matrixA[0], matrixB[0]);
+  sub_v3_v3v3(to[1], matrixA[1], matrixB[1]);
+  sub_v3_v3v3(to[2], matrixA[2], matrixB[2]);
+}
+/////////////////////////////////////////////////////////////////
+// special functions
+/////////////////////////////////////////////////////////////////
+/* 3x3 matrix multiplied+added by a vector */
+/* STATUS: verified */
+DO_INLINE void muladd_fmatrix_fvector(float to[3], float matrix[3][3], float from[3])
+{
+  to[0] += dot_v3v3(matrix[0], from);
+  to[1] += dot_v3v3(matrix[1], from);
+  to[2] += dot_v3v3(matrix[2], from);
+}
+
+DO_INLINE void muladd_fmatrixT_fvector(float to[3], float matrix[3][3], const float from[3])
+{
+  to[0] += matrix[0][0] * from[0] + matrix[1][0] * from[1] + matrix[2][0] * from[2];
+  to[1] += matrix[0][1] * from[0] + matrix[1][1] * from[1] + matrix[2][1] * from[2];
+  to[2] += matrix[0][2] * from[0] + matrix[1][2] * from[1] + matrix[2][2] * from[2];
+}
+
+BLI_INLINE void outerproduct(float r[3][3], const float a[3], const float b[3])
+{
+  mul_v3_v3fl(r[0], a, b[0]);
+  mul_v3_v3fl(r[1], a, b[1]);
+  mul_v3_v3fl(r[2], a, b[2]);
+}
+
+BLI_INLINE void cross_m3_v3m3(float r[3][3], const float v[3], float m[3][3])
+{
+  cross_v3_v3v3(r[0], v, m[0]);
+  cross_v3_v3v3(r[1], v, m[1]);
+  cross_v3_v3v3(r[2], v, m[2]);
+}
+
+BLI_INLINE void cross_v3_identity(float r[3][3], const float v[3])
+{
+  r[0][0] = 0.0f;
+  r[1][0] = v[2];
+  r[2][0] = -v[1];
+  r[0][1] = -v[2];
+  r[1][1] = 0.0f;
+  r[2][1] = v[0];
+  r[0][2] = v[1];
+  r[1][2] = -v[0];
+  r[2][2] = 0.0f;
+}
+
+BLI_INLINE void madd_m3_m3fl(float r[3][3], float m[3][3], float f)
+{
+  r[0][0] += m[0][0] * f;
+  r[0][1] += m[0][1] * f;
+  r[0][2] += m[0][2] * f;
+  r[1][0] += m[1][0] * f;
+  r[1][1] += m[1][1] * f;
+  r[1][2] += m[1][2] * f;
+  r[2][0] += m[2][0] * f;
+  r[2][1] += m[2][1] * f;
+  r[2][2] += m[2][2] * f;
+}
+
+/////////////////////////////////////////////////////////////////
+
+///////////////////////////
+// SPARSE SYMMETRIC big matrix with 3x3 matrix entries
+///////////////////////////
+/* printf a big matrix on console: for debug output */
+#  if 0
+static void print_bfmatrix(fmatrix3x3 *m3)
+{
+  unsigned int i = 0;
+
+  for (i = 0; i < m3[0].vcount + m3[0].scount; i++) {
+    print_fmatrix(m3[i].m);
+  }
+}
+#  endif
+
+BLI_INLINE void init_fmatrix(fmatrix3x3 *matrix, int r, int c)
+{
+  matrix->r = r;
+  matrix->c = c;
+}
+
+/* create big matrix */
+DO_INLINE fmatrix3x3 *create_bfmatrix(unsigned int verts, unsigned int springs)
+{
+  // TODO: check if memory allocation was successful */
+  fmatrix3x3 *temp = (fmatrix3x3 *)MEM_callocN(sizeof(fmatrix3x3) * (verts + springs),
+                                               "cloth_implicit_alloc_matrix");
+  int i;
+
+  temp[0].vcount = verts;
+  temp[0].scount = springs;
+
+  /* vertex part of the matrix is diagonal blocks */
+  for (i = 0; i < verts; i++) {
+    init_fmatrix(temp + i, i, i);
+  }
+
+  return temp;
+}
+/* delete big matrix */
+DO_INLINE void del_bfmatrix(fmatrix3x3 *matrix)
+{
+  if (matrix != NULL) {
+    MEM_freeN(matrix);
+  }
+}
+
+/* copy big matrix */
+DO_INLINE void cp_bfmatrix(fmatrix3x3 *to, fmatrix3x3 *from)
+{
+  // TODO bounds checking
+  memcpy(to, from, sizeof(fmatrix3x3) * (from[0].vcount + from[0].scount));
+}
+
+/* init big matrix */
+// slow in parallel
+DO_INLINE void init_bfmatrix(fmatrix3x3 *matrix, float m3[3][3])
+{
+  unsigned int i;
+
+  for (i = 0; i < matrix[0].vcount + matrix[0].scount; i++) {
+    cp_fmatrix(matrix[i].m, m3);
+  }
+}
+
+/* init the diagonal of big matrix */
+// slow in parallel
+DO_INLINE void initdiag_bfmatrix(fmatrix3x3 *matrix, float m3[3][3])
+{
+  unsigned int i, j;
+  float tmatrix[3][3] = {{0, 0, 0}, {0, 0, 0}, {0, 0, 0}};
+
+  for (i = 0; i < matrix[0].vcount; i++) {
+    cp_fmatrix(matrix[i].m, m3);
+  }
+  for (j = matrix[0].vcount; j < matrix[0].vcount + matrix[0].scount; j++) {
+    cp_fmatrix(matrix[j].m, tmatrix);
+  }
+}
+
+/* SPARSE SYMMETRIC multiply big matrix with long vector*/
+/* STATUS: verified */
+DO_INLINE void mul_bfmatrix_lfvector(float (*to)[3], fmatrix3x3 *from, lfVector *fLongVector)
+{
+  unsigned int vcount = from[0].vcount;
+  lfVector *temp = create_lfvector(vcount);
+
+  zero_lfvector(to, vcount);
+
+#  pragma omp parallel sections if (vcount > CLOTH_OPENMP_LIMIT)
+  {
+#  pragma omp section
+    {
+      for (unsigned int i = from[0].vcount; i < from[0].vcount + from[0].scount; i++) {
+        /* This is the lower triangle of the sparse matrix,
+         * therefore multiplication occurs with transposed submatrices. */
+        muladd_fmatrixT_fvector(to[from[i].c], from[i].m, fLongVector[from[i].r]);
+      }
+    }
+#  pragma omp section
+    {
+      for (unsigned int i = 0; i < from[0].vcount + from[0].scount; i++) {
+        muladd_fmatrix_fvector(temp[from[i].r], from[i].m, fLongVector[from[i].c]);
+      }
+    }
+  }
+  add_lfvector_lfvector(to, to, temp, from[0].vcount);
+
+  del_lfvector(temp);
+}
+
+/* SPARSE SYMMETRIC sub big matrix with big matrix*/
+/* A -= B * float + C * float --> for big matrix */
+/* VERIFIED */
+DO_INLINE void subadd_bfmatrixS_bfmatrixS(
+    fmatrix3x3 *to, fmatrix3x3 *from, float aS, fmatrix3x3 *matrix, float bS)
+{
+  unsigned int i = 0;
+
+  /* process diagonal elements */
+  for (i = 0; i < matrix[0].vcount + matrix[0].scount; i++) {
+    subadd_fmatrixS_fmatrixS(to[i].m, from[i].m, aS, matrix[i].m, bS);
+  }
+}
+
+///////////////////////////////////////////////////////////////////
+// simulator start
+///////////////////////////////////////////////////////////////////
+
+typedef struct Implicit_Data {
+  /* inputs */
+  fmatrix3x3 *bigI;        /* identity (constant) */
+  fmatrix3x3 *tfm;         /* local coordinate transform */
+  fmatrix3x3 *M;           /* masses */
+  lfVector *F;             /* forces */
+  fmatrix3x3 *dFdV, *dFdX; /* force jacobians */
+  int num_blocks;          /* number of off-diagonal blocks (springs) */
+
+  /* motion state data */
+  lfVector *X, *Xnew; /* positions */
+  lfVector *V, *Vnew; /* velocities */
+
+  /* internal solver data */
+  lfVector *B;   /* B for A*dV = B */
+  fmatrix3x3 *A; /* A for A*dV = B */
+
+  lfVector *dV;         /* velocity change (solution of A*dV = B) */
+  lfVector *z;          /* target velocity in constrained directions */
+  fmatrix3x3 *S;        /* filtering matrix for constraints */
+  fmatrix3x3 *P, *Pinv; /* pre-conditioning matrix */
+} Implicit_Data;
+
+Implicit_Data *BPH_mass_spring_solver_create(int numverts, int numsprings)
+{
+  Implicit_Data *id = (Implicit_Data *)MEM_callocN(sizeof(Implicit_Data), "implicit vecmat");
+
+  /* process diagonal elements */
+  id->tfm = create_bfmatrix(numverts, 0);
+  id->A = create_bfmatrix(numverts, numsprings);
+  id->dFdV = create_bfmatrix(numverts, numsprings);
+  id->dFdX = create_bfmatrix(numverts, numsprings);
+  id->S = create_bfmatrix(numverts, 0);
+  id->Pinv = create_bfmatrix(numverts, numsprings);
+  id->P = create_bfmatrix(numverts, numsprings);
+  id->bigI = create_bfmatrix(numverts, numsprings);  // TODO 0 springs
+  id->M = create_bfmatrix(numverts, numsprings);
+  id->X = create_lfvector(numverts);
+  id->Xnew = create_lfvector(numverts);
+  id->V = create_lfvector(numverts);
+  id->Vnew = create_lfvector(numverts);
+  id->F = create_lfvector(numverts);
+  id->B = create_lfvector(numverts);
+  id->dV = create_lfvector(numverts);
+  id->z = create_lfvector(numverts);
+
+  initdiag_bfmatrix(id->bigI, I);
+
+  return id;
+}
+
+void BPH_mass_spring_solver_free(Implicit_Data *id)
+{
+  del_bfmatrix(id->tfm);
+  del_bfmatrix(id->A);
+  del_bfmatrix(id->dFdV);
+  del_bfmatrix(id->dFdX);
+  del_bfmatrix(id->S);
+  del_bfmatrix(id->P);
+  del_bfmatrix(id->Pinv);
+  del_bfmatrix(id->bigI);
+  del_bfmatrix(id->M);
+
+  del_lfvector(id->X);
+  del_lfvector(id->Xnew);
+  del_lfvector(id->V);
+  del_lfvector(id->Vnew);
+  del_lfvector(id->F);
+  del_lfvector(id->B);
+  del_lfvector(id->dV);
+  del_lfvector(id->z);
+
+  MEM_freeN(id);
+}
+
+/* ==== Transformation from/to root reference frames ==== */
+
+BLI_INLINE void world_to_root_v3(Implicit_Data *data, int index, float r[3], const float v[3])
+{
+  copy_v3_v3(r, v);
+  mul_transposed_m3_v3(data->tfm[index].m, r);
+}
+
+BLI_INLINE void root_to_world_v3(Implicit_Data *data, int index, float r[3], const float v[3])
+{
+  mul_v3_m3v3(r, data->tfm[index].m, v);
+}
+
+BLI_INLINE void world_to_root_m3(Implicit_Data *data, int index, float r[3][3], float m[3][3])
+{
+  float trot[3][3];
+  copy_m3_m3(trot, data->tfm[index].m);
+  transpose_m3(trot);
+  mul_m3_m3m3(r, trot, m);
+}
+
+BLI_INLINE void root_to_world_m3(Implicit_Data *data, int index, float r[3][3], float m[3][3])
+{
+  mul_m3_m3m3(r, data->tfm[index].m, m);
+}
+
+/* ================================ */
+
+DO_INLINE void filter(lfVector *V, fmatrix3x3 *S)
+{
+  unsigned int i = 0;
+
+  for (i = 0; i < S[0].vcount; i++) {
+    mul_m3_v3(S[i].m, V[S[i].r]);
+  }
+}
+
+/* this version of the CG algorithm does not work very well with partial constraints
+ * (where S has non-zero elements). */
+#  if 0
+static int cg_filtered(lfVector *ldV, fmatrix3x3 *lA, lfVector *lB, lfVector *z, fmatrix3x3 *S)
+{
+  // Solves for unknown X in equation AX=B
+  unsigned int conjgrad_loopcount = 0, conjgrad_looplimit = 100;
+  float conjgrad_epsilon = 0.0001f /* , conjgrad_lasterror=0 */ /* UNUSED */;
+  lfVector *q, *d, *tmp, *r;
+  float s, starget, a, s_prev;
+  unsigned int numverts = lA[0].vcount;
+  q = create_lfvector(numverts);
+  d = create_lfvector(numverts);
+  tmp = create_lfvector(numverts);
+  r = create_lfvector(numverts);
+
+  // zero_lfvector(ldV, CLOTHPARTICLES);
+  filter(ldV, S);
+
+  add_lfvector_lfvector(ldV, ldV, z, numverts);
+
+  // r = B - Mul(tmp, A, X);    // just use B if X known to be zero
+  cp_lfvector(r, lB, numverts);
+  mul_bfmatrix_lfvector(tmp, lA, ldV);
+  sub_lfvector_lfvector(r, r, tmp, numverts);
+
+  filter(r, S);
+
+  cp_lfvector(d, r, numverts);
+
+  s = dot_lfvector(r, r, numverts);
+  starget = s * sqrtf(conjgrad_epsilon);
+
+  while (s > starget && conjgrad_loopcount < conjgrad_looplimit) {
+    // Mul(q, A, d); // q = A*d;
+    mul_bfmatrix_lfvector(q, lA, d);
+
+    filter(q, S);
+
+    a = s / dot_lfvector(d, q, numverts);
+
+    // X = X + d*a;
+    add_lfvector_lfvectorS(ldV, ldV, d, a, numverts);
+
+    // r = r - q*a;
+    sub_lfvector_lfvectorS(r, r, q, a, numverts);
+
+    s_prev = s;
+    s = dot_lfvector(r, r, numverts);
+
+    //d = r+d*(s/s_prev);
+    add_lfvector_lfvectorS(d, r, d, (s / s_prev), numverts);
+
+    filter(d, S);
+
+    conjgrad_loopcount++;
+  }
+  /* conjgrad_lasterror = s; */ /* UNUSED */
+
+  del_lfvector(q);
+  del_lfvector(d);
+  del_lfvector(tmp);
+  del_lfvector(r);
+  // printf("W/O conjgrad_loopcount: %d\n", conjgrad_loopcount);
+
+  return conjgrad_loopcount <
+         conjgrad_looplimit;  // true means we reached desired accuracy in given time - ie stable
+}
+#  endif
+
+static int cg_filtered(lfVector *ldV,
+                       fmatrix3x3 *lA,
+                       lfVector *lB,
+                       lfVector *z,
+                       fmatrix3x3 *S,
+                       ImplicitSolverResult *result)
+{
+  // Solves for unknown X in equation AX=B
+  unsigned int conjgrad_loopcount = 0, conjgrad_looplimit = 100;
+  float conjgrad_epsilon = 0.01f;
+
+  unsigned int numverts = lA[0].vcount;
+  lfVector *fB = create_lfvector(numverts);
+  lfVector *AdV = create_lfvector(numverts);
+  lfVector *r = create_lfvector(numverts);
+  lfVector *c = create_lfvector(numverts);
+  lfVector *q = create_lfvector(numverts);
+  lfVector *s = create_lfvector(numverts);
+  float bnorm2, delta_new, delta_old, delta_target, alpha;
+
+  cp_lfvector(ldV, z, numverts);
+
+  /* d0 = filter(B)^T * P * filter(B) */
+  cp_lfvector(fB, lB, numverts);
+  filter(fB, S);
+  bnorm2 = dot_lfvector(fB, fB, numverts);
+  delta_target = conjgrad_epsilon * conjgrad_epsilon * bnorm2;
+
+  /* r = filter(B - A * dV) */
+  mul_bfmatrix_lfvector(AdV, lA, ldV);
+  sub_lfvector_lfvector(r, lB, AdV, numverts);
+  filter(r, S);
+
+  /* c = filter(P^-1 * r) */
+  cp_lfvector(c, r, numverts);
+  filter(c, S);
+
+  /* delta = r^T * c */
+  delta_new = dot_lfvector(r, c, numverts);
+
+#  ifdef IMPLICIT_PRINT_SOLVER_INPUT_OUTPUT
+  printf("==== A ====\n");
+  print_bfmatrix(lA);
+  printf("==== z ====\n");
+  print_lvector(z, numverts);
+  printf("==== B ====\n");
+  print_lvector(lB, numverts);
+  printf("==== S ====\n");
+  print_bfmatrix(S);
+#  endif
+
+  while (delta_new > delta_target && conjgrad_loopcount < conjgrad_looplimit) {
+    mul_bfmatrix_lfvector(q, lA, c);
+    filter(q, S);
+
+    alpha = delta_new / dot_lfvector(c, q, numverts);
+
+    add_lfvector_lfvectorS(ldV, ldV, c, alpha, numverts);
+
+    add_lfvector_lfvectorS(r, r, q, -alpha, numverts);
+
+    /* s = P^-1 * r */
+    cp_lfvector(s, r, numverts);
+    delta_old = delta_new;
+    delta_new = dot_lfvector(r, s, numverts);
+
+    add_lfvector_lfvectorS(c, s, c, delta_new / delta_old, numverts);
+    filter(c, S);
+
+    conjgrad_loopcount++;
+  }
+
+#  ifdef IMPLICIT_PRINT_SOLVER_INPUT_OUTPUT
+  printf("==== dV ====\n");
+  print_lvector(ldV, numverts);
+  printf("========\n");
+#  endif
+
+  del_lfvector(fB);
+  del_lfvector(AdV);
+  del_lfvector(r);
+  del_lfvector(c);
+  del_lfvector(q);
+  del_lfvector(s);
+  // printf("W/O conjgrad_loopcount: %d\n", conjgrad_loopcount);
+
+  result->status = conjgrad_loopcount < conjgrad_looplimit ? BPH_SOLVER_SUCCESS :
+                                                             BPH_SOLVER_NO_CONVERGENCE;
+  result->iterations = conjgrad_loopcount;
+  result->error = bnorm2 > 0.0f ? sqrtf(delta_new / bnorm2) : 0.0f;
+
+  return conjgrad_loopcount <
+         conjgrad_looplimit;  // true means we reached desired accuracy in given time - ie stable
+}
+
+#  if 0
+// block diagonalizer
+DO_INLINE void BuildPPinv(fmatrix3x3 *lA, fmatrix3x3 *P, fmatrix3x3 *Pinv)
+{
+  unsigned int i = 0;
+
+  // Take only the diagonal blocks of A
+  // #pragma omp parallel for private(i) if (lA[0].vcount > CLOTH_OPENMP_LIMIT)
+  for (i = 0; i < lA[0].vcount; i++) {
+    // block diagonalizer
+    cp_fmatrix(P[i].m, lA[i].m);
+    inverse_fmatrix(Pinv[i].m, P[i].m);
+  }
+}
+
+#    if 0
+// version 1.3
+static int cg_filtered_pre(lfVector *dv,
+                           fmatrix3x3 *lA,
+                           lfVector *lB,
+                           lfVector *z,
+                           fmatrix3x3 *S,
+                           fmatrix3x3 *P,
+                           fmatrix3x3 *Pinv)
+{
+  unsigned int numverts = lA[0].vcount, iterations = 0, conjgrad_looplimit = 100;
+  float delta0 = 0, deltaNew = 0, deltaOld = 0, alpha = 0;
+  float conjgrad_epsilon = 0.0001;  // 0.2 is dt for steps=5
+  lfVector *r = create_lfvector(numverts);
+  lfVector *p = create_lfvector(numverts);
+  lfVector *s = create_lfvector(numverts);
+  lfVector *h = create_lfvector(numverts);
+
+  BuildPPinv(lA, P, Pinv);
+
+  filter(dv, S);
+  add_lfvector_lfvector(dv, dv, z, numverts);
+
+  mul_bfmatrix_lfvector(r, lA, dv);
+  sub_lfvector_lfvector(r, lB, r, numverts);
+  filter(r, S);
+
+  mul_prevfmatrix_lfvector(p, Pinv, r);
+  filter(p, S);
+
+  deltaNew = dot_lfvector(r, p, numverts);
+
+  delta0 = deltaNew * sqrt(conjgrad_epsilon);
+
+#      ifdef DEBUG_TIME
+  double start = PIL_check_seconds_timer();
+#      endif
+
+  while ((deltaNew > delta0) && (iterations < conjgrad_looplimit)) {
+    iterations++;
+
+    mul_bfmatrix_lfvector(s, lA, p);
+    filter(s, S);
+
+    alpha = deltaNew / dot_lfvector(p, s, numverts);
+
+    add_lfvector_lfvectorS(dv, dv, p, alpha, numverts);
+
+    add_lfvector_lfvectorS(r, r, s, -alpha, numverts);
+
+    mul_prevfmatrix_lfvector(h, Pinv, r);
+    filter(h, S);
+
+    deltaOld = deltaNew;
+
+    deltaNew = dot_lfvector(r, h, numverts);
+
+    add_lfvector_lfvectorS(p, h, p, deltaNew / deltaOld, numverts);
+
+    filter(p, S);
+  }
+
+#      ifdef DEBUG_TIME
+  double end = PIL_check_seconds_timer();
+  printf("cg_filtered_pre time: %f\n", (float)(end - start));
+#      endif
+
+  del_lfvector(h);
+  del_lfvector(s);
+  del_lfvector(p);
+  del_lfvector(r);
+
+  printf("iterations: %d\n", iterations);
+
+  return iterations < conjgrad_looplimit;
+}
+#    endif
+
+// version 1.4
+static int cg_filtered_pre(lfVector *dv,
+                           fmatrix3x3 *lA,
+                           lfVector *lB,
+                           lfVector *z,
+                           fmatrix3x3 *S,
+                           fmatrix3x3 *P,
+                           fmatrix3x3 *Pinv,
+                           fmatrix3x3 *bigI)
+{
+  unsigned int numverts = lA[0].vcount, iterations = 0, conjgrad_looplimit = 100;
+  float delta0 = 0, deltaNew = 0, deltaOld = 0, alpha = 0, tol = 0;
+  lfVector *r = create_lfvector(numverts);
+  lfVector *p = create_lfvector(numverts);
+  lfVector *s = create_lfvector(numverts);
+  lfVector *h = create_lfvector(numverts);
+  lfVector *bhat = create_lfvector(numverts);
+  lfVector *btemp = create_lfvector(numverts);
+
+  BuildPPinv(lA, P, Pinv);
+
+  initdiag_bfmatrix(bigI, I);
+  sub_bfmatrix_Smatrix(bigI, bigI, S);
+
+  // x = Sx_0+(I-S)z
+  filter(dv, S);
+  add_lfvector_lfvector(dv, dv, z, numverts);
+
+  // b_hat = S(b-A(I-S)z)
+  mul_bfmatrix_lfvector(r, lA, z);
+  mul_bfmatrix_lfvector(bhat, bigI, r);
+  sub_lfvector_lfvector(bhat, lB, bhat, numverts);
+
+  // r = S(b-Ax)
+  mul_bfmatrix_lfvector(r, lA, dv);
+  sub_lfvector_lfvector(r, lB, r, numverts);
+  filter(r, S);
+
+  // p = SP^-1r
+  mul_prevfmatrix_lfvector(p, Pinv, r);
+  filter(p, S);
+
+  // delta0 = bhat^TP^-1bhat
+  mul_prevfmatrix_lfvector(btemp, Pinv, bhat);
+  delta0 = dot_lfvector(bhat, btemp, numverts);
+
+  // deltaNew = r^TP
+  deltaNew = dot_lfvector(r, p, numverts);
+
+#    if 0
+  filter(dv, S);
+  add_lfvector_lfvector(dv, dv, z, numverts);
+
+  mul_bfmatrix_lfvector(r, lA, dv);
+  sub_lfvector_lfvector(r, lB, r, numverts);
+  filter(r, S);
+
+  mul_prevfmatrix_lfvector(p, Pinv, r);
+  filter(p, S);
+
+  deltaNew = dot_lfvector(r, p, numverts);
+
+  delta0 = deltaNew * sqrt(conjgrad_epsilon);
+#    endif
+
+#    ifdef DEBUG_TIME
+  double start = PIL_check_seconds_timer();
+#    endif
+
+  tol = (0.01 * 0.2);
+
+  while ((deltaNew > delta0 * tol * tol) && (iterations < conjgrad_looplimit)) {
+    iterations++;
+
+    mul_bfmatrix_lfvector(s, lA, p);
+    filter(s, S);
+
+    alpha = deltaNew / dot_lfvector(p, s, numverts);
+
+    add_lfvector_lfvectorS(dv, dv, p, alpha, numverts);
+
+    add_lfvector_lfvectorS(r, r, s, -alpha, numverts);
+
+    mul_prevfmatrix_lfvector(h, Pinv, r);
+    filter(h, S);
+
+    deltaOld = deltaNew;
+
+    deltaNew = dot_lfvector(r, h, numverts);
+
+    add_lfvector_lfvectorS(p, h, p, deltaNew / deltaOld, numverts);
+
+    filter(p, S);
+  }
+
+#    ifdef DEBUG_TIME
+  double end = PIL_check_seconds_timer();
+  printf("cg_filtered_pre time: %f\n", (float)(end - start));
+#    endif
+
+  del_lfvector(btemp);
+  del_lfvector(bhat);
+  del_lfvector(h);
+  del_lfvector(s);
+  del_lfvector(p);
+  del_lfvector(r);
+
+  // printf("iterations: %d\n", iterations);
+
+  return iterations < conjgrad_looplimit;
+}
+#  endif
+
+bool BPH_mass_spring_solve_velocities(Implicit_Data *data, float dt, ImplicitSolverResult *result)
+{
+  unsigned int numverts = data->dFdV[0].vcount;
+
+  lfVector *dFdXmV = create_lfvector(numverts);
+  zero_lfvector(data->dV, numverts);
+
+  cp_bfmatrix(data->A, data->M);
+
+  subadd_bfmatrixS_bfmatrixS(data->A, data->dFdV, dt, data->dFdX, (dt * dt));
+
+  mul_bfmatrix_lfvector(dFdXmV, data->dFdX, data->V);
+
+  add_lfvectorS_lfvectorS(data->B, data->F, dt, dFdXmV, (dt * dt), numverts);
+
+#  ifdef DEBUG_TIME
+  double start = PIL_check_seconds_timer();
+#  endif
+
+  /* Conjugate gradient algorithm to solve Ax=b. */
+  cg_filtered(data->dV, data->A, data->B, data->z, data->S, result);
+
+  // cg_filtered_pre(id->dV, id->A, id->B, id->z, id->S, id->P, id->Pinv, id->bigI);
+
+#  ifdef DEBUG_TIME
+  double end = PIL_check_seconds_timer();
+  printf("cg_filtered calc time: %f\n", (float)(end - start));
+#  endif
+
+  // advance velocities
+  add_lfvector_lfvector(data->Vnew, data->V, data->dV, numverts);
+
+  del_lfvector(dFdXmV);
+
+  return result->status == BPH_SOLVER_SUCCESS;
+}
+
+bool BPH_mass_spring_solve_positions(Implicit_Data *data, float dt)
+{
+  int numverts = data->M[0].vcount;
+
+  // advance positions
+  add_lfvector_lfvectorS(data->Xnew, data->X, data->Vnew, dt, numverts);
+
+  return true;
+}
+
+void BPH_mass_spring_apply_result(Implicit_Data *data)
+{
+  int numverts = data->M[0].vcount;
+  cp_lfvector(data->X, data->Xnew, numverts);
+  cp_lfvector(data->V, data->Vnew, numverts);
+}
+
+void BPH_mass_spring_set_vertex_mass(Implicit_Data *data, int index, float mass)
+{
+  unit_m3(data->M[index].m);
+  mul_m3_fl(data->M[index].m, mass);
+}
+
+void BPH_mass_spring_set_rest_transform(Implicit_Data *data, int index, float tfm[3][3])
+{
+#  ifdef CLOTH_ROOT_FRAME
+  copy_m3_m3(data->tfm[index].m, tfm);
+#  else
+  unit_m3(data->tfm[index].m);
+  (void)tfm;
+#  endif
+}
+
+void BPH_mass_spring_set_motion_state(Implicit_Data *data,
+                                      int index,
+                                      const float x[3],
+                                      const float v[3])
+{
+  world_to_root_v3(data, index, data->X[index], x);
+  world_to_root_v3(data, index, data->V[index], v);
+}
+
+void BPH_mass_spring_set_position(Implicit_Data *data, int index, const float x[3])
+{
+  world_to_root_v3(data, index, data->X[index], x);
+}
+
+void BPH_mass_spring_set_velocity(Implicit_Data *data, int index, const float v[3])
+{
+  world_to_root_v3(data, index, data->V[index], v);
+}
+
+void BPH_mass_spring_get_motion_state(struct Implicit_Data *data,
+                                      int index,
+                                      float x[3],
+                                      float v[3])
+{
+  if (x) {
+    root_to_world_v3(data, index, x, data->X[index]);
+  }
+  if (v) {
+    root_to_world_v3(data, index, v, data->V[index]);
+  }
+}
+
+void BPH_mass_spring_get_position(struct Implicit_Data *data, int index, float x[3])
+{
+  root_to_world_v3(data, index, x, data->X[index]);
+}
+
+void BPH_mass_spring_get_velocity(struct Implicit_Data *data, int index, float v[3])
+{
+  root_to_world_v3(data, index, v, data->V[index]);
+}
+
+void BPH_mass_spring_get_new_position(struct Implicit_Data *data, int index, float x[3])
+{
+  root_to_world_v3(data, index, x, data->Xnew[index]);
+}
+
+void BPH_mass_spring_set_new_position(struct Implicit_Data *data, int index, const float x[3])
+{
+  world_to_root_v3(data, index, data->Xnew[index], x);
+}
+
+void BPH_mass_spring_get_new_velocity(struct Implicit_Data *data, int index, float v[3])
+{
+  root_to_world_v3(data, index, v, data->Vnew[index]);
+}
+
+void BPH_mass_spring_set_new_velocity(struct Implicit_Data *data, int index, const float v[3])
+{
+  world_to_root_v3(data, index, data->Vnew[index], v);
+}
+
+/* -------------------------------- */
+
+static int BPH_mass_spring_add_block(Implicit_Data *data, int v1, int v2)
+{
+  int s = data->M[0].vcount + data->num_blocks; /* index from array start */
+  BLI_assert(s < data->M[0].vcount + data->M[0].scount);
+  ++data->num_blocks;
+
+  /* tfm and S don't have spring entries (diagonal blocks only) */
+  init_fmatrix(data->bigI + s, v1, v2);
+  init_fmatrix(data->M + s, v1, v2);
+  init_fmatrix(data->dFdX + s, v1, v2);
+  init_fmatrix(data->dFdV + s, v1, v2);
+  init_fmatrix(data->A + s, v1, v2);
+  init_fmatrix(data->P + s, v1, v2);
+  init_fmatrix(data->Pinv + s, v1, v2);
+
+  return s;
+}
+
+void BPH_mass_spring_clear_constraints(Implicit_Data *data)
+{
+  int i, numverts = data->S[0].vcount;
+  for (i = 0; i < numverts; i++) {
+    unit_m3(data->S[i].m);
+    zero_v3(data->z[i]);
+  }
+}
+
+void BPH_mass_spring_add_constraint_ndof0(Implicit_Data *data, int index, const float dV[3])
+{
+  zero_m3(data->S[index].m);
+
+  world_to_root_v3(data, index, data->z[index], dV);
+}
+
+void BPH_mass_spring_add_constraint_ndof1(
+    Implicit_Data *data, int index, const float c1[3], const float c2[3], const float dV[3])
+{
+  float m[3][3], p[3], q[3], u[3], cmat[3][3];
+
+  world_to_root_v3(data, index, p, c1);
+  mul_fvectorT_fvector(cmat, p, p);
+  sub_m3_m3m3(m, I, cmat);
+
+  world_to_root_v3(data, index, q, c2);
+  mul_fvectorT_fvector(cmat, q, q);
+  sub_m3_m3m3(m, m, cmat);
+
+  /* XXX not sure but multiplication should work here */
+  copy_m3_m3(data->S[index].m, m);
+  //  mul_m3_m3m3(data->S[index].m, data->S[index].m, m);
+
+  world_to_root_v3(data, index, u, dV);
+  add_v3_v3(data->z[index], u);
+}
+
+void BPH_mass_spring_add_constraint_ndof2(Implicit_Data *data,
+                                          int index,
+                                          const float c1[3],
+                                          const float dV[3])
+{
+  float m[3][3], p[3], u[3], cmat[3][3];
+
+  world_to_root_v3(data, index, p, c1);
+  mul_fvectorT_fvector(cmat, p, p);
+  sub_m3_m3m3(m, I, cmat);
+
+  copy_m3_m3(data->S[index].m, m);
+  //  mul_m3_m3m3(data->S[index].m, data->S[index].m, m);
+
+  world_to_root_v3(data, index, u, dV);
+  add_v3_v3(data->z[index], u);
+}
+
+void BPH_mass_spring_clear_forces(Implicit_Data *data)
+{
+  int numverts = data->M[0].vcount;
+  zero_lfvector(data->F, numverts);
+  init_bfmatrix(data->dFdX, ZERO);
+  init_bfmatrix(data->dFdV, ZERO);
+
+  data->num_blocks = 0;
+}
+
+void BPH_mass_spring_force_reference_frame(Implicit_Data *data,
+                                           int index,
+                                           const float acceleration[3],
+                                           const float omega[3],
+                                           const float domega_dt[3],
+                                           float mass)
+{
+#  ifdef CLOTH_ROOT_FRAME
+  float acc[3], w[3], dwdt[3];
+  float f[3], dfdx[3][3], dfdv[3][3];
+  float euler[3], coriolis[3], centrifugal[3], rotvel[3];
+  float deuler[3][3], dcoriolis[3][3], dcentrifugal[3][3], drotvel[3][3];
+
+  world_to_root_v3(data, index, acc, acceleration);
+  world_to_root_v3(data, index, w, omega);
+  world_to_root_v3(data, index, dwdt, domega_dt);
+
+  cross_v3_v3v3(euler, dwdt, data->X[index]);
+  cross_v3_v3v3(coriolis, w, data->V[index]);
+  mul_v3_fl(coriolis, 2.0f);
+  cross_v3_v3v3(rotvel, w, data->X[index]);
+  cross_v3_v3v3(centrifugal, w, rotvel);
+
+  sub_v3_v3v3(f, acc, euler);
+  sub_v3_v3(f, coriolis);
+  sub_v3_v3(f, centrifugal);
+
+  mul_v3_fl(f, mass); /* F = m * a */
+
+  cross_v3_identity(deuler, dwdt);
+  cross_v3_identity(dcoriolis, w);
+  mul_m3_fl(dcoriolis, 2.0f);
+  cross_v3_identity(drotvel, w);
+  cross_m3_v3m3(dcentrifugal, w, drotvel);
+
+  add_m3_m3m3(dfdx, deuler, dcentrifugal);
+  negate_m3(dfdx);
+  mul_m3_fl(dfdx, mass);
+
+  copy_m3_m3(dfdv, dcoriolis);
+  negate_m3(dfdv);
+  mul_m3_fl(dfdv, mass);
+
+  add_v3_v3(data->F[index], f);
+  add_m3_m3m3(data->dFdX[index].m, data->dFdX[index].m, dfdx);
+  add_m3_m3m3(data->dFdV[index].m, data->dFdV[index].m, dfdv);
+#  else
+  (void)data;
+  (void)index;
+  (void)acceleration;
+  (void)omega;
+  (void)domega_dt;
+#  endif
+}
+
+void BPH_mass_spring_force_gravity(Implicit_Data *data, int index, float mass, const float g[3])
+{
+  /* force = mass * acceleration (in this case: gravity) */
+  float f[3];
+  world_to_root_v3(data, index, f, g);
+  mul_v3_fl(f, mass);
+
+  add_v3_v3(data->F[index], f);
+}
+
+void BPH_mass_spring_force_drag(Implicit_Data *data, float drag)
+{
+  int i, numverts = data->M[0].vcount;
+  for (i = 0; i < numverts; i++) {
+    float tmp[3][3];
+
+    /* NB: uses root space velocity, no need to transform */
+    madd_v3_v3fl(data->F[i], data->V[i], -drag);
+
+    copy_m3_m3(tmp, I);
+    mul_m3_fl(tmp, -drag);
+    add_m3_m3m3(data->dFdV[i].m, data->dFdV[i].m, tmp);
+  }
+}
+
+void BPH_mass_spring_force_extern(
+    struct Implicit_Data *data, int i, const float f[3], float dfdx[3][3], float dfdv[3][3])
+{
+  float tf[3], tdfdx[3][3], tdfdv[3][3];
+  world_to_root_v3(data, i, tf, f);
+  world_to_root_m3(data, i, tdfdx, dfdx);
+  world_to_root_m3(data, i, tdfdv, dfdv);
+
+  add_v3_v3(data->F[i], tf);
+  add_m3_m3m3(data->dFdX[i].m, data->dFdX[i].m, tdfdx);
+  add_m3_m3m3(data->dFdV[i].m, data->dFdV[i].m, tdfdv);
+}
+
+static float calc_nor_area_tri(float nor[3],
+                               const float v1[3],
+                               const float v2[3],
+                               const float v3[3])
+{
+  float n1[3], n2[3];
+
+  sub_v3_v3v3(n1, v1, v2);
+  sub_v3_v3v3(n2, v2, v3);
+
+  cross_v3_v3v3(nor, n1, n2);
+  return normalize_v3(nor) / 2.0f;
+}
+
+/* XXX does not support force jacobians yet, since the effector system does not provide them either
+ */
+void BPH_mass_spring_force_face_wind(
+    Implicit_Data *data, int v1, int v2, int v3, const float (*winvec)[3])
+{
+  const float effector_scale = 0.02f;
+  int vs[3] = {v1, v2, v3};
+  float win[3], nor[3], area;
+  float factor, base_force;
+  float force[3];
+
+  /* calculate face normal and area */
+  area = calc_nor_area_tri(nor, data->X[v1], data->X[v2], data->X[v3]);
+  /* The force is calculated and split up evenly for each of the three face verts */
+  factor = effector_scale * area / 3.0f;
+
+  /* Calculate wind pressure at each vertex by projecting the wind field on the normal. */
+  for (int i = 0; i < 3; i++) {
+    world_to_root_v3(data, vs[i], win, winvec[vs[i]]);
+
+    force[i] = dot_v3v3(win, nor);
+  }
+
+  /* Compute per-vertex force values from local pressures.
+   * From integrating the pressure over the triangle and deriving
+   * equivalent vertex forces, it follows that:
+   *
+   * force[idx] = (sum(pressure) + pressure[idx]) * area / 12
+   *
+   * Effectively, 1/4 of the pressure acts just on its vertex,
+   * while 3/4 is split evenly over all three.
+   */
+  mul_v3_fl(force, factor / 4.0f);
+
+  base_force = force[0] + force[1] + force[2];
+
+  /* add pressure to each of the face verts */
+  madd_v3_v3fl(data->F[v1], nor, base_force + force[0]);
+  madd_v3_v3fl(data->F[v2], nor, base_force + force[1]);
+  madd_v3_v3fl(data->F[v3], nor, base_force + force[2]);
+}
+
+void BPH_mass_spring_force_face_extern(
+    Implicit_Data *data, int v1, int v2, int v3, const float (*forcevec)[3])
+{
+  const float effector_scale = 0.02f;
+  int vs[3] = {v1, v2, v3};
+  float nor[3], area;
+  float factor, base_force[3];
+  float force[3][3];
+
+  /* calculate face normal and area */
+  area = calc_nor_area_tri(nor, data->X[v1], data->X[v2], data->X[v3]);
+  /* The force is calculated and split up evenly for each of the three face verts */
+  factor = effector_scale * area / 3.0f;
+
+  /* Compute common and per-vertex force vectors from the original inputs. */
+  zero_v3(base_force);
+
+  for (int i = 0; i < 3; i++) {
+    world_to_root_v3(data, vs[i], force[i], forcevec[vs[i]]);
+
+    mul_v3_fl(force[i], factor / 4.0f);
+    add_v3_v3(base_force, force[i]);
+  }
+
+  /* Apply the common and vertex components to all vertices. */
+  for (int i = 0; i < 3; i++) {
+    add_v3_v3(force[i], base_force);
+    add_v3_v3(data->F[vs[i]], force[i]);
+  }
+}
+
+float BPH_tri_tetra_volume_signed_6x(Implicit_Data *data, int v1, int v2, int v3)
+{
+  /* The result will be 6x the volume */
+  return volume_tri_tetrahedron_signed_v3_6x(data->X[v1], data->X[v2], data->X[v3]);
+}
+
+float BPH_tri_area(struct Implicit_Data *data, int v1, int v2, int v3)
+{
+  float nor[3];
+
+  return calc_nor_area_tri(nor, data->X[v1], data->X[v2], data->X[v3]);
+}
+
+void BPH_mass_spring_force_pressure(Implicit_Data *data,
+                                    int v1,
+                                    int v2,
+                                    int v3,
+                                    float common_pressure,
+                                    const float *vertex_pressure,
+                                    const float weights[3])
+{
+  float nor[3], area;
+  float factor, base_force;
+  float force[3];
+
+  /* calculate face normal and area */
+  area = calc_nor_area_tri(nor, data->X[v1], data->X[v2], data->X[v3]);
+  /* The force is calculated and split up evenly for each of the three face verts */
+  factor = area / 3.0f;
+  base_force = common_pressure * factor;
+
+  /* Compute per-vertex force values from local pressures.
+   * From integrating the pressure over the triangle and deriving
+   * equivalent vertex forces, it follows that:
+   *
+   * force[idx] = (sum(pressure) + pressure[idx]) * area / 12
+   *
+   * Effectively, 1/4 of the pressure acts just on its vertex,
+   * while 3/4 is split evenly over all three.
+   */
+  if (vertex_pressure) {
+    copy_v3_fl3(force, vertex_pressure[v1], vertex_pressure[v2], vertex_pressure[v3]);
+    mul_v3_fl(force, factor / 4.0f);
+
+    base_force += force[0] + force[1] + force[2];
+  }
+  else {
+    zero_v3(force);
+  }
+
+  /* add pressure to each of the face verts */
+  madd_v3_v3fl(data->F[v1], nor, (base_force + force[0]) * weights[0]);
+  madd_v3_v3fl(data->F[v2], nor, (base_force + force[1]) * weights[1]);
+  madd_v3_v3fl(data->F[v3], nor, (base_force + force[2]) * weights[2]);
+}
+
+static void edge_wind_vertex(const float dir[3],
+                             float length,
+                             float radius,
+                             const float wind[3],
+                             float f[3],
+                             float UNUSED(dfdx[3][3]),
+                             float UNUSED(dfdv[3][3]))
+{
+  const float density = 0.01f; /* XXX arbitrary value, corresponds to effect of air density */
+  float cos_alpha, sin_alpha, cross_section;
+  float windlen = len_v3(wind);
+
+  if (windlen == 0.0f) {
+    zero_v3(f);
+    return;
+  }
+
+  /* angle of wind direction to edge */
+  cos_alpha = dot_v3v3(wind, dir) / windlen;
+  sin_alpha = sqrtf(1.0f - cos_alpha * cos_alpha);
+  cross_section = radius * ((float)M_PI * radius * sin_alpha + length * cos_alpha);
+
+  mul_v3_v3fl(f, wind, density * cross_section);
+}
+
+void BPH_mass_spring_force_edge_wind(
+    Implicit_Data *data, int v1, int v2, float radius1, float radius2, const float (*winvec)[3])
+{
+  float win[3], dir[3], length;
+  float f[3], dfdx[3][3], dfdv[3][3];
+
+  sub_v3_v3v3(dir, data->X[v1], data->X[v2]);
+  length = normalize_v3(dir);
+
+  world_to_root_v3(data, v1, win, winvec[v1]);
+  edge_wind_vertex(dir, length, radius1, win, f, dfdx, dfdv);
+  add_v3_v3(data->F[v1], f);
+
+  world_to_root_v3(data, v2, win, winvec[v2]);
+  edge_wind_vertex(dir, length, radius2, win, f, dfdx, dfdv);
+  add_v3_v3(data->F[v2], f);
+}
+
+void BPH_mass_spring_force_vertex_wind(Implicit_Data *data,
+                                       int v,
+                                       float UNUSED(radius),
+                                       const float (*winvec)[3])
+{
+  const float density = 0.01f; /* XXX arbitrary value, corresponds to effect of air density */
+
+  float wind[3];
+  float f[3];
+
+  world_to_root_v3(data, v, wind, winvec[v]);
+  mul_v3_v3fl(f, wind, density);
+  add_v3_v3(data->F[v], f);
+}
+
+BLI_INLINE void dfdx_spring(float to[3][3], const float dir[3], float length, float L, float k)
+{
+  // dir is unit length direction, rest is spring's restlength, k is spring constant.
+  // return  ( (I-outerprod(dir, dir))*Min(1.0f, rest/length) - I) * -k;
+  outerproduct(to, dir, dir);
+  sub_m3_m3m3(to, I, to);
+
+  mul_m3_fl(to, (L / length));
+  sub_m3_m3m3(to, to, I);
+  mul_m3_fl(to, k);
+}
+
+/* unused */
+#  if 0
+BLI_INLINE void dfdx_damp(float to[3][3],
+                          const float dir[3],
+                          float length,
+                          const float vel[3],
+                          float rest,
+                          float damping)
+{
+  // inner spring damping   vel is the relative velocity  of the endpoints.
+  //  return (I-outerprod(dir, dir)) * (-damping * -(dot(dir, vel)/Max(length, rest)));
+  mul_fvectorT_fvector(to, dir, dir);
+  sub_fmatrix_fmatrix(to, I, to);
+  mul_fmatrix_S(to, (-damping * -(dot_v3v3(dir, vel) / MAX2(length, rest))));
+}
+#  endif
+
+BLI_INLINE void dfdv_damp(float to[3][3], const float dir[3], float damping)
+{
+  // derivative of force wrt velocity
+  outerproduct(to, dir, dir);
+  mul_m3_fl(to, -damping);
+}
+
+BLI_INLINE float fb(float length, float L)
+{
+  float x = length / L;
+  float xx = x * x;
+  float xxx = xx * x;
+  float xxxx = xxx * x;
+  return (-11.541f * xxxx + 34.193f * xxx - 39.083f * xx + 23.116f * x - 9.713f);
+}
+
+BLI_INLINE float fbderiv(float length, float L)
+{
+  float x = length / L;
+  float xx = x * x;
+  float xxx = xx * x;
+  return (-46.164f * xxx + 102.579f * xx - 78.166f * x + 23.116f);
+}
+
+BLI_INLINE float fbstar(float length, float L, float kb, float cb)
+{
+  float tempfb_fl = kb * fb(length, L);
+  float fbstar_fl = cb * (length - L);
+
+  if (tempfb_fl < fbstar_fl) {
+    return fbstar_fl;
+  }
+  else {
+    return tempfb_fl;
+  }
+}
+
+// function to calculae bending spring force (taken from Choi & Co)
+BLI_INLINE float fbstar_jacobi(float length, float L, float kb, float cb)
+{
+  float tempfb_fl = kb * fb(length, L);
+  float fbstar_fl = cb * (length - L);
+
+  if (tempfb_fl < fbstar_fl) {
+    return -cb;
+  }
+  else {
+    return -kb * fbderiv(length, L);
+  }
+}
+
+/* calculate elonglation */
+BLI_INLINE bool spring_length(Implicit_Data *data,
+                              int i,
+                              int j,
+                              float r_extent[3],
+                              float r_dir[3],
+                              float *r_length,
+                              float r_vel[3])
+{
+  sub_v3_v3v3(r_extent, data->X[j], data->X[i]);
+  sub_v3_v3v3(r_vel, data->V[j], data->V[i]);
+  *r_length = len_v3(r_extent);
+
+  if (*r_length > ALMOST_ZERO) {
+#  if 0
+    if (length > L) {
+      if ((clmd->sim_parms->flags & CSIMSETT_FLAG_TEARING_ENABLED) &&
+          (((length - L) * 100.0f / L) > clmd->sim_parms->maxspringlen)) {
+        // cut spring!
+        s->flags |= CSPRING_FLAG_DEACTIVATE;
+        return false;
+      }
+    }
+#  endif
+    mul_v3_v3fl(r_dir, r_extent, 1.0f / (*r_length));
+  }
+  else {
+    zero_v3(r_dir);
+  }
+
+  return true;
+}
+
+BLI_INLINE void apply_spring(
+    Implicit_Data *data, int i, int j, const float f[3], float dfdx[3][3], float dfdv[3][3])
+{
+  int block_ij = BPH_mass_spring_add_block(data, i, j);
+
+  add_v3_v3(data->F[i], f);
+  sub_v3_v3(data->F[j], f);
+
+  add_m3_m3m3(data->dFdX[i].m, data->dFdX[i].m, dfdx);
+  add_m3_m3m3(data->dFdX[j].m, data->dFdX[j].m, dfdx);
+  sub_m3_m3m3(data->dFdX[block_ij].m, data->dFdX[block_ij].m, dfdx);
+
+  add_m3_m3m3(data->dFdV[i].m, data->dFdV[i].m, dfdv);
+  add_m3_m3m3(data->dFdV[j].m, data->dFdV[j].m, dfdv);
+  sub_m3_m3m3(data->dFdV[block_ij].m, data->dFdV[block_ij].m, dfdv);
+}
+
+bool BPH_mass_spring_force_spring_linear(Implicit_Data *data,
+                                         int i,
+                                         int j,
+                                         float restlen,
+                                         float stiffness_tension,
+                                         float damping_tension,
+                                         float stiffness_compression,
+                                         float damping_compression,
+                                         bool resist_compress,
+                                         bool new_compress,
+                                         float clamp_force)
+{
+  float extent[3], length, dir[3], vel[3];
+  float f[3], dfdx[3][3], dfdv[3][3];
+  float damping = 0;
+
+  // calculate elonglation
+  spring_length(data, i, j, extent, dir, &length, vel);
+
+  /* This code computes not only the force, but also its derivative.
+   * Zero derivative effectively disables the spring for the implicit solver.
+   * Thus length > restlen makes cloth unconstrained at the start of simulation. */
+  if ((length >= restlen && length > 0) || resist_compress) {
+    float stretch_force;
+
+    damping = damping_tension;
+
+    stretch_force = stiffness_tension * (length - restlen);
+    if (clamp_force > 0.0f && stretch_force > clamp_force) {
+      stretch_force = clamp_force;
+    }
+    mul_v3_v3fl(f, dir, stretch_force);
+
+    dfdx_spring(dfdx, dir, length, restlen, stiffness_tension);
+  }
+  else if (new_compress) {
+    /* This is based on the Choi and Ko bending model,
+     * which works surprisingly well for compression. */
+    float kb = stiffness_compression;
+    float cb = kb; /* cb equal to kb seems to work, but a factor can be added if necessary */
+
+    damping = damping_compression;
+
+    mul_v3_v3fl(f, dir, fbstar(length, restlen, kb, cb));
+
+    outerproduct(dfdx, dir, dir);
+    mul_m3_fl(dfdx, fbstar_jacobi(length, restlen, kb, cb));
+  }
+  else {
+    return false;
+  }
+
+  madd_v3_v3fl(f, dir, damping * dot_v3v3(vel, dir));
+  dfdv_damp(dfdv, dir, damping);
+
+  apply_spring(data, i, j, f, dfdx, dfdv);
+
+  return true;
+}
+
+/* See "Stable but Responsive Cloth" (Choi, Ko 2005) */
+bool BPH_mass_spring_force_spring_bending(
+    Implicit_Data *data, int i, int j, float restlen, float kb, float cb)
+{
+  float extent[3], length, dir[3], vel[3];
+
+  // calculate elonglation
+  spring_length(data, i, j, extent, dir, &length, vel);
+
+  if (length < restlen) {
+    float f[3], dfdx[3][3], dfdv[3][3];
+
+    mul_v3_v3fl(f, dir, fbstar(length, restlen, kb, cb));
+
+    outerproduct(dfdx, dir, dir);
+    mul_m3_fl(dfdx, fbstar_jacobi(length, restlen, kb, cb));
+
+    /* XXX damping not supported */
+    zero_m3(dfdv);
+
+    apply_spring(data, i, j, f, dfdx, dfdv);
+
+    return true;
+  }
+  else {
+    return false;
+  }
+}
+
+BLI_INLINE void poly_avg(lfVector *data, const int *inds, int len, float r_avg[3])
+{
+  float fact = 1.0f / (float)len;
+
+  zero_v3(r_avg);
+
+  for (int i = 0; i < len; i++) {
+    madd_v3_v3fl(r_avg, data[inds[i]], fact);
+  }
+}
+
+BLI_INLINE void poly_norm(lfVector *data, int i, int j, int *inds, int len, float r_dir[3])
+{
+  float mid[3];
+
+  poly_avg(data, inds, len, mid);
+
+  normal_tri_v3(r_dir, data[i], data[j], mid);
+}
+
+BLI_INLINE void edge_avg(lfVector *data, int i, int j, float r_avg[3])
+{
+  r_avg[0] = (data[i][0] + data[j][0]) * 0.5f;
+  r_avg[1] = (data[i][1] + data[j][1]) * 0.5f;
+  r_avg[2] = (data[i][2] + data[j][2]) * 0.5f;
+}
+
+BLI_INLINE void edge_norm(lfVector *data, int i, int j, float r_dir[3])
+{
+  sub_v3_v3v3(r_dir, data[i], data[j]);
+  normalize_v3(r_dir);
+}
+
+BLI_INLINE float bend_angle(float dir_a[3], float dir_b[3], float dir_e[3])
+{
+  float cos, sin;
+  float tmp[3];
+
+  cos = dot_v3v3(dir_a, dir_b);
+
+  cross_v3_v3v3(tmp, dir_a, dir_b);
+  sin = dot_v3v3(tmp, dir_e);
+
+  return atan2f(sin, cos);
+}
+
+BLI_INLINE void spring_angle(Implicit_Data *data,
+                             int i,
+                             int j,
+                             int *i_a,
+                             int *i_b,
+                             int len_a,
+                             int len_b,
+                             float r_dir_a[3],
+                             float r_dir_b[3],
+                             float *r_angle,
+                             float r_vel_a[3],
+                             float r_vel_b[3])
+{
+  float dir_e[3], vel_e[3];
+
+  poly_norm(data->X, j, i, i_a, len_a, r_dir_a);
+  poly_norm(data->X, i, j, i_b, len_b, r_dir_b);
+
+  edge_norm(data->X, i, j, dir_e);
+
+  *r_angle = bend_angle(r_dir_a, r_dir_b, dir_e);
+
+  poly_avg(data->V, i_a, len_a, r_vel_a);
+  poly_avg(data->V, i_b, len_b, r_vel_b);
+
+  edge_avg(data->V, i, j, vel_e);
+
+  sub_v3_v3(r_vel_a, vel_e);
+  sub_v3_v3(r_vel_b, vel_e);
+}
+
+/* Angular springs roughly based on the bending model proposed by Baraff and Witkin in "Large Steps
+ * in Cloth Simulation". */
+bool BPH_mass_spring_force_spring_angular(Implicit_Data *data,
+                                          int i,
+                                          int j,
+                                          int *i_a,
+                                          int *i_b,
+                                          int len_a,
+                                          int len_b,
+                                          float restang,
+                                          float stiffness,
+                                          float damping)
+{
+  float angle, dir_a[3], dir_b[3], vel_a[3], vel_b[3];
+  float f_a[3], f_b[3], f_e[3];
+  float force;
+  int x;
+
+  spring_angle(data, i, j, i_a, i_b, len_a, len_b, dir_a, dir_b, &angle, vel_a, vel_b);
+
+  /* spring force */
+  force = stiffness * (angle - restang);
+
+  /* damping force */
+  force += -damping * (dot_v3v3(vel_a, dir_a) + dot_v3v3(vel_b, dir_b));
+
+  mul_v3_v3fl(f_a, dir_a, force / len_a);
+  mul_v3_v3fl(f_b, dir_b, force / len_b);
+
+  for (x = 0; x < len_a; x++) {
+    add_v3_v3(data->F[i_a[x]], f_a);
+  }
+
+  for (x = 0; x < len_b; x++) {
+    add_v3_v3(data->F[i_b[x]], f_b);
+  }
+
+  mul_v3_v3fl(f_a, dir_a, force * 0.5f);
+  mul_v3_v3fl(f_b, dir_b, force * 0.5f);
+
+  add_v3_v3v3(f_e, f_a, f_b);
+
+  sub_v3_v3(data->F[i], f_e);
+  sub_v3_v3(data->F[j], f_e);
+
+  return true;
+}
+
+/* Jacobian of a direction vector.
+ * Basically the part of the differential orthogonal to the direction,
+ * inversely proportional to the length of the edge.
+ *
+ * dD_ij/dx_i = -dD_ij/dx_j = (D_ij * D_ij^T - I) / len_ij
+ */
+BLI_INLINE void spring_grad_dir(
+    Implicit_Data *data, int i, int j, float edge[3], float dir[3], float grad_dir[3][3])
+{
+  float length;
+
+  sub_v3_v3v3(edge, data->X[j], data->X[i]);
+  length = normalize_v3_v3(dir, edge);
+
+  if (length > ALMOST_ZERO) {
+    outerproduct(grad_dir, dir, dir);
+    sub_m3_m3m3(grad_dir, I, grad_dir);
+    mul_m3_fl(grad_dir, 1.0f / length);
+  }
+  else {
+    zero_m3(grad_dir);
+  }
+}
+
+BLI_INLINE void spring_hairbend_forces(Implicit_Data *data,
+                                       int i,
+                                       int j,
+                                       int k,
+                                       const float goal[3],
+                                       float stiffness,
+                                       float damping,
+                                       int q,
+                                       const float dx[3],
+                                       const float dv[3],
+                                       float r_f[3])
+{
+  float edge_ij[3], dir_ij[3];
+  float edge_jk[3], dir_jk[3];
+  float vel_ij[3], vel_jk[3], vel_ortho[3];
+  float f_bend[3], f_damp[3];
+  float fk[3];
+  float dist[3];
+
+  zero_v3(fk);
+
+  sub_v3_v3v3(edge_ij, data->X[j], data->X[i]);
+  if (q == i) {
+    sub_v3_v3(edge_ij, dx);
+  }
+  if (q == j) {
+    add_v3_v3(edge_ij, dx);
+  }
+  normalize_v3_v3(dir_ij, edge_ij);
+
+  sub_v3_v3v3(edge_jk, data->X[k], data->X[j]);
+  if (q == j) {
+    sub_v3_v3(edge_jk, dx);
+  }
+  if (q == k) {
+    add_v3_v3(edge_jk, dx);
+  }
+  normalize_v3_v3(dir_jk, edge_jk);
+
+  sub_v3_v3v3(vel_ij, data->V[j], data->V[i]);
+  if (q == i) {
+    sub_v3_v3(vel_ij, dv);
+  }
+  if (q == j) {
+    add_v3_v3(vel_ij, dv);
+  }
+
+  sub_v3_v3v3(vel_jk, data->V[k], data->V[j]);
+  if (q == j) {
+    sub_v3_v3(vel_jk, dv);
+  }
+  if (q == k) {
+    add_v3_v3(vel_jk, dv);
+  }
+
+  /* bending force */
+  sub_v3_v3v3(dist, goal, edge_jk);
+  mul_v3_v3fl(f_bend, dist, stiffness);
+
+  add_v3_v3(fk, f_bend);
+
+  /* damping force */
+  madd_v3_v3v3fl(vel_ortho, vel_jk, dir_jk, -dot_v3v3(vel_jk, dir_jk));
+  mul_v3_v3fl(f_damp, vel_ortho, damping);
+
+  sub_v3_v3(fk, f_damp);
+
+  copy_v3_v3(r_f, fk);
+}
+
+/* Finite Differences method for estimating the jacobian of the force */
+BLI_INLINE void spring_hairbend_estimate_dfdx(Implicit_Data *data,
+                                              int i,
+                                              int j,
+                                              int k,
+                                              const float goal[3],
+                                              float stiffness,
+                                              float damping,
+                                              int q,
+                                              float dfdx[3][3])
+{
+  const float delta = 0.00001f;  // TODO find a good heuristic for this
+  float dvec_null[3][3], dvec_pos[3][3], dvec_neg[3][3];
+  float f[3];
+  int a, b;
+
+  zero_m3(dvec_null);
+  unit_m3(dvec_pos);
+  mul_m3_fl(dvec_pos, delta * 0.5f);
+  copy_m3_m3(dvec_neg, dvec_pos);
+  negate_m3(dvec_neg);
+
+  /* XXX TODO offset targets to account for position dependency */
+
+  for (a = 0; a < 3; a++) {
+    spring_hairbend_forces(
+        data, i, j, k, goal, stiffness, damping, q, dvec_pos[a], dvec_null[a], f);
+    copy_v3_v3(dfdx[a], f);
+
+    spring_hairbend_forces(
+        data, i, j, k, goal, stiffness, damping, q, dvec_neg[a], dvec_null[a], f);
+    sub_v3_v3(dfdx[a], f);
+
+    for (b = 0; b < 3; b++) {
+      dfdx[a][b] /= delta;
+    }
+  }
+}
+
+/* Finite Differences method for estimating the jacobian of the force */
+BLI_INLINE void spring_hairbend_estimate_dfdv(Implicit_Data *data,
+                                              int i,
+                                              int j,
+                                              int k,
+                                              const float goal[3],
+                                              float stiffness,
+                                              float damping,
+                                              int q,
+                                              float dfdv[3][3])
+{
+  const float delta = 0.00001f;  // TODO find a good heuristic for this
+  float dvec_null[3][3], dvec_pos[3][3], dvec_neg[3][3];
+  float f[3];
+  int a, b;
+
+  zero_m3(dvec_null);
+  unit_m3(dvec_pos);
+  mul_m3_fl(dvec_pos, delta * 0.5f);
+  copy_m3_m3(dvec_neg, dvec_pos);
+  negate_m3(dvec_neg);
+
+  /* XXX TODO offset targets to account for position dependency */
+
+  for (a = 0; a < 3; a++) {
+    spring_hairbend_forces(
+        data, i, j, k, goal, stiffness, damping, q, dvec_null[a], dvec_pos[a], f);
+    copy_v3_v3(dfdv[a], f);
+
+    spring_hairbend_forces(
+        data, i, j, k, goal, stiffness, damping, q, dvec_null[a], dvec_neg[a], f);
+    sub_v3_v3(dfdv[a], f);
+
+    for (b = 0; b < 3; b++) {
+      dfdv[a][b] /= delta;
+    }
+  }
+}
+
+/* Angular spring that pulls the vertex toward the local target
+ * See "Artistic Simulation of Curly Hair" (Pixar technical memo #12-03a)
+ */
+bool BPH_mass_spring_force_spring_bending_hair(Implicit_Data *data,
+                                               int i,
+                                               int j,
+                                               int k,
+                                               const float target[3],
+                                               float stiffness,
+                                               float damping)
+{
+  float goal[3];
+  float fj[3], fk[3];
+  float dfj_dxi[3][3], dfj_dxj[3][3], dfk_dxi[3][3], dfk_dxj[3][3], dfk_dxk[3][3];
+  float dfj_dvi[3][3], dfj_dvj[3][3], dfk_dvi[3][3], dfk_dvj[3][3], dfk_dvk[3][3];
+
+  const float vecnull[3] = {0.0f, 0.0f, 0.0f};
+
+  int block_ij = BPH_mass_spring_add_block(data, i, j);
+  int block_jk = BPH_mass_spring_add_block(data, j, k);
+  int block_ik = BPH_mass_spring_add_block(data, i, k);
+
+  world_to_root_v3(data, j, goal, target);
+
+  spring_hairbend_forces(data, i, j, k, goal, stiffness, damping, k, vecnull, vecnull, fk);
+  negate_v3_v3(fj, fk); /* counterforce */
+
+  spring_hairbend_estimate_dfdx(data, i, j, k, goal, stiffness, damping, i, dfk_dxi);
+  spring_hairbend_estimate_dfdx(data, i, j, k, goal, stiffness, damping, j, dfk_dxj);
+  spring_hairbend_estimate_dfdx(data, i, j, k, goal, stiffness, damping, k, dfk_dxk);
+  copy_m3_m3(dfj_dxi, dfk_dxi);
+  negate_m3(dfj_dxi);
+  copy_m3_m3(dfj_dxj, dfk_dxj);
+  negate_m3(dfj_dxj);
+
+  spring_hairbend_estimate_dfdv(data, i, j, k, goal, stiffness, damping, i, dfk_dvi);
+  spring_hairbend_estimate_dfdv(data, i, j, k, goal, stiffness, damping, j, dfk_dvj);
+  spring_hairbend_estimate_dfdv(data, i, j, k, goal, stiffness, damping, k, dfk_dvk);
+  copy_m3_m3(dfj_dvi, dfk_dvi);
+  negate_m3(dfj_dvi);
+  copy_m3_m3(dfj_dvj, dfk_dvj);
+  negate_m3(dfj_dvj);
+
+  /* add forces and jacobians to the solver data */
+
+  add_v3_v3(data->F[j], fj);
+  add_v3_v3(data->F[k], fk);
+
+  add_m3_m3m3(data->dFdX[j].m, data->dFdX[j].m, dfj_dxj);
+  add_m3_m3m3(data->dFdX[k].m, data->dFdX[k].m, dfk_dxk);
+
+  add_m3_m3m3(data->dFdX[block_ij].m, data->dFdX[block_ij].m, dfj_dxi);
+  add_m3_m3m3(data->dFdX[block_jk].m, data->dFdX[block_jk].m, dfk_dxj);
+  add_m3_m3m3(data->dFdX[block_ik].m, data->dFdX[block_ik].m, dfk_dxi);
+
+  add_m3_m3m3(data->dFdV[j].m, data->dFdV[j].m, dfj_dvj);
+  add_m3_m3m3(data->dFdV[k].m, data->dFdV[k].m, dfk_dvk);
+
+  add_m3_m3m3(data->dFdV[block_ij].m, data->dFdV[block_ij].m, dfj_dvi);
+  add_m3_m3m3(data->dFdV[block_jk].m, data->dFdV[block_jk].m, dfk_dvj);
+  add_m3_m3m3(data->dFdV[block_ik].m, data->dFdV[block_ik].m, dfk_dvi);
+
+  /* XXX analytical calculation of derivatives below is incorrect.
+   * This proved to be difficult, but for now just using the finite difference method for
+   * estimating the jacobians should be sufficient.
+   */
+#  if 0
+  float edge_ij[3], dir_ij[3], grad_dir_ij[3][3];
+  float edge_jk[3], dir_jk[3], grad_dir_jk[3][3];
+  float dist[3], vel_jk[3], vel_jk_ortho[3], projvel[3];
+  float target[3];
+  float tmp[3][3];
+  float fi[3], fj[3], fk[3];
+  float dfi_dxi[3][3], dfj_dxi[3][3], dfj_dxj[3][3], dfk_dxi[3][3], dfk_dxj[3][3], dfk_dxk[3][3];
+  float dfdvi[3][3];
+
+  // TESTING
+  damping = 0.0f;
+
+  zero_v3(fi);
+  zero_v3(fj);
+  zero_v3(fk);
+  zero_m3(dfi_dxi);
+  zero_m3(dfj_dxi);
+  zero_m3(dfk_dxi);
+  zero_m3(dfk_dxj);
+  zero_m3(dfk_dxk);
+
+  /* jacobian of direction vectors */
+  spring_grad_dir(data, i, j, edge_ij, dir_ij, grad_dir_ij);
+  spring_grad_dir(data, j, k, edge_jk, dir_jk, grad_dir_jk);
+
+  sub_v3_v3v3(vel_jk, data->V[k], data->V[j]);
+
+  /* bending force */
+  mul_v3_v3fl(target, dir_ij, restlen);
+  sub_v3_v3v3(dist, target, edge_jk);
+  mul_v3_v3fl(fk, dist, stiffness);
+
+  /* damping force */
+  madd_v3_v3v3fl(vel_jk_ortho, vel_jk, dir_jk, -dot_v3v3(vel_jk, dir_jk));
+  madd_v3_v3fl(fk, vel_jk_ortho, damping);
+
+  /* XXX this only holds true as long as we assume straight rest shape!
+   * eventually will become a bit more involved since the opposite segment
+   * gets its own target, under condition of having equal torque on both sides.
+   */
+  copy_v3_v3(fi, fk);
+
+  /* counterforce on the middle point */
+  sub_v3_v3(fj, fi);
+  sub_v3_v3(fj, fk);
+
+  /* === derivatives === */
+
+  madd_m3_m3fl(dfk_dxi, grad_dir_ij, stiffness * restlen);
+
+  madd_m3_m3fl(dfk_dxj, grad_dir_ij, -stiffness * restlen);
+  madd_m3_m3fl(dfk_dxj, I, stiffness);
+
+  madd_m3_m3fl(dfk_dxk, I, -stiffness);
+
+  copy_m3_m3(dfi_dxi, dfk_dxk);
+  negate_m3(dfi_dxi);
+
+  /* dfj_dfi == dfi_dfj due to symmetry,
+   * dfi_dfj == dfk_dfj due to fi == fk
+   * XXX see comment above on future bent rest shapes
+   */
+  copy_m3_m3(dfj_dxi, dfk_dxj);
+
+  /* dfj_dxj == -(dfi_dxj + dfk_dxj) due to fj == -(fi + fk) */
+  sub_m3_m3m3(dfj_dxj, dfj_dxj, dfj_dxi);
+  sub_m3_m3m3(dfj_dxj, dfj_dxj, dfk_dxj);
+
+  /* add forces and jacobians to the solver data */
+  add_v3_v3(data->F[i], fi);
+  add_v3_v3(data->F[j], fj);
+  add_v3_v3(data->F[k], fk);
+
+  add_m3_m3m3(data->dFdX[i].m, data->dFdX[i].m, dfi_dxi);
+  add_m3_m3m3(data->dFdX[j].m, data->dFdX[j].m, dfj_dxj);
+  add_m3_m3m3(data->dFdX[k].m, data->dFdX[k].m, dfk_dxk);
+
+  add_m3_m3m3(data->dFdX[block_ij].m, data->dFdX[block_ij].m, dfj_dxi);
+  add_m3_m3m3(data->dFdX[block_jk].m, data->dFdX[block_jk].m, dfk_dxj);
+  add_m3_m3m3(data->dFdX[block_ik].m, data->dFdX[block_ik].m, dfk_dxi);
+#  endif
+
+  return true;
+}
+
+bool BPH_mass_spring_force_spring_goal(Implicit_Data *data,
+                                       int i,
+                                       const float goal_x[3],
+                                       const float goal_v[3],
+                                       float stiffness,
+                                       float damping)
+{
+  float root_goal_x[3], root_goal_v[3], extent[3], length, dir[3], vel[3];
+  float f[3], dfdx[3][3], dfdv[3][3];
+
+  /* goal is in world space */
+  world_to_root_v3(data, i, root_goal_x, goal_x);
+  world_to_root_v3(data, i, root_goal_v, goal_v);
+
+  sub_v3_v3v3(extent, root_goal_x, data->X[i]);
+  sub_v3_v3v3(vel, root_goal_v, data->V[i]);
+  length = normalize_v3_v3(dir, extent);
+
+  if (length > ALMOST_ZERO) {
+    mul_v3_v3fl(f, dir, stiffness * length);
+
+    // Ascher & Boxman, p.21: Damping only during elonglation
+    // something wrong with it...
+    madd_v3_v3fl(f, dir, damping * dot_v3v3(vel, dir));
+
+    dfdx_spring(dfdx, dir, length, 0.0f, stiffness);
+    dfdv_damp(dfdv, dir, damping);
+
+    add_v3_v3(data->F[i], f);
+    add_m3_m3m3(data->dFdX[i].m, data->dFdX[i].m, dfdx);
+    add_m3_m3m3(data->dFdV[i].m, data->dFdV[i].m, dfdv);
+
+    return true;
+  }
+  else {
+    return false;
+  }
+}
+
+#endif /* IMPLICIT_SOLVER_BLENDER */
diff --git a/source/blender/simulation/intern/implicit_eigen.cpp b/source/blender/simulation/intern/implicit_eigen.cpp
new file mode 100644
index 00000000000..58538c13116
--- /dev/null
+++ b/source/blender/simulation/intern/implicit_eigen.cpp
@@ -0,0 +1,1509 @@
+/*
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ *
+ * The Original Code is Copyright (C) Blender Foundation
+ * All rights reserved.
+ */
+
+/** \file
+ * \ingroup bph
+ */
+
+#include "implicit.h"
+
+#ifdef IMPLICIT_SOLVER_EIGEN
+
+//#define USE_EIGEN_CORE
+#  define USE_EIGEN_CONSTRAINED_CG
+
+#  ifdef __GNUC__
+#    pragma GCC diagnostic push
+/* XXX suppress verbose warnings in eigen */
+//#  pragma GCC diagnostic ignored "-Wlogical-op"
+#  endif
+
+#  ifndef IMPLICIT_ENABLE_EIGEN_DEBUG
+#    ifdef NDEBUG
+#      define IMPLICIT_NDEBUG
+#    endif
+#    define NDEBUG
+#  endif
+
+#  include <Eigen/Sparse>
+#  include <Eigen/src/Core/util/DisableStupidWarnings.h>
+
+#  ifdef USE_EIGEN_CONSTRAINED_CG
+#    include <intern/ConstrainedConjugateGradient.h>
+#  endif
+
+#  ifndef IMPLICIT_ENABLE_EIGEN_DEBUG
+#    ifndef IMPLICIT_NDEBUG
+#      undef NDEBUG
+#    else
+#      undef IMPLICIT_NDEBUG
+#    endif
+#  endif
+
+#  ifdef __GNUC__
+#    pragma GCC diagnostic pop
+#  endif
+
+#  include "MEM_guardedalloc.h"
+
+extern "C" {
+#  include "DNA_meshdata_types.h"
+#  include "DNA_object_force_types.h"
+#  include "DNA_object_types.h"
+#  include "DNA_scene_types.h"
+#  include "DNA_texture_types.h"
+
+#  include "BLI_linklist.h"
+#  include "BLI_math.h"
+#  include "BLI_utildefines.h"
+
+#  include "BKE_cloth.h"
+#  include "BKE_collision.h"
+#  include "BKE_effect.h"
+#  include "BKE_global.h"
+
+#  include "BPH_mass_spring.h"
+}
+
+typedef float Scalar;
+
+static float I[3][3] = {{1, 0, 0}, {0, 1, 0}, {0, 0, 1}};
+
+/* slightly extended Eigen vector class
+ * with conversion to/from plain C float array
+ */
+class fVector : public Eigen::Vector3f {
+ public:
+  typedef float *ctype;
+
+  fVector()
+  {
+  }
+
+  fVector(const ctype &v)
+  {
+    for (int k = 0; k < 3; k++) {
+      coeffRef(k) = v[k];
+    }
+  }
+
+  fVector &operator=(const ctype &v)
+  {
+    for (int k = 0; k < 3; k++) {
+      coeffRef(k) = v[k];
+    }
+    return *this;
+  }
+
+  operator ctype()
+  {
+    return data();
+  }
+};
+
+/* slightly extended Eigen matrix class
+ * with conversion to/from plain C float array
+ */
+class fMatrix : public Eigen::Matrix3f {
+ public:
+  typedef float (*ctype)[3];
+
+  fMatrix()
+  {
+  }
+
+  fMatrix(const ctype &v)
+  {
+    for (int k = 0; k < 3; k++) {
+      for (int l = 0; l < 3; l++) {
+        coeffRef(l, k) = v[k][l];
+      }
+    }
+  }
+
+  fMatrix &operator=(const ctype &v)
+  {
+    for (int k = 0; k < 3; k++) {
+      for (int l = 0; l < 3; l++) {
+        coeffRef(l, k) = v[k][l];
+      }
+    }
+    return *this;
+  }
+
+  operator ctype()
+  {
+    return (ctype)data();
+  }
+};
+
+/* Extension of dense Eigen vectors,
+ * providing 3-float block access for blenlib math functions
+ */
+class lVector : public Eigen::VectorXf {
+ public:
+  typedef Eigen::VectorXf base_t;
+
+  lVector()
+  {
+  }
+
+  template<typename T> lVector &operator=(T rhs)
+  {
+    base_t::operator=(rhs);
+    return *this;
+  }
+
+  float *v3(int vertex)
+  {
+    return &coeffRef(3 * vertex);
+  }
+
+  const float *v3(int vertex) const
+  {
+    return &coeffRef(3 * vertex);
+  }
+};
+
+typedef Eigen::Triplet<Scalar> Triplet;
+typedef std::vector<Triplet> TripletList;
+
+typedef Eigen::SparseMatrix<Scalar> lMatrix;
+
+/* Constructor type that provides more convenient handling of Eigen triplets
+ * for efficient construction of sparse 3x3 block matrices.
+ * This should be used for building lMatrix instead of writing to such lMatrix directly (which is
+ * very inefficient). After all elements have been defined using the set() method, the actual
+ * matrix can be filled using construct().
+ */
+struct lMatrixCtor {
+  lMatrixCtor()
+  {
+  }
+
+  void reset()
+  {
+    m_trips.clear();
+  }
+
+  void reserve(int numverts)
+  {
+    /* reserve for diagonal entries */
+    m_trips.reserve(numverts * 9);
+  }
+
+  void add(int i, int j, const fMatrix &m)
+  {
+    i *= 3;
+    j *= 3;
+    for (int k = 0; k < 3; k++) {
+      for (int l = 0; l < 3; l++) {
+        m_trips.push_back(Triplet(i + k, j + l, m.coeff(l, k)));
+      }
+    }
+  }
+
+  void sub(int i, int j, const fMatrix &m)
+  {
+    i *= 3;
+    j *= 3;
+    for (int k = 0; k < 3; k++) {
+      for (int l = 0; l < 3; l++) {
+        m_trips.push_back(Triplet(i + k, j + l, -m.coeff(l, k)));
+      }
+    }
+  }
+
+  inline void construct(lMatrix &m)
+  {
+    m.setFromTriplets(m_trips.begin(), m_trips.end());
+    m_trips.clear();
+  }
+
+ private:
+  TripletList m_trips;
+};
+
+#  ifdef USE_EIGEN_CORE
+typedef Eigen::ConjugateGradient<lMatrix, Eigen::Lower, Eigen::DiagonalPreconditioner<Scalar>>
+    ConjugateGradient;
+#  endif
+#  ifdef USE_EIGEN_CONSTRAINED_CG
+typedef Eigen::ConstrainedConjugateGradient<lMatrix,
+                                            Eigen::Lower,
+                                            lMatrix,
+                                            Eigen::DiagonalPreconditioner<Scalar>>
+    ConstraintConjGrad;
+#  endif
+using Eigen::ComputationInfo;
+
+static void print_lvector(const lVector &v)
+{
+  for (int i = 0; i < v.rows(); i++) {
+    if (i > 0 && i % 3 == 0) {
+      printf("\n");
+    }
+
+    printf("%f,\n", v[i]);
+  }
+}
+
+static void print_lmatrix(const lMatrix &m)
+{
+  for (int j = 0; j < m.rows(); j++) {
+    if (j > 0 && j % 3 == 0) {
+      printf("\n");
+    }
+
+    for (int i = 0; i < m.cols(); i++) {
+      if (i > 0 && i % 3 == 0) {
+        printf("  ");
+      }
+
+      implicit_print_matrix_elem(m.coeff(j, i));
+    }
+    printf("\n");
+  }
+}
+
+BLI_INLINE void lMatrix_reserve_elems(lMatrix &m, int num)
+{
+  m.reserve(Eigen::VectorXi::Constant(m.cols(), num));
+}
+
+BLI_INLINE float *lVector_v3(lVector &v, int vertex)
+{
+  return v.data() + 3 * vertex;
+}
+
+BLI_INLINE const float *lVector_v3(const lVector &v, int vertex)
+{
+  return v.data() + 3 * vertex;
+}
+
+#  if 0
+BLI_INLINE void triplets_m3(TripletList &tlist, float m[3][3], int i, int j)
+{
+  i *= 3;
+  j *= 3;
+  for (int l = 0; l < 3; l++) {
+    for (int k = 0; k < 3; k++) {
+      tlist.push_back(Triplet(i + k, j + l, m[k][l]));
+    }
+  }
+}
+
+BLI_INLINE void triplets_m3fl(TripletList &tlist, float m[3][3], int i, int j, float factor)
+{
+  i *= 3;
+  j *= 3;
+  for (int l = 0; l < 3; l++) {
+    for (int k = 0; k < 3; k++) {
+      tlist.push_back(Triplet(i + k, j + l, m[k][l] * factor));
+    }
+  }
+}
+
+BLI_INLINE void lMatrix_add_triplets(lMatrix &r, const TripletList &tlist)
+{
+  lMatrix t(r.rows(), r.cols());
+  t.setFromTriplets(tlist.begin(), tlist.end());
+  r += t;
+}
+
+BLI_INLINE void lMatrix_madd_triplets(lMatrix &r, const TripletList &tlist, float f)
+{
+  lMatrix t(r.rows(), r.cols());
+  t.setFromTriplets(tlist.begin(), tlist.end());
+  r += f * t;
+}
+
+BLI_INLINE void lMatrix_sub_triplets(lMatrix &r, const TripletList &tlist)
+{
+  lMatrix t(r.rows(), r.cols());
+  t.setFromTriplets(tlist.begin(), tlist.end());
+  r -= t;
+}
+#  endif
+
+BLI_INLINE void outerproduct(float r[3][3], const float a[3], const float b[3])
+{
+  mul_v3_v3fl(r[0], a, b[0]);
+  mul_v3_v3fl(r[1], a, b[1]);
+  mul_v3_v3fl(r[2], a, b[2]);
+}
+
+BLI_INLINE void cross_m3_v3m3(float r[3][3], const float v[3], float m[3][3])
+{
+  cross_v3_v3v3(r[0], v, m[0]);
+  cross_v3_v3v3(r[1], v, m[1]);
+  cross_v3_v3v3(r[2], v, m[2]);
+}
+
+BLI_INLINE void cross_v3_identity(float r[3][3], const float v[3])
+{
+  r[0][0] = 0.0f;
+  r[1][0] = v[2];
+  r[2][0] = -v[1];
+  r[0][1] = -v[2];
+  r[1][1] = 0.0f;
+  r[2][1] = v[0];
+  r[0][2] = v[1];
+  r[1][2] = -v[0];
+  r[2][2] = 0.0f;
+}
+
+BLI_INLINE void madd_m3_m3fl(float r[3][3], float m[3][3], float f)
+{
+  r[0][0] += m[0][0] * f;
+  r[0][1] += m[0][1] * f;
+  r[0][2] += m[0][2] * f;
+  r[1][0] += m[1][0] * f;
+  r[1][1] += m[1][1] * f;
+  r[1][2] += m[1][2] * f;
+  r[2][0] += m[2][0] * f;
+  r[2][1] += m[2][1] * f;
+  r[2][2] += m[2][2] * f;
+}
+
+BLI_INLINE void madd_m3_m3m3fl(float r[3][3], float a[3][3], float b[3][3], float f)
+{
+  r[0][0] = a[0][0] + b[0][0] * f;
+  r[0][1] = a[0][1] + b[0][1] * f;
+  r[0][2] = a[0][2] + b[0][2] * f;
+  r[1][0] = a[1][0] + b[1][0] * f;
+  r[1][1] = a[1][1] + b[1][1] * f;
+  r[1][2] = a[1][2] + b[1][2] * f;
+  r[2][0] = a[2][0] + b[2][0] * f;
+  r[2][1] = a[2][1] + b[2][1] * f;
+  r[2][2] = a[2][2] + b[2][2] * f;
+}
+
+struct Implicit_Data {
+  typedef std::vector<fMatrix> fMatrixVector;
+
+  Implicit_Data(int numverts)
+  {
+    resize(numverts);
+  }
+
+  void resize(int numverts)
+  {
+    this->numverts = numverts;
+    int tot = 3 * numverts;
+
+    M.resize(tot, tot);
+    F.resize(tot);
+    dFdX.resize(tot, tot);
+    dFdV.resize(tot, tot);
+
+    tfm.resize(numverts, I);
+
+    X.resize(tot);
+    Xnew.resize(tot);
+    V.resize(tot);
+    Vnew.resize(tot);
+
+    A.resize(tot, tot);
+    B.resize(tot);
+
+    dV.resize(tot);
+    z.resize(tot);
+    S.resize(tot, tot);
+
+    iM.reserve(numverts);
+    idFdX.reserve(numverts);
+    idFdV.reserve(numverts);
+    iS.reserve(numverts);
+  }
+
+  int numverts;
+
+  /* inputs */
+  lMatrix M;          /* masses */
+  lVector F;          /* forces */
+  lMatrix dFdX, dFdV; /* force jacobians */
+
+  fMatrixVector tfm; /* local coordinate transform */
+
+  /* motion state data */
+  lVector X, Xnew; /* positions */
+  lVector V, Vnew; /* velocities */
+
+  /* internal solver data */
+  lVector B; /* B for A*dV = B */
+  lMatrix A; /* A for A*dV = B */
+
+  lVector dV; /* velocity change (solution of A*dV = B) */
+  lVector z;  /* target velocity in constrained directions */
+  lMatrix S;  /* filtering matrix for constraints */
+
+  /* temporary constructors */
+  lMatrixCtor iM;           /* masses */
+  lMatrixCtor idFdX, idFdV; /* force jacobians */
+  lMatrixCtor iS;           /* filtering matrix for constraints */
+};
+
+Implicit_Data *BPH_mass_spring_solver_create(int numverts, int numsprings)
+{
+  Implicit_Data *id = new Implicit_Data(numverts);
+  return id;
+}
+
+void BPH_mass_spring_solver_free(Implicit_Data *id)
+{
+  if (id) {
+    delete id;
+  }
+}
+
+int BPH_mass_spring_solver_numvert(Implicit_Data *id)
+{
+  if (id) {
+    return id->numverts;
+  }
+  else {
+    return 0;
+  }
+}
+
+/* ==== Transformation from/to root reference frames ==== */
+
+BLI_INLINE void world_to_root_v3(Implicit_Data *data, int index, float r[3], const float v[3])
+{
+  copy_v3_v3(r, v);
+  mul_transposed_m3_v3(data->tfm[index], r);
+}
+
+BLI_INLINE void root_to_world_v3(Implicit_Data *data, int index, float r[3], const float v[3])
+{
+  mul_v3_m3v3(r, data->tfm[index], v);
+}
+
+BLI_INLINE void world_to_root_m3(Implicit_Data *data, int index, float r[3][3], float m[3][3])
+{
+  float trot[3][3];
+  copy_m3_m3(trot, data->tfm[index]);
+  transpose_m3(trot);
+  mul_m3_m3m3(r, trot, m);
+}
+
+BLI_INLINE void root_to_world_m3(Implicit_Data *data, int index, float r[3][3], float m[3][3])
+{
+  mul_m3_m3m3(r, data->tfm[index], m);
+}
+
+/* ================================ */
+
+bool BPH_mass_spring_solve_velocities(Implicit_Data *data, float dt, ImplicitSolverResult *result)
+{
+#  ifdef USE_EIGEN_CORE
+  typedef ConjugateGradient solver_t;
+#  endif
+#  ifdef USE_EIGEN_CONSTRAINED_CG
+  typedef ConstraintConjGrad solver_t;
+#  endif
+
+  data->iM.construct(data->M);
+  data->idFdX.construct(data->dFdX);
+  data->idFdV.construct(data->dFdV);
+  data->iS.construct(data->S);
+
+  solver_t cg;
+  cg.setMaxIterations(100);
+  cg.setTolerance(0.01f);
+
+#  ifdef USE_EIGEN_CONSTRAINED_CG
+  cg.filter() = data->S;
+#  endif
+
+  data->A = data->M - dt * data->dFdV - dt * dt * data->dFdX;
+  cg.compute(data->A);
+
+  data->B = dt * data->F + dt * dt * data->dFdX * data->V;
+
+#  ifdef IMPLICIT_PRINT_SOLVER_INPUT_OUTPUT
+  printf("==== A ====\n");
+  print_lmatrix(id->A);
+  printf("==== z ====\n");
+  print_lvector(id->z);
+  printf("==== B ====\n");
+  print_lvector(id->B);
+  printf("==== S ====\n");
+  print_lmatrix(id->S);
+#  endif
+
+#  ifdef USE_EIGEN_CORE
+  data->dV = cg.solve(data->B);
+#  endif
+#  ifdef USE_EIGEN_CONSTRAINED_CG
+  data->dV = cg.solveWithGuess(data->B, data->z);
+#  endif
+
+#  ifdef IMPLICIT_PRINT_SOLVER_INPUT_OUTPUT
+  printf("==== dV ====\n");
+  print_lvector(id->dV);
+  printf("========\n");
+#  endif
+
+  data->Vnew = data->V + data->dV;
+
+  switch (cg.info()) {
+    case Eigen::Success:
+      result->status = BPH_SOLVER_SUCCESS;
+      break;
+    case Eigen::NoConvergence:
+      result->status = BPH_SOLVER_NO_CONVERGENCE;
+      break;
+    case Eigen::InvalidInput:
+      result->status = BPH_SOLVER_INVALID_INPUT;
+      break;
+    case Eigen::NumericalIssue:
+      result->status = BPH_SOLVER_NUMERICAL_ISSUE;
+      break;
+  }
+
+  result->iterations = cg.iterations();
+  result->error = cg.error();
+
+  return cg.info() == Eigen::Success;
+}
+
+bool BPH_mass_spring_solve_positions(Implicit_Data *data, float dt)
+{
+  data->Xnew = data->X + data->Vnew * dt;
+  return true;
+}
+
+/* ================================ */
+
+void BPH_mass_spring_apply_result(Implicit_Data *data)
+{
+  data->X = data->Xnew;
+  data->V = data->Vnew;
+}
+
+void BPH_mass_spring_set_vertex_mass(Implicit_Data *data, int index, float mass)
+{
+  float m[3][3];
+  copy_m3_m3(m, I);
+  mul_m3_fl(m, mass);
+  data->iM.add(index, index, m);
+}
+
+void BPH_mass_spring_set_rest_transform(Implicit_Data *data, int index, float tfm[3][3])
+{
+#  ifdef CLOTH_ROOT_FRAME
+  copy_m3_m3(data->tfm[index], tfm);
+#  else
+  unit_m3(data->tfm[index]);
+  (void)tfm;
+#  endif
+}
+
+void BPH_mass_spring_set_motion_state(Implicit_Data *data,
+                                      int index,
+                                      const float x[3],
+                                      const float v[3])
+{
+  world_to_root_v3(data, index, data->X.v3(index), x);
+  world_to_root_v3(data, index, data->V.v3(index), v);
+}
+
+void BPH_mass_spring_set_position(Implicit_Data *data, int index, const float x[3])
+{
+  world_to_root_v3(data, index, data->X.v3(index), x);
+}
+
+void BPH_mass_spring_set_velocity(Implicit_Data *data, int index, const float v[3])
+{
+  world_to_root_v3(data, index, data->V.v3(index), v);
+}
+
+void BPH_mass_spring_get_motion_state(struct Implicit_Data *data,
+                                      int index,
+                                      float x[3],
+                                      float v[3])
+{
+  if (x) {
+    root_to_world_v3(data, index, x, data->X.v3(index));
+  }
+  if (v) {
+    root_to_world_v3(data, index, v, data->V.v3(index));
+  }
+}
+
+void BPH_mass_spring_get_position(struct Implicit_Data *data, int index, float x[3])
+{
+  root_to_world_v3(data, index, x, data->X.v3(index));
+}
+
+void BPH_mass_spring_get_new_velocity(Implicit_Data *data, int index, float v[3])
+{
+  root_to_world_v3(data, index, v, data->V.v3(index));
+}
+
+void BPH_mass_spring_set_new_velocity(Implicit_Data *data, int index, const float v[3])
+{
+  world_to_root_v3(data, index, data->V.v3(index), v);
+}
+
+void BPH_mass_spring_clear_constraints(Implicit_Data *data)
+{
+  int numverts = data->numverts;
+  for (int i = 0; i < numverts; i++) {
+    data->iS.add(i, i, I);
+    zero_v3(data->z.v3(i));
+  }
+}
+
+void BPH_mass_spring_add_constraint_ndof0(Implicit_Data *data, int index, const float dV[3])
+{
+  data->iS.sub(index, index, I);
+
+  world_to_root_v3(data, index, data->z.v3(index), dV);
+}
+
+void BPH_mass_spring_add_constraint_ndof1(
+    Implicit_Data *data, int index, const float c1[3], const float c2[3], const float dV[3])
+{
+  float m[3][3], p[3], q[3], u[3], cmat[3][3];
+
+  world_to_root_v3(data, index, p, c1);
+  outerproduct(cmat, p, p);
+  copy_m3_m3(m, cmat);
+
+  world_to_root_v3(data, index, q, c2);
+  outerproduct(cmat, q, q);
+  add_m3_m3m3(m, m, cmat);
+
+  /* XXX not sure but multiplication should work here */
+  data->iS.sub(index, index, m);
+  //  mul_m3_m3m3(data->S[index].m, data->S[index].m, m);
+
+  world_to_root_v3(data, index, u, dV);
+  add_v3_v3(data->z.v3(index), u);
+}
+
+void BPH_mass_spring_add_constraint_ndof2(Implicit_Data *data,
+                                          int index,
+                                          const float c1[3],
+                                          const float dV[3])
+{
+  float m[3][3], p[3], u[3], cmat[3][3];
+
+  world_to_root_v3(data, index, p, c1);
+  outerproduct(cmat, p, p);
+  copy_m3_m3(m, cmat);
+
+  data->iS.sub(index, index, m);
+  //  mul_m3_m3m3(data->S[index].m, data->S[index].m, m);
+
+  world_to_root_v3(data, index, u, dV);
+  add_v3_v3(data->z.v3(index), u);
+}
+
+void BPH_mass_spring_clear_forces(Implicit_Data *data)
+{
+  data->F.setZero();
+  data->dFdX.setZero();
+  data->dFdV.setZero();
+}
+
+void BPH_mass_spring_force_reference_frame(Implicit_Data *data,
+                                           int index,
+                                           const float acceleration[3],
+                                           const float omega[3],
+                                           const float domega_dt[3],
+                                           float mass)
+{
+#  ifdef CLOTH_ROOT_FRAME
+  float acc[3], w[3], dwdt[3];
+  float f[3], dfdx[3][3], dfdv[3][3];
+  float euler[3], coriolis[3], centrifugal[3], rotvel[3];
+  float deuler[3][3], dcoriolis[3][3], dcentrifugal[3][3], drotvel[3][3];
+
+  world_to_root_v3(data, index, acc, acceleration);
+  world_to_root_v3(data, index, w, omega);
+  world_to_root_v3(data, index, dwdt, domega_dt);
+
+  cross_v3_v3v3(euler, dwdt, data->X.v3(index));
+  cross_v3_v3v3(coriolis, w, data->V.v3(index));
+  mul_v3_fl(coriolis, 2.0f);
+  cross_v3_v3v3(rotvel, w, data->X.v3(index));
+  cross_v3_v3v3(centrifugal, w, rotvel);
+
+  sub_v3_v3v3(f, acc, euler);
+  sub_v3_v3(f, coriolis);
+  sub_v3_v3(f, centrifugal);
+
+  mul_v3_fl(f, mass); /* F = m * a */
+
+  cross_v3_identity(deuler, dwdt);
+  cross_v3_identity(dcoriolis, w);
+  mul_m3_fl(dcoriolis, 2.0f);
+  cross_v3_identity(drotvel, w);
+  cross_m3_v3m3(dcentrifugal, w, drotvel);
+
+  add_m3_m3m3(dfdx, deuler, dcentrifugal);
+  negate_m3(dfdx);
+  mul_m3_fl(dfdx, mass);
+
+  copy_m3_m3(dfdv, dcoriolis);
+  negate_m3(dfdv);
+  mul_m3_fl(dfdv, mass);
+
+  add_v3_v3(data->F.v3(index), f);
+  data->idFdX.add(index, index, dfdx);
+  data->idFdV.add(index, index, dfdv);
+#  else
+  (void)data;
+  (void)index;
+  (void)acceleration;
+  (void)omega;
+  (void)domega_dt;
+#  endif
+}
+
+void BPH_mass_spring_force_gravity(Implicit_Data *data, int index, float mass, const float g[3])
+{
+  /* force = mass * acceleration (in this case: gravity) */
+  float f[3];
+  world_to_root_v3(data, index, f, g);
+  mul_v3_fl(f, mass);
+
+  add_v3_v3(data->F.v3(index), f);
+}
+
+void BPH_mass_spring_force_drag(Implicit_Data *data, float drag)
+{
+  int numverts = data->numverts;
+  for (int i = 0; i < numverts; i++) {
+    float tmp[3][3];
+
+    /* NB: uses root space velocity, no need to transform */
+    madd_v3_v3fl(data->F.v3(i), data->V.v3(i), -drag);
+
+    copy_m3_m3(tmp, I);
+    mul_m3_fl(tmp, -drag);
+    data->idFdV.add(i, i, tmp);
+  }
+}
+
+void BPH_mass_spring_force_extern(
+    struct Implicit_Data *data, int i, const float f[3], float dfdx[3][3], float dfdv[3][3])
+{
+  float tf[3], tdfdx[3][3], tdfdv[3][3];
+  world_to_root_v3(data, i, tf, f);
+  world_to_root_m3(data, i, tdfdx, dfdx);
+  world_to_root_m3(data, i, tdfdv, dfdv);
+
+  add_v3_v3(data->F.v3(i), tf);
+  data->idFdX.add(i, i, tdfdx);
+  data->idFdV.add(i, i, tdfdv);
+}
+
+static float calc_nor_area_tri(float nor[3],
+                               const float v1[3],
+                               const float v2[3],
+                               const float v3[3])
+{
+  float n1[3], n2[3];
+
+  sub_v3_v3v3(n1, v1, v2);
+  sub_v3_v3v3(n2, v2, v3);
+
+  cross_v3_v3v3(nor, n1, n2);
+  return normalize_v3(nor) / 2.0f;
+}
+
+/* XXX does not support force jacobians yet,
+ * since the effector system does not provide them either. */
+void BPH_mass_spring_force_face_wind(
+    Implicit_Data *data, int v1, int v2, int v3, const float (*winvec)[3])
+{
+  const float effector_scale = 0.02f;
+  float win[3], nor[3], area;
+  float factor;
+
+  // calculate face normal and area
+  area = calc_nor_area_tri(nor, data->X.v3(v1), data->X.v3(v2), data->X.v3(v3));
+  factor = effector_scale * area / 3.0f;
+
+  world_to_root_v3(data, v1, win, winvec[v1]);
+  madd_v3_v3fl(data->F.v3(v1), nor, factor * dot_v3v3(win, nor));
+
+  world_to_root_v3(data, v2, win, winvec[v2]);
+  madd_v3_v3fl(data->F.v3(v2), nor, factor * dot_v3v3(win, nor));
+
+  world_to_root_v3(data, v3, win, winvec[v3]);
+  madd_v3_v3fl(data->F.v3(v3), nor, factor * dot_v3v3(win, nor));
+}
+
+void BPH_mass_spring_force_edge_wind(Implicit_Data *data, int v1, int v2, const float (*winvec)[3])
+{
+  const float effector_scale = 0.01;
+  float win[3], dir[3], nor[3], length;
+
+  sub_v3_v3v3(dir, data->X.v3(v1), data->X.v3(v2));
+  length = normalize_v3(dir);
+
+  world_to_root_v3(data, v1, win, winvec[v1]);
+  madd_v3_v3v3fl(nor, win, dir, -dot_v3v3(win, dir));
+  madd_v3_v3fl(data->F.v3(v1), nor, effector_scale * length);
+
+  world_to_root_v3(data, v2, win, winvec[v2]);
+  madd_v3_v3v3fl(nor, win, dir, -dot_v3v3(win, dir));
+  madd_v3_v3fl(data->F.v3(v2), nor, effector_scale * length);
+}
+
+BLI_INLINE void dfdx_spring(float to[3][3], const float dir[3], float length, float L, float k)
+{
+  /* dir is unit length direction, rest is spring's restlength, k is spring constant. */
+  // return ((I - outerprod(dir, dir)) * Min(1.0f, rest / length) - I) * -k;
+  outerproduct(to, dir, dir);
+  sub_m3_m3m3(to, I, to);
+
+  mul_m3_fl(to, (L / length));
+  sub_m3_m3m3(to, to, I);
+  mul_m3_fl(to, k);
+}
+
+/* unused */
+#  if 0
+BLI_INLINE void dfdx_damp(float to[3][3],
+                          const float dir[3],
+                          float length,
+                          const float vel[3],
+                          float rest,
+                          float damping)
+{
+  // inner spring damping   vel is the relative velocity  of the endpoints.
+  //  return (I-outerprod(dir, dir)) * (-damping * -(dot(dir, vel)/Max(length, rest)));
+  mul_fvectorT_fvector(to, dir, dir);
+  sub_fmatrix_fmatrix(to, I, to);
+  mul_fmatrix_S(to, (-damping * -(dot_v3v3(dir, vel) / MAX2(length, rest))));
+}
+#  endif
+
+BLI_INLINE void dfdv_damp(float to[3][3], const float dir[3], float damping)
+{
+  // derivative of force wrt velocity
+  outerproduct(to, dir, dir);
+  mul_m3_fl(to, -damping);
+}
+
+BLI_INLINE float fb(float length, float L)
+{
+  float x = length / L;
+  return (-11.541f * powf(x, 4) + 34.193f * powf(x, 3) - 39.083f * powf(x, 2) + 23.116f * x -
+          9.713f);
+}
+
+BLI_INLINE float fbderiv(float length, float L)
+{
+  float x = length / L;
+
+  return (-46.164f * powf(x, 3) + 102.579f * powf(x, 2) - 78.166f * x + 23.116f);
+}
+
+BLI_INLINE float fbstar(float length, float L, float kb, float cb)
+{
+  float tempfb_fl = kb * fb(length, L);
+  float fbstar_fl = cb * (length - L);
+
+  if (tempfb_fl < fbstar_fl) {
+    return fbstar_fl;
+  }
+  else {
+    return tempfb_fl;
+  }
+}
+
+// function to calculae bending spring force (taken from Choi & Co)
+BLI_INLINE float fbstar_jacobi(float length, float L, float kb, float cb)
+{
+  float tempfb_fl = kb * fb(length, L);
+  float fbstar_fl = cb * (length - L);
+
+  if (tempfb_fl < fbstar_fl) {
+    return -cb;
+  }
+  else {
+    return -kb * fbderiv(length, L);
+  }
+}
+
+/* calculate elonglation */
+BLI_INLINE bool spring_length(Implicit_Data *data,
+                              int i,
+                              int j,
+                              float r_extent[3],
+                              float r_dir[3],
+                              float *r_length,
+                              float r_vel[3])
+{
+  sub_v3_v3v3(r_extent, data->X.v3(j), data->X.v3(i));
+  sub_v3_v3v3(r_vel, data->V.v3(j), data->V.v3(i));
+  *r_length = len_v3(r_extent);
+
+  if (*r_length > ALMOST_ZERO) {
+#  if 0
+    if (length > L) {
+      if ((clmd->sim_parms->flags & CSIMSETT_FLAG_TEARING_ENABLED) &&
+          (((length - L) * 100.0f / L) > clmd->sim_parms->maxspringlen)) {
+        // cut spring!
+        s->flags |= CSPRING_FLAG_DEACTIVATE;
+        return false;
+      }
+    }
+#  endif
+    mul_v3_v3fl(r_dir, r_extent, 1.0f / (*r_length));
+  }
+  else {
+    zero_v3(r_dir);
+  }
+
+  return true;
+}
+
+BLI_INLINE void apply_spring(
+    Implicit_Data *data, int i, int j, const float f[3], float dfdx[3][3], float dfdv[3][3])
+{
+  add_v3_v3(data->F.v3(i), f);
+  sub_v3_v3(data->F.v3(j), f);
+
+  data->idFdX.add(i, i, dfdx);
+  data->idFdX.add(j, j, dfdx);
+  data->idFdX.sub(i, j, dfdx);
+  data->idFdX.sub(j, i, dfdx);
+
+  data->idFdV.add(i, i, dfdv);
+  data->idFdV.add(j, j, dfdv);
+  data->idFdV.sub(i, j, dfdv);
+  data->idFdV.sub(j, i, dfdv);
+}
+
+bool BPH_mass_spring_force_spring_linear(Implicit_Data *data,
+                                         int i,
+                                         int j,
+                                         float restlen,
+                                         float stiffness,
+                                         float damping,
+                                         bool no_compress,
+                                         float clamp_force,
+                                         float r_f[3],
+                                         float r_dfdx[3][3],
+                                         float r_dfdv[3][3])
+{
+  float extent[3], length, dir[3], vel[3];
+
+  // calculate elonglation
+  spring_length(data, i, j, extent, dir, &length, vel);
+
+  if (length > restlen || no_compress) {
+    float stretch_force, f[3], dfdx[3][3], dfdv[3][3];
+
+    stretch_force = stiffness * (length - restlen);
+    if (clamp_force > 0.0f && stretch_force > clamp_force) {
+      stretch_force = clamp_force;
+    }
+    mul_v3_v3fl(f, dir, stretch_force);
+
+    // Ascher & Boxman, p.21: Damping only during elonglation
+    // something wrong with it...
+    madd_v3_v3fl(f, dir, damping * dot_v3v3(vel, dir));
+
+    dfdx_spring(dfdx, dir, length, restlen, stiffness);
+    dfdv_damp(dfdv, dir, damping);
+
+    apply_spring(data, i, j, f, dfdx, dfdv);
+
+    if (r_f) {
+      copy_v3_v3(r_f, f);
+    }
+    if (r_dfdx) {
+      copy_m3_m3(r_dfdx, dfdx);
+    }
+    if (r_dfdv) {
+      copy_m3_m3(r_dfdv, dfdv);
+    }
+
+    return true;
+  }
+  else {
+    if (r_f) {
+      zero_v3(r_f);
+    }
+    if (r_dfdx) {
+      zero_m3(r_dfdx);
+    }
+    if (r_dfdv) {
+      zero_m3(r_dfdv);
+    }
+
+    return false;
+  }
+}
+
+/* See "Stable but Responsive Cloth" (Choi, Ko 2005) */
+bool BPH_mass_spring_force_spring_bending(Implicit_Data *data,
+                                          int i,
+                                          int j,
+                                          float restlen,
+                                          float kb,
+                                          float cb,
+                                          float r_f[3],
+                                          float r_dfdx[3][3],
+                                          float r_dfdv[3][3])
+{
+  float extent[3], length, dir[3], vel[3];
+
+  // calculate elonglation
+  spring_length(data, i, j, extent, dir, &length, vel);
+
+  if (length < restlen) {
+    float f[3], dfdx[3][3], dfdv[3][3];
+
+    mul_v3_v3fl(f, dir, fbstar(length, restlen, kb, cb));
+
+    outerproduct(dfdx, dir, dir);
+    mul_m3_fl(dfdx, fbstar_jacobi(length, restlen, kb, cb));
+
+    /* XXX damping not supported */
+    zero_m3(dfdv);
+
+    apply_spring(data, i, j, f, dfdx, dfdv);
+
+    if (r_f) {
+      copy_v3_v3(r_f, f);
+    }
+    if (r_dfdx) {
+      copy_m3_m3(r_dfdx, dfdx);
+    }
+    if (r_dfdv) {
+      copy_m3_m3(r_dfdv, dfdv);
+    }
+
+    return true;
+  }
+  else {
+    if (r_f) {
+      zero_v3(r_f);
+    }
+    if (r_dfdx) {
+      zero_m3(r_dfdx);
+    }
+    if (r_dfdv) {
+      zero_m3(r_dfdv);
+    }
+
+    return false;
+  }
+}
+
+/* Jacobian of a direction vector.
+ * Basically the part of the differential orthogonal to the direction,
+ * inversely proportional to the length of the edge.
+ *
+ * dD_ij/dx_i = -dD_ij/dx_j = (D_ij * D_ij^T - I) / len_ij
+ */
+BLI_INLINE void spring_grad_dir(
+    Implicit_Data *data, int i, int j, float edge[3], float dir[3], float grad_dir[3][3])
+{
+  float length;
+
+  sub_v3_v3v3(edge, data->X.v3(j), data->X.v3(i));
+  length = normalize_v3_v3(dir, edge);
+
+  if (length > ALMOST_ZERO) {
+    outerproduct(grad_dir, dir, dir);
+    sub_m3_m3m3(grad_dir, I, grad_dir);
+    mul_m3_fl(grad_dir, 1.0f / length);
+  }
+  else {
+    zero_m3(grad_dir);
+  }
+}
+
+BLI_INLINE void spring_angbend_forces(Implicit_Data *data,
+                                      int i,
+                                      int j,
+                                      int k,
+                                      const float goal[3],
+                                      float stiffness,
+                                      float damping,
+                                      int q,
+                                      const float dx[3],
+                                      const float dv[3],
+                                      float r_f[3])
+{
+  float edge_ij[3], dir_ij[3];
+  float edge_jk[3], dir_jk[3];
+  float vel_ij[3], vel_jk[3], vel_ortho[3];
+  float f_bend[3], f_damp[3];
+  float fk[3];
+  float dist[3];
+
+  zero_v3(fk);
+
+  sub_v3_v3v3(edge_ij, data->X.v3(j), data->X.v3(i));
+  if (q == i) {
+    sub_v3_v3(edge_ij, dx);
+  }
+  if (q == j) {
+    add_v3_v3(edge_ij, dx);
+  }
+  normalize_v3_v3(dir_ij, edge_ij);
+
+  sub_v3_v3v3(edge_jk, data->X.v3(k), data->X.v3(j));
+  if (q == j) {
+    sub_v3_v3(edge_jk, dx);
+  }
+  if (q == k) {
+    add_v3_v3(edge_jk, dx);
+  }
+  normalize_v3_v3(dir_jk, edge_jk);
+
+  sub_v3_v3v3(vel_ij, data->V.v3(j), data->V.v3(i));
+  if (q == i) {
+    sub_v3_v3(vel_ij, dv);
+  }
+  if (q == j) {
+    add_v3_v3(vel_ij, dv);
+  }
+
+  sub_v3_v3v3(vel_jk, data->V.v3(k), data->V.v3(j));
+  if (q == j) {
+    sub_v3_v3(vel_jk, dv);
+  }
+  if (q == k) {
+    add_v3_v3(vel_jk, dv);
+  }
+
+  /* bending force */
+  sub_v3_v3v3(dist, goal, edge_jk);
+  mul_v3_v3fl(f_bend, dist, stiffness);
+
+  add_v3_v3(fk, f_bend);
+
+  /* damping force */
+  madd_v3_v3v3fl(vel_ortho, vel_jk, dir_jk, -dot_v3v3(vel_jk, dir_jk));
+  mul_v3_v3fl(f_damp, vel_ortho, damping);
+
+  sub_v3_v3(fk, f_damp);
+
+  copy_v3_v3(r_f, fk);
+}
+
+/* Finite Differences method for estimating the jacobian of the force */
+BLI_INLINE void spring_angbend_estimate_dfdx(Implicit_Data *data,
+                                             int i,
+                                             int j,
+                                             int k,
+                                             const float goal[3],
+                                             float stiffness,
+                                             float damping,
+                                             int q,
+                                             float dfdx[3][3])
+{
+  const float delta = 0.00001f;  // TODO find a good heuristic for this
+  float dvec_null[3][3], dvec_pos[3][3], dvec_neg[3][3];
+  float f[3];
+  int a, b;
+
+  zero_m3(dvec_null);
+  unit_m3(dvec_pos);
+  mul_m3_fl(dvec_pos, delta * 0.5f);
+  copy_m3_m3(dvec_neg, dvec_pos);
+  negate_m3(dvec_neg);
+
+  /* XXX TODO offset targets to account for position dependency */
+
+  for (a = 0; a < 3; a++) {
+    spring_angbend_forces(
+        data, i, j, k, goal, stiffness, damping, q, dvec_pos[a], dvec_null[a], f);
+    copy_v3_v3(dfdx[a], f);
+
+    spring_angbend_forces(
+        data, i, j, k, goal, stiffness, damping, q, dvec_neg[a], dvec_null[a], f);
+    sub_v3_v3(dfdx[a], f);
+
+    for (b = 0; b < 3; b++) {
+      dfdx[a][b] /= delta;
+    }
+  }
+}
+
+/* Finite Differences method for estimating the jacobian of the force */
+BLI_INLINE void spring_angbend_estimate_dfdv(Implicit_Data *data,
+                                             int i,
+                                             int j,
+                                             int k,
+                                             const float goal[3],
+                                             float stiffness,
+                                             float damping,
+                                             int q,
+                                             float dfdv[3][3])
+{
+  const float delta = 0.00001f;  // TODO find a good heuristic for this
+  float dvec_null[3][3], dvec_pos[3][3], dvec_neg[3][3];
+  float f[3];
+  int a, b;
+
+  zero_m3(dvec_null);
+  unit_m3(dvec_pos);
+  mul_m3_fl(dvec_pos, delta * 0.5f);
+  copy_m3_m3(dvec_neg, dvec_pos);
+  negate_m3(dvec_neg);
+
+  /* XXX TODO offset targets to account for position dependency */
+
+  for (a = 0; a < 3; a++) {
+    spring_angbend_forces(
+        data, i, j, k, goal, stiffness, damping, q, dvec_null[a], dvec_pos[a], f);
+    copy_v3_v3(dfdv[a], f);
+
+    spring_angbend_forces(
+        data, i, j, k, goal, stiffness, damping, q, dvec_null[a], dvec_neg[a], f);
+    sub_v3_v3(dfdv[a], f);
+
+    for (b = 0; b < 3; b++) {
+      dfdv[a][b] /= delta;
+    }
+  }
+}
+
+/* Angular spring that pulls the vertex toward the local target
+ * See "Artistic Simulation of Curly Hair" (Pixar technical memo #12-03a)
+ */
+bool BPH_mass_spring_force_spring_bending_angular(Implicit_Data *data,
+                                                  int i,
+                                                  int j,
+                                                  int k,
+                                                  const float target[3],
+                                                  float stiffness,
+                                                  float damping)
+{
+  float goal[3];
+  float fj[3], fk[3];
+  float dfj_dxi[3][3], dfj_dxj[3][3], dfk_dxi[3][3], dfk_dxj[3][3], dfk_dxk[3][3];
+  float dfj_dvi[3][3], dfj_dvj[3][3], dfk_dvi[3][3], dfk_dvj[3][3], dfk_dvk[3][3];
+
+  const float vecnull[3] = {0.0f, 0.0f, 0.0f};
+
+  world_to_root_v3(data, j, goal, target);
+
+  spring_angbend_forces(data, i, j, k, goal, stiffness, damping, k, vecnull, vecnull, fk);
+  negate_v3_v3(fj, fk); /* counterforce */
+
+  spring_angbend_estimate_dfdx(data, i, j, k, goal, stiffness, damping, i, dfk_dxi);
+  spring_angbend_estimate_dfdx(data, i, j, k, goal, stiffness, damping, j, dfk_dxj);
+  spring_angbend_estimate_dfdx(data, i, j, k, goal, stiffness, damping, k, dfk_dxk);
+  copy_m3_m3(dfj_dxi, dfk_dxi);
+  negate_m3(dfj_dxi);
+  copy_m3_m3(dfj_dxj, dfk_dxj);
+  negate_m3(dfj_dxj);
+
+  spring_angbend_estimate_dfdv(data, i, j, k, goal, stiffness, damping, i, dfk_dvi);
+  spring_angbend_estimate_dfdv(data, i, j, k, goal, stiffness, damping, j, dfk_dvj);
+  spring_angbend_estimate_dfdv(data, i, j, k, goal, stiffness, damping, k, dfk_dvk);
+  copy_m3_m3(dfj_dvi, dfk_dvi);
+  negate_m3(dfj_dvi);
+  copy_m3_m3(dfj_dvj, dfk_dvj);
+  negate_m3(dfj_dvj);
+
+  /* add forces and jacobians to the solver data */
+
+  add_v3_v3(data->F.v3(j), fj);
+  add_v3_v3(data->F.v3(k), fk);
+
+  data->idFdX.add(j, j, dfj_dxj);
+  data->idFdX.add(k, k, dfk_dxk);
+
+  data->idFdX.add(i, j, dfj_dxi);
+  data->idFdX.add(j, i, dfj_dxi);
+  data->idFdX.add(j, k, dfk_dxj);
+  data->idFdX.add(k, j, dfk_dxj);
+  data->idFdX.add(i, k, dfk_dxi);
+  data->idFdX.add(k, i, dfk_dxi);
+
+  data->idFdV.add(j, j, dfj_dvj);
+  data->idFdV.add(k, k, dfk_dvk);
+
+  data->idFdV.add(i, j, dfj_dvi);
+  data->idFdV.add(j, i, dfj_dvi);
+  data->idFdV.add(j, k, dfk_dvj);
+  data->idFdV.add(k, j, dfk_dvj);
+  data->idFdV.add(i, k, dfk_dvi);
+  data->idFdV.add(k, i, dfk_dvi);
+
+  /* XXX analytical calculation of derivatives below is incorrect.
+   * This proved to be difficult, but for now just using the finite difference method for
+   * estimating the jacobians should be sufficient.
+   */
+#  if 0
+  float edge_ij[3], dir_ij[3], grad_dir_ij[3][3];
+  float edge_jk[3], dir_jk[3], grad_dir_jk[3][3];
+  float dist[3], vel_jk[3], vel_jk_ortho[3], projvel[3];
+  float target[3];
+  float tmp[3][3];
+  float fi[3], fj[3], fk[3];
+  float dfi_dxi[3][3], dfj_dxi[3][3], dfj_dxj[3][3], dfk_dxi[3][3], dfk_dxj[3][3], dfk_dxk[3][3];
+  float dfdvi[3][3];
+
+  // TESTING
+  damping = 0.0f;
+
+  zero_v3(fi);
+  zero_v3(fj);
+  zero_v3(fk);
+  zero_m3(dfi_dxi);
+  zero_m3(dfj_dxi);
+  zero_m3(dfk_dxi);
+  zero_m3(dfk_dxj);
+  zero_m3(dfk_dxk);
+
+  /* jacobian of direction vectors */
+  spring_grad_dir(data, i, j, edge_ij, dir_ij, grad_dir_ij);
+  spring_grad_dir(data, j, k, edge_jk, dir_jk, grad_dir_jk);
+
+  sub_v3_v3v3(vel_jk, data->V[k], data->V[j]);
+
+  /* bending force */
+  mul_v3_v3fl(target, dir_ij, restlen);
+  sub_v3_v3v3(dist, target, edge_jk);
+  mul_v3_v3fl(fk, dist, stiffness);
+
+  /* damping force */
+  madd_v3_v3v3fl(vel_jk_ortho, vel_jk, dir_jk, -dot_v3v3(vel_jk, dir_jk));
+  madd_v3_v3fl(fk, vel_jk_ortho, damping);
+
+  /* XXX this only holds true as long as we assume straight rest shape!
+   * eventually will become a bit more involved since the opposite segment
+   * gets its own target, under condition of having equal torque on both sides.
+   */
+  copy_v3_v3(fi, fk);
+
+  /* counterforce on the middle point */
+  sub_v3_v3(fj, fi);
+  sub_v3_v3(fj, fk);
+
+  /* === derivatives === */
+
+  madd_m3_m3fl(dfk_dxi, grad_dir_ij, stiffness * restlen);
+
+  madd_m3_m3fl(dfk_dxj, grad_dir_ij, -stiffness * restlen);
+  madd_m3_m3fl(dfk_dxj, I, stiffness);
+
+  madd_m3_m3fl(dfk_dxk, I, -stiffness);
+
+  copy_m3_m3(dfi_dxi, dfk_dxk);
+  negate_m3(dfi_dxi);
+
+  /* dfj_dfi == dfi_dfj due to symmetry,
+   * dfi_dfj == dfk_dfj due to fi == fk
+   * XXX see comment above on future bent rest shapes
+   */
+  copy_m3_m3(dfj_dxi, dfk_dxj);
+
+  /* dfj_dxj == -(dfi_dxj + dfk_dxj) due to fj == -(fi + fk) */
+  sub_m3_m3m3(dfj_dxj, dfj_dxj, dfj_dxi);
+  sub_m3_m3m3(dfj_dxj, dfj_dxj, dfk_dxj);
+
+  /* add forces and jacobians to the solver data */
+  add_v3_v3(data->F[i], fi);
+  add_v3_v3(data->F[j], fj);
+  add_v3_v3(data->F[k], fk);
+
+  add_m3_m3m3(data->dFdX[i].m, data->dFdX[i].m, dfi_dxi);
+  add_m3_m3m3(data->dFdX[j].m, data->dFdX[j].m, dfj_dxj);
+  add_m3_m3m3(data->dFdX[k].m, data->dFdX[k].m, dfk_dxk);
+
+  add_m3_m3m3(data->dFdX[block_ij].m, data->dFdX[block_ij].m, dfj_dxi);
+  add_m3_m3m3(data->dFdX[block_jk].m, data->dFdX[block_jk].m, dfk_dxj);
+  add_m3_m3m3(data->dFdX[block_ik].m, data->dFdX[block_ik].m, dfk_dxi);
+#  endif
+
+  return true;
+}
+
+bool BPH_mass_spring_force_spring_goal(Implicit_Data *data,
+                                       int i,
+                                       const float goal_x[3],
+                                       const float goal_v[3],
+                                       float stiffness,
+                                       float damping,
+                                       float r_f[3],
+                                       float r_dfdx[3][3],
+                                       float r_dfdv[3][3])
+{
+  float root_goal_x[3], root_goal_v[3], extent[3], length, dir[3], vel[3];
+  float f[3], dfdx[3][3], dfdv[3][3];
+
+  /* goal is in world space */
+  world_to_root_v3(data, i, root_goal_x, goal_x);
+  world_to_root_v3(data, i, root_goal_v, goal_v);
+
+  sub_v3_v3v3(extent, root_goal_x, data->X.v3(i));
+  sub_v3_v3v3(vel, root_goal_v, data->V.v3(i));
+  length = normalize_v3_v3(dir, extent);
+
+  if (length > ALMOST_ZERO) {
+    mul_v3_v3fl(f, dir, stiffness * length);
+
+    // Ascher & Boxman, p.21: Damping only during elonglation
+    // something wrong with it...
+    madd_v3_v3fl(f, dir, damping * dot_v3v3(vel, dir));
+
+    dfdx_spring(dfdx, dir, length, 0.0f, stiffness);
+    dfdv_damp(dfdv, dir, damping);
+
+    add_v3_v3(data->F.v3(i), f);
+    data->idFdX.add(i, i, dfdx);
+    data->idFdV.add(i, i, dfdv);
+
+    if (r_f) {
+      copy_v3_v3(r_f, f);
+    }
+    if (r_dfdx) {
+      copy_m3_m3(r_dfdx, dfdx);
+    }
+    if (r_dfdv) {
+      copy_m3_m3(r_dfdv, dfdv);
+    }
+
+    return true;
+  }
+  else {
+    if (r_f) {
+      zero_v3(r_f);
+    }
+    if (r_dfdx) {
+      zero_m3(r_dfdx);
+    }
+    if (r_dfdv) {
+      zero_m3(r_dfdv);
+    }
+
+    return false;
+  }
+}
+
+#endif /* IMPLICIT_SOLVER_EIGEN */
author	Jacques Lucke <jacques@blender.org>	2020-07-16 15:27:47 +0300
committer	Jacques Lucke <jacques@blender.org>	2020-07-16 15:28:38 +0300
commit	9363c4de0635394548fa2eb8d205581313029775 (patch)
tree	18f0a4be6ed78a8c8f8f5a33d575f2465ed95ee9 /source/blender/simulation
parent	66b48ad8fb3bea4c0b643fe23fd25e70843067cd (diff)