Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSergey Sharybin <sergey.vfx@gmail.com>2014-10-21 13:37:29 +0400
committerSergey Sharybin <sergey.vfx@gmail.com>2014-10-22 14:21:11 +0400
commitba7e504448fc7aa2abedfdfd26a18eb5442a68e7 (patch)
tree3de1ebff0e815c157358af8691950ea5e02e964d
parentdfc4de036e39f74aa0a3b1e1734a02f4a9530a78 (diff)
Meshdeform: Use SSE intrinsics in meshdeform_dynamic_bind()
Quite straightforward change, don't think some extra explanation is needed. This gives about 15% speedup of the modifier evaluation on my laptop. Reviewers: campbellbarton Differential Revision: https://developer.blender.org/D836
-rw-r--r--source/blender/modifiers/intern/MOD_meshdeform.c33
1 files changed, 30 insertions, 3 deletions
diff --git a/source/blender/modifiers/intern/MOD_meshdeform.c b/source/blender/modifiers/intern/MOD_meshdeform.c
index bd6ce26103c..c3cbc0761e9 100644
--- a/source/blender/modifiers/intern/MOD_meshdeform.c
+++ b/source/blender/modifiers/intern/MOD_meshdeform.c
@@ -54,6 +54,9 @@
#include "MOD_util.h"
+#ifdef __SSE2__
+# include <emmintrin.h>
+#endif
static void initData(ModifierData *md)
{
@@ -134,11 +137,15 @@ static float meshdeform_dynamic_bind(MeshDeformModifierData *mmd, float (*dco)[3
{
MDefCell *cell;
MDefInfluence *inf;
- float gridvec[3], dvec[3], ivec[3], co[3], wx, wy, wz;
+ float gridvec[3], dvec[3], ivec[3], wx, wy, wz;
float weight, cageweight, totweight, *cageco;
int i, j, a, x, y, z, size;
+#ifdef __SSE2__
+ __m128 co = _mm_setzero_ps();
+#else
+ float co[3] = {0.0f, 0.0f, 0.0f};
+#endif
- zero_v3(co);
totweight = 0.0f;
size = mmd->dyngridsize;
@@ -170,14 +177,30 @@ static float meshdeform_dynamic_bind(MeshDeformModifierData *mmd, float (*dco)[3
for (j = 0; j < cell->totinfluence; j++, inf++) {
cageco = dco[inf->vertex];
cageweight = weight * inf->weight;
+#ifdef __SSE2__
+ {
+ __m128 cageweight_r = _mm_set1_ps(cageweight);
+ /* This will load one extra element, this is ok because
+ * we ignore that part of reigister anyway.
+ */
+ __m128 cageco_r = _mm_loadu_ps(cageco);
+ co = _mm_add_ps(co,
+ _mm_mul_ps(cageco_r, cageweight_r));
+ }
+#else
co[0] += cageweight * cageco[0];
co[1] += cageweight * cageco[1];
co[2] += cageweight * cageco[2];
+#endif
totweight += cageweight;
}
}
+#ifdef __SSE2__
+ copy_v3_v3(vec, (float*)&co);
+#else
copy_v3_v3(vec, co);
+#endif
return totweight;
}
@@ -344,7 +367,11 @@ static void meshdeformModifier_do(
cagedm->getVertCos(cagedm, cagecos);
bindcagecos = (float(*)[3])mmd->bindcagecos;
- dco = MEM_callocN(sizeof(*dco) * totcagevert, "MDefDco");
+ /* We allocate 1 element extra to make it possible to
+ * load the values to SSE registers, which are float4.
+ */
+ dco = MEM_callocN(sizeof(*dco) * (totcagevert + 1), "MDefDco");
+ zero_v3(dco[totcagevert]);
for (a = 0; a < totcagevert; a++) {
/* get cage vertex in world space with binding transform */
copy_v3_v3(co, cagecos[a]);