diff options
-rw-r--r-- | source/blender/blenkernel/CMakeLists.txt | 1 | ||||
-rw-r--r-- | source/blender/blenkernel/intern/lattice_deform.c | 151 | ||||
-rw-r--r-- | source/blender/blenkernel/intern/lattice_deform_test.cc | 138 |
3 files changed, 214 insertions, 76 deletions
diff --git a/source/blender/blenkernel/CMakeLists.txt b/source/blender/blenkernel/CMakeLists.txt index 0fbc8c4c229..f6df3f1bb62 100644 --- a/source/blender/blenkernel/CMakeLists.txt +++ b/source/blender/blenkernel/CMakeLists.txt @@ -718,6 +718,7 @@ if(WITH_GTESTS) set(TEST_SRC intern/armature_test.cc intern/fcurve_test.cc + intern/lattice_deform_test.cc ) set(TEST_INC ../editors/include diff --git a/source/blender/blenkernel/intern/lattice_deform.c b/source/blender/blenkernel/intern/lattice_deform.c index 919093f3630..43965813b84 100644 --- a/source/blender/blenkernel/intern/lattice_deform.c +++ b/source/blender/blenkernel/intern/lattice_deform.c @@ -49,14 +49,24 @@ #include "BKE_deform.h" +#ifdef __SSE2__ +# include <emmintrin.h> +#endif + /* -------------------------------------------------------------------- */ /** \name Lattice Deform API * \{ */ typedef struct LatticeDeformData { - const Object *object; - float *latticedata; + /* Convert from object space to deform space */ float latmat[4][4]; + /* Cached reference to the lattice to use for evaluation. When in edit mode this attribute + * is set to the edit mode lattice. */ + const Lattice *lt; + /* Preprocessed lattice points (converted to deform space). */ + float *latticedata; + /* Prefetched DeformWeights of the lattice. 
*/ + float *lattice_weights; } LatticeDeformData; LatticeDeformData *BKE_lattice_deform_data_create(const Object *oblatt, const Object *ob) @@ -72,6 +82,7 @@ LatticeDeformData *BKE_lattice_deform_data_create(const Object *oblatt, const Ob float fu, fv, fw; int u, v, w; float *latticedata; + float *lattice_weights = NULL; float latmat[4][4]; LatticeDeformData *lattice_deform_data; @@ -80,8 +91,10 @@ LatticeDeformData *BKE_lattice_deform_data_create(const Object *oblatt, const Ob } bp = lt->def; - fp = latticedata = MEM_mallocN(sizeof(float[3]) * lt->pntsu * lt->pntsv * lt->pntsw, - "latticedata"); + const int32_t num_points = lt->pntsu * lt->pntsv * lt->pntsw; + /* We allocate one additional float for SSE2 optimizations. Without this + * the SSE2 instructions for the last item would read in unallocated memory. */ + fp = latticedata = MEM_mallocN(sizeof(float[3]) * num_points + sizeof(float), "latticedata"); /* for example with a particle system: (ob == NULL) */ if (ob == NULL) { @@ -100,6 +113,20 @@ LatticeDeformData *BKE_lattice_deform_data_create(const Object *oblatt, const Ob invert_m4_m4(imat, latmat); } + /* Prefetch lattice deform group weights. 
*/ + int defgrp_index = -1; + const MDeformVert *dvert = BKE_lattice_deform_verts_get(oblatt); + if (lt->vgroup[0] && dvert) { + defgrp_index = BKE_object_defgroup_name_index(ob, lt->vgroup); + + if (defgrp_index != -1) { + lattice_weights = MEM_malloc_arrayN(sizeof(float), num_points, "lattice_weights"); + for (int index = 0; index < num_points; index++) { + lattice_weights[index] = BKE_defvert_find_weight(dvert + index, defgrp_index); + } + } + } + for (w = 0, fw = lt->fw; w < lt->pntsw; w++, fw += lt->dw) { for (v = 0, fv = lt->fv; v < lt->pntsv; v++, fv += lt->dv) { for (u = 0, fu = lt->fu; u < lt->pntsu; u++, bp++, co += 3, fp += 3, fu += lt->du) { @@ -121,7 +148,8 @@ LatticeDeformData *BKE_lattice_deform_data_create(const Object *oblatt, const Ob lattice_deform_data = MEM_mallocN(sizeof(LatticeDeformData), "Lattice Deform Data"); lattice_deform_data->latticedata = latticedata; - lattice_deform_data->object = oblatt; + lattice_deform_data->lattice_weights = lattice_weights; + lattice_deform_data->lt = lt; copy_m4_m4(lattice_deform_data->latmat, latmat); return lattice_deform_data; @@ -131,30 +159,21 @@ void BKE_lattice_deform_data_eval_co(LatticeDeformData *lattice_deform_data, float co[3], float weight) { - const Object *ob = lattice_deform_data->object; - Lattice *lt = ob->data; + float *latticedata = lattice_deform_data->latticedata; + float *lattice_weights = lattice_deform_data->lattice_weights; + BLI_assert(latticedata); + const Lattice *lt = lattice_deform_data->lt; float u, v, w, tu[4], tv[4], tw[4]; float vec[3]; int idx_w, idx_v, idx_u; int ui, vi, wi, uu, vv, ww; /* vgroup influence */ - int defgrp_index = -1; float co_prev[3], weight_blend = 0.0f; - const MDeformVert *dvert = BKE_lattice_deform_verts_get(ob); - float *__restrict latticedata = lattice_deform_data->latticedata; - - if (lt->editlatt) { - lt = lt->editlatt->latt; - } - if (latticedata == NULL) { - return; - } - - if (lt->vgroup[0] && dvert) { - defgrp_index = 
BKE_object_defgroup_name_index(ob, lt->vgroup); - copy_v3_v3(co_prev, co); - } + copy_v3_v3(co_prev, co); +#ifdef __SSE2__ + __m128 co_vec = _mm_loadu_ps(co_prev); +#endif /* co is in local coords, treat with latmat */ mul_v3_m4v3(vec, lattice_deform_data->latmat, co); @@ -197,67 +216,47 @@ void BKE_lattice_deform_data_eval_co(LatticeDeformData *lattice_deform_data, wi = 0; } - for (ww = wi - 1; ww <= wi + 2; ww++) { - w = tw[ww - wi + 1]; + const int w_stride = lt->pntsu * lt->pntsv; + const int idx_w_max = (lt->pntsw - 1) * lt->pntsu * lt->pntsv; + const int v_stride = lt->pntsu; + const int idx_v_max = (lt->pntsv - 1) * lt->pntsu; + const int idx_u_max = (lt->pntsu - 1); - if (w != 0.0f) { - if (ww > 0) { - if (ww < lt->pntsw) { - idx_w = ww * lt->pntsu * lt->pntsv; - } - else { - idx_w = (lt->pntsw - 1) * lt->pntsu * lt->pntsv; + for (ww = wi - 1; ww <= wi + 2; ww++) { + w = weight * tw[ww - wi + 1]; + idx_w = CLAMPIS(ww * w_stride, 0, idx_w_max); + for (vv = vi - 1; vv <= vi + 2; vv++) { + v = w * tv[vv - vi + 1]; + idx_v = CLAMPIS(vv * v_stride, 0, idx_v_max); + for (uu = ui - 1; uu <= ui + 2; uu++) { + u = v * tu[uu - ui + 1]; + idx_u = CLAMPIS(uu, 0, idx_u_max); + const int idx = idx_w + idx_v + idx_u; +#ifdef __SSE2__ + { + __m128 weight_vec = _mm_set1_ps(u); + /* This will load one extra element, this is ok because + * we ignore that part of register anyway. 
+ */ + __m128 lattice_vec = _mm_loadu_ps(&latticedata[idx * 3]); + co_vec = _mm_add_ps(co_vec, _mm_mul_ps(lattice_vec, weight_vec)); } - } - else { - idx_w = 0; - } - - for (vv = vi - 1; vv <= vi + 2; vv++) { - v = w * tv[vv - vi + 1]; - - if (v != 0.0f) { - if (vv > 0) { - if (vv < lt->pntsv) { - idx_v = idx_w + vv * lt->pntsu; - } - else { - idx_v = idx_w + (lt->pntsv - 1) * lt->pntsu; - } - } - else { - idx_v = idx_w; - } - - for (uu = ui - 1; uu <= ui + 2; uu++) { - u = weight * v * tu[uu - ui + 1]; - - if (u != 0.0f) { - if (uu > 0) { - if (uu < lt->pntsu) { - idx_u = idx_v + uu; - } - else { - idx_u = idx_v + (lt->pntsu - 1); - } - } - else { - idx_u = idx_v; - } - - madd_v3_v3fl(co, &latticedata[idx_u * 3], u); - - if (defgrp_index != -1) { - weight_blend += (u * BKE_defvert_find_weight(dvert + idx_u, defgrp_index)); - } - } - } +#else + madd_v3_v3fl(co, &latticedata[idx * 3], u); +#endif + if (lattice_weights) { + weight_blend += (u * lattice_weights[idx]); } } } } +#ifdef __SSE2__ + { + copy_v3_v3(co, (float *)&co_vec); + } +#endif - if (defgrp_index != -1) { + if (lattice_weights) { interp_v3_v3v3(co, co_prev, co, weight_blend); } } diff --git a/source/blender/blenkernel/intern/lattice_deform_test.cc b/source/blender/blenkernel/intern/lattice_deform_test.cc new file mode 100644 index 00000000000..33a4cc1d871 --- /dev/null +++ b/source/blender/blenkernel/intern/lattice_deform_test.cc @@ -0,0 +1,138 @@ +/* + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * The Original Code is Copyright (C) 2020 by Blender Foundation. + */ +#include "testing/testing.h" + +#include "BKE_idtype.h" +#include "BKE_lattice.h" + +#include "MEM_guardedalloc.h" + +#include "DNA_lattice_types.h" +#include "DNA_mesh_types.h" +#include "DNA_object_types.h" + +#include "BLI_rand.hh" + +namespace blender::bke::tests { + +struct LatticeDeformTestContext { + Lattice lattice; + Object ob_lattice; + Mesh mesh; + Object ob_mesh; + float (*coords)[3]; + LatticeDeformData *ldd; +}; + +static void test_lattice_deform_init(LatticeDeformTestContext *ctx, + RandomNumberGenerator *rng, + int32_t num_items) +{ + /* Generate random input data between -5 and 5. */ + ctx->coords = (float(*)[3])MEM_malloc_arrayN(sizeof(float[3]), num_items, __func__); + for (uint32_t index = 0; index < num_items; index++) { + ctx->coords[index][0] = (rng->get_float() - 0.5f) * 10; + ctx->coords[index][1] = (rng->get_float() - 0.5f) * 10; + ctx->coords[index][2] = (rng->get_float() - 0.5f) * 10; + } + IDType_ID_LT.init_data(&ctx->lattice.id); + IDType_ID_OB.init_data(&ctx->ob_lattice.id); + ctx->ob_lattice.type = OB_LATTICE; + ctx->ob_lattice.data = &ctx->lattice; + IDType_ID_OB.init_data(&ctx->ob_mesh.id); + IDType_ID_ME.init_data(&ctx->mesh.id); + ctx->ob_mesh.type = OB_MESH; + ctx->ob_mesh.data = &ctx->mesh; + + ctx->ldd = BKE_lattice_deform_data_create(&ctx->ob_lattice, &ctx->ob_mesh); +} + +static void test_lattice_deform(LatticeDeformTestContext *ctx, int32_t num_items) +{ + for (int i = 0; i < num_items; i++) { + float *co = &ctx->coords[i][0]; + BKE_lattice_deform_data_eval_co(ctx->ldd, co, 1.0f); + } +} + +static void test_lattice_deform_free(LatticeDeformTestContext *ctx) +{ + BKE_lattice_deform_data_destroy(ctx->ldd); + MEM_freeN(ctx->coords); + 
IDType_ID_LT.free_data(&ctx->lattice.id); + IDType_ID_OB.free_data(&ctx->ob_lattice.id); + IDType_ID_OB.free_data(&ctx->ob_mesh.id); + IDType_ID_ME.free_data(&ctx->mesh.id); +} + +TEST(lattice_deform_performance, performance_no_dvert_1) +{ + const int32_t num_items = 1; + LatticeDeformTestContext ctx = {0}; + RandomNumberGenerator rng; + test_lattice_deform_init(&ctx, &rng, num_items); + test_lattice_deform(&ctx, num_items); + test_lattice_deform_free(&ctx); +} +TEST(lattice_deform_performance, performance_no_dvert_1000) +{ + const int32_t num_items = 1000; + LatticeDeformTestContext ctx = {0}; + RandomNumberGenerator rng; + test_lattice_deform_init(&ctx, &rng, num_items); + test_lattice_deform(&ctx, num_items); + test_lattice_deform_free(&ctx); +} +TEST(lattice_deform_performance, performance_no_dvert_10000) +{ + const int32_t num_items = 10000; + LatticeDeformTestContext ctx = {0}; + RandomNumberGenerator rng; + test_lattice_deform_init(&ctx, &rng, num_items); + test_lattice_deform(&ctx, num_items); + test_lattice_deform_free(&ctx); +} +TEST(lattice_deform_performance, performance_no_dvert_100000) +{ + const int32_t num_items = 100000; + LatticeDeformTestContext ctx = {0}; + RandomNumberGenerator rng; + test_lattice_deform_init(&ctx, &rng, num_items); + test_lattice_deform(&ctx, num_items); + test_lattice_deform_free(&ctx); +} +TEST(lattice_deform_performance, performance_no_dvert_1000000) +{ + const int32_t num_items = 1000000; + LatticeDeformTestContext ctx = {0}; + RandomNumberGenerator rng; + test_lattice_deform_init(&ctx, &rng, num_items); + test_lattice_deform(&ctx, num_items); + test_lattice_deform_free(&ctx); +} +TEST(lattice_deform_performance, performance_no_dvert_10000000) +{ + const int32_t num_items = 10000000; + LatticeDeformTestContext ctx = {0}; + RandomNumberGenerator rng; + test_lattice_deform_init(&ctx, &rng, num_items); + test_lattice_deform(&ctx, num_items); + test_lattice_deform_free(&ctx); +} + +} // namespace blender::bke::tests
\ No newline at end of file |