Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBastien Montagne <montagne29@wanadoo.fr>2017-11-26 15:06:39 +0300
committerBastien Montagne <montagne29@wanadoo.fr>2017-11-26 18:03:29 +0300
commit2b6f3455584b3949045b517ce4759bcda70daf8a (patch)
treeec153c6762d20ded47666cd7bf45164af01a85f3 /source/blender/bmesh/intern/bmesh_interp.c
parent099bda8875e7b814e5c677e423b3c6640551267c (diff)
Removing OMP: bmesh_interp.c
Performances tests on this one are quite surprising actually... Parallelized loop itself is at least 10 times quicker with new BLI_task code than it was with OMP. And subdividing e.g. a heavy mesh with 3 levels of multires (whole process) takes 8 seconds with new code, while 10 seconds with OMP one. And cherry on top, BLI_task code only uses about 50% of CPU load, while OMP one was at nearly 100%! In fact, I suspect OMP code was not properly declaring outside vars, generating a lot of uneeded locks. Also, raised the minimum level of subdiv to enable parallelization, tests here showed that we only start to get significant gain with subdiv levels of 4, below single threaded one is quicker.
Diffstat (limited to 'source/blender/bmesh/intern/bmesh_interp.c')
-rw-r--r--source/blender/bmesh/intern/bmesh_interp.c115
1 files changed, 79 insertions, 36 deletions
diff --git a/source/blender/bmesh/intern/bmesh_interp.c b/source/blender/bmesh/intern/bmesh_interp.c
index 00f8eb6df40..96b2eb17c4c 100644
--- a/source/blender/bmesh/intern/bmesh_interp.c
+++ b/source/blender/bmesh/intern/bmesh_interp.c
@@ -36,12 +36,13 @@
#include "DNA_meshdata_types.h"
#include "BLI_alloca.h"
+#include "BLI_linklist.h"
#include "BLI_math.h"
+#include "BLI_memarena.h"
+#include "BLI_task.h"
#include "BKE_customdata.h"
#include "BKE_multires.h"
-#include "BLI_memarena.h"
-#include "BLI_linklist.h"
#include "bmesh.h"
#include "intern/bmesh_private.h"
@@ -401,13 +402,78 @@ static void bm_loop_flip_disp(
disp[1] = (mat[0][0] * b[1] - b[0] * mat[1][0]) / d;
}
+
+typedef struct BMLoopInterpMultiresData {
+ BMLoop *l_dst;
+ BMLoop *l_src_first;
+ int cd_loop_mdisp_offset;
+
+ MDisps *md_dst;
+ const float *f_src_center;
+
+ float *axis_x, *axis_y;
+ float *v1, *v4;
+ float *e1, *e2;
+
+ int res;
+ float d;
+} BMLoopInterpMultiresData;
+
+static void loop_interp_multires_cb(void *userdata, int ix)
+{
+ BMLoopInterpMultiresData *data = userdata;
+
+ BMLoop *l_first = data->l_src_first;
+ BMLoop *l_dst = data->l_dst;
+ const int cd_loop_mdisp_offset = data->cd_loop_mdisp_offset;
+
+ MDisps *md_dst = data->md_dst;
+ const float *f_src_center = data->f_src_center;
+
+ float *axis_x = data->axis_x;
+ float *axis_y = data->axis_y;
+
+ float *v1 = data->v1;
+ float *v4 = data->v4;
+ float *e1 = data->e1;
+ float *e2 = data->e2;
+
+ const int res = data->res;
+ const float d = data->d;
+
+ float x = d * ix, y;
+ int iy;
+ for (y = 0.0f, iy = 0; iy < res; y += d, iy++) {
+ BMLoop *l_iter = l_first;
+ float co1[3], co2[3], co[3];
+
+ madd_v3_v3v3fl(co1, v1, e1, y);
+ madd_v3_v3v3fl(co2, v4, e2, y);
+ interp_v3_v3v3(co, co1, co2, x);
+
+ do {
+ MDisps *md_src;
+ float src_axis_x[3], src_axis_y[3];
+ float uv[2];
+
+ md_src = BM_ELEM_CD_GET_VOID_P(l_iter, cd_loop_mdisp_offset);
+
+ if (mdisp_in_mdispquad(l_dst, l_iter, f_src_center, co, res, src_axis_x, src_axis_y, uv)) {
+ old_mdisps_bilinear(md_dst->disps[iy * res + ix], md_src->disps, res, uv[0], uv[1]);
+ bm_loop_flip_disp(src_axis_x, src_axis_y, axis_x, axis_y, md_dst->disps[iy * res + ix]);
+
+ break;
+ }
+ } while ((l_iter = l_iter->next) != l_first);
+ }
+}
+
void BM_loop_interp_multires_ex(
BMesh *UNUSED(bm), BMLoop *l_dst, const BMFace *f_src,
const float f_dst_center[3], const float f_src_center[3], const int cd_loop_mdisp_offset)
{
MDisps *md_dst;
- float d, v1[3], v2[3], v3[3], v4[3] = {0.0f, 0.0f, 0.0f}, e1[3], e2[3];
- int ix, res;
+ float v1[3], v2[3], v3[3], v4[3] = {0.0f, 0.0f, 0.0f}, e1[3], e2[3];
float axis_x[3], axis_y[3];
/* ignore 2-edged faces */
@@ -433,38 +499,15 @@ void BM_loop_interp_multires_ex(
mdisp_axis_from_quad(v1, v2, v3, v4, axis_x, axis_y);
- res = (int)sqrt(md_dst->totdisp);
- d = 1.0f / (float)(res - 1);
-#pragma omp parallel for if (res > 3)
- for (ix = 0; ix < res; ix++) {
- float x = d * ix, y;
- int iy;
- for (y = 0.0f, iy = 0; iy < res; y += d, iy++) {
- BMLoop *l_iter;
- BMLoop *l_first;
- float co1[3], co2[3], co[3];
-
- madd_v3_v3v3fl(co1, v1, e1, y);
- madd_v3_v3v3fl(co2, v4, e2, y);
- interp_v3_v3v3(co, co1, co2, x);
-
- l_iter = l_first = BM_FACE_FIRST_LOOP(f_src);
- do {
- MDisps *md_src;
- float src_axis_x[3], src_axis_y[3];
- float uv[2];
-
- md_src = BM_ELEM_CD_GET_VOID_P(l_iter, cd_loop_mdisp_offset);
-
- if (mdisp_in_mdispquad(l_dst, l_iter, f_src_center, co, res, src_axis_x, src_axis_y, uv)) {
- old_mdisps_bilinear(md_dst->disps[iy * res + ix], md_src->disps, res, uv[0], uv[1]);
- bm_loop_flip_disp(src_axis_x, src_axis_y, axis_x, axis_y, md_dst->disps[iy * res + ix]);
-
- break;
- }
- } while ((l_iter = l_iter->next) != l_first);
- }
- }
+ const int res = (int)sqrt(md_dst->totdisp);
+ BMLoopInterpMultiresData data = {
+ .l_dst = l_dst, .l_src_first = BM_FACE_FIRST_LOOP(f_src),
+ .cd_loop_mdisp_offset = cd_loop_mdisp_offset,
+ .md_dst = md_dst, .f_src_center = f_src_center,
+ .axis_x = axis_x, .axis_y = axis_y, .v1 = v1, .v4 = v4, .e1 = e1, .e2 = e2,
+ .res = res, .d = 1.0f / (float)(res - 1)
+ };
+ BLI_task_parallel_range(0, res, &data, loop_interp_multires_cb, res > 5);
}
/**