Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBastien Montagne <montagne29@wanadoo.fr>2015-12-06 19:37:10 +0300
committerBastien Montagne <montagne29@wanadoo.fr>2015-12-06 19:37:10 +0300
commitb36a0c44bb1ca12060cf724678ffe62bfa9d523f (patch)
tree3bea460a8f136d8a29cedcc6bb61e21e7d3591e7 /source/blender/blenkernel/intern/ocean.c
parentdeec6ed901c81b35c2d112b3b47124e58f6c3611 (diff)
Switch from OMP to BLI_task in BKE's part of Ocean simulation.
Not much to say, gives about 8% to 9% speedup in ocean simulation.
Diffstat (limited to 'source/blender/blenkernel/intern/ocean.c')
-rw-r--r--source/blender/blenkernel/intern/ocean.c470
1 files changed, 268 insertions, 202 deletions
diff --git a/source/blender/blenkernel/intern/ocean.c b/source/blender/blenkernel/intern/ocean.c
index 1a178fb2bdf..b1720d4b27a 100644
--- a/source/blender/blenkernel/intern/ocean.c
+++ b/source/blender/blenkernel/intern/ocean.c
@@ -41,6 +41,7 @@
#include "BLI_math.h"
#include "BLI_path_util.h"
#include "BLI_rand.h"
+#include "BLI_task.h"
#include "BLI_threads.h"
#include "BLI_utildefines.h"
@@ -494,231 +495,296 @@ void BKE_ocean_eval_ij(struct Ocean *oc, struct OceanResult *ocr, int i, int j)
BLI_rw_mutex_unlock(&oc->oceanmutex);
}
-void BKE_ocean_simulate(struct Ocean *o, float t, float scale, float chop_amount)
+typedef struct OceanSimulateData {
+ Ocean *o;
+ float t;
+ float scale;
+ float chop_amount;
+} OceanSimulateData;
+
+static void ocean_compute_htilda_cb(void *userdata, void *UNUSED(userdata_chunk), int i)
+{
+ OceanSimulateData *osd = userdata;
+ const Ocean *o = osd->o;
+ const float scale = osd->scale;
+ const float t = osd->t;
+
+ int j;
+
+ /* note the <= _N/2 here, see the fftw doco about the mechanics of the complex->real fft storage */
+ for (j = 0; j <= o->_N / 2; ++j) {
+ fftw_complex exp_param1;
+ fftw_complex exp_param2;
+ fftw_complex conj_param;
+
+ init_complex(exp_param1, 0.0, omega(o->_k[i * (1 + o->_N / 2) + j], o->_depth) * t);
+ init_complex(exp_param2, 0.0, -omega(o->_k[i * (1 + o->_N / 2) + j], o->_depth) * t);
+ exp_complex(exp_param1, exp_param1);
+ exp_complex(exp_param2, exp_param2);
+ conj_complex(conj_param, o->_h0_minus[i * o->_N + j]);
+
+ mul_complex_c(exp_param1, o->_h0[i * o->_N + j], exp_param1);
+ mul_complex_c(exp_param2, conj_param, exp_param2);
+
+ add_comlex_c(o->_htilda[i * (1 + o->_N / 2) + j], exp_param1, exp_param2);
+ mul_complex_f(o->_fft_in[i * (1 + o->_N / 2) + j], o->_htilda[i * (1 + o->_N / 2) + j], scale);
+ }
+}
+
+static void ocean_compute_displacement_y(TaskPool *pool, void *UNUSED(taskdata), int UNUSED(threadid))
+{
+ OceanSimulateData *osd = BLI_task_pool_userdata(pool);
+ const Ocean *o = osd->o;
+
+ fftw_execute(o->_disp_y_plan);
+}
+
+static void ocean_compute_displacement_x(TaskPool *pool, void *UNUSED(taskdata), int UNUSED(threadid))
{
+ OceanSimulateData *osd = BLI_task_pool_userdata(pool);
+ const Ocean *o = osd->o;
+ const float scale = osd->scale;
+ const float chop_amount = osd->chop_amount;
int i, j;
- scale *= o->normalize_factor;
+ for (i = 0; i < o->_M; ++i) {
+ for (j = 0; j <= o->_N / 2; ++j) {
+ fftw_complex mul_param;
+ fftw_complex minus_i;
+
+ init_complex(minus_i, 0.0, -1.0);
+ init_complex(mul_param, -scale, 0);
+ mul_complex_f(mul_param, mul_param, chop_amount);
+ mul_complex_c(mul_param, mul_param, minus_i);
+ mul_complex_c(mul_param, mul_param, o->_htilda[i * (1 + o->_N / 2) + j]);
+ mul_complex_f(mul_param, mul_param,
+ ((o->_k[i * (1 + o->_N / 2) + j] == 0.0f) ?
+ 0.0f :
+ o->_kx[i] / o->_k[i * (1 + o->_N / 2) + j]));
+ init_complex(o->_fft_in_x[i * (1 + o->_N / 2) + j], real_c(mul_param), image_c(mul_param));
+ }
+ }
+ fftw_execute(o->_disp_x_plan);
+}
- BLI_rw_mutex_lock(&o->oceanmutex, THREAD_LOCK_WRITE);
+static void ocean_compute_displacement_z(TaskPool *pool, void *UNUSED(taskdata), int UNUSED(threadid))
+{
+ OceanSimulateData *osd = BLI_task_pool_userdata(pool);
+ const Ocean *o = osd->o;
+ const float scale = osd->scale;
+ const float chop_amount = osd->chop_amount;
+ int i, j;
- /* compute a new htilda */
-#pragma omp parallel for private(i, j)
for (i = 0; i < o->_M; ++i) {
- /* note the <= _N/2 here, see the fftw doco about the mechanics of the complex->real fft storage */
for (j = 0; j <= o->_N / 2; ++j) {
- fftw_complex exp_param1;
- fftw_complex exp_param2;
- fftw_complex conj_param;
+ fftw_complex mul_param;
+ fftw_complex minus_i;
+
+ init_complex(minus_i, 0.0, -1.0);
+ init_complex(mul_param, -scale, 0);
+ mul_complex_f(mul_param, mul_param, chop_amount);
+ mul_complex_c(mul_param, mul_param, minus_i);
+ mul_complex_c(mul_param, mul_param, o->_htilda[i * (1 + o->_N / 2) + j]);
+ mul_complex_f(mul_param, mul_param,
+ ((o->_k[i * (1 + o->_N / 2) + j] == 0.0f) ?
+ 0.0f :
+ o->_kz[j] / o->_k[i * (1 + o->_N / 2) + j]));
+ init_complex(o->_fft_in_z[i * (1 + o->_N / 2) + j], real_c(mul_param), image_c(mul_param));
+ }
+ }
+ fftw_execute(o->_disp_z_plan);
+}
+
+static void ocean_compute_jacobian_jxx(TaskPool *pool, void *UNUSED(taskdata), int UNUSED(threadid))
+{
+ OceanSimulateData *osd = BLI_task_pool_userdata(pool);
+ const Ocean *o = osd->o;
+ const float chop_amount = osd->chop_amount;
+ int i, j;
+ for (i = 0; i < o->_M; ++i) {
+ for (j = 0; j <= o->_N / 2; ++j) {
+ fftw_complex mul_param;
+
+ /* init_complex(mul_param, -scale, 0); */
+ init_complex(mul_param, -1, 0);
+
+ mul_complex_f(mul_param, mul_param, chop_amount);
+ mul_complex_c(mul_param, mul_param, o->_htilda[i * (1 + o->_N / 2) + j]);
+ mul_complex_f(mul_param, mul_param,
+ ((o->_k[i * (1 + o->_N / 2) + j] == 0.0f) ?
+ 0.0f :
+ o->_kx[i] * o->_kx[i] / o->_k[i * (1 + o->_N / 2) + j]));
+ init_complex(o->_fft_in_jxx[i * (1 + o->_N / 2) + j], real_c(mul_param), image_c(mul_param));
+ }
+ }
+ fftw_execute(o->_Jxx_plan);
- init_complex(exp_param1, 0.0, omega(o->_k[i * (1 + o->_N / 2) + j], o->_depth) * t);
- init_complex(exp_param2, 0.0, -omega(o->_k[i * (1 + o->_N / 2) + j], o->_depth) * t);
- exp_complex(exp_param1, exp_param1);
- exp_complex(exp_param2, exp_param2);
- conj_complex(conj_param, o->_h0_minus[i * o->_N + j]);
+ for (i = 0; i < o->_M; ++i) {
+ for (j = 0; j < o->_N; ++j) {
+ o->_Jxx[i * o->_N + j] += 1.0;
+ }
+ }
+}
- mul_complex_c(exp_param1, o->_h0[i * o->_N + j], exp_param1);
- mul_complex_c(exp_param2, conj_param, exp_param2);
+static void ocean_compute_jacobian_jzz(TaskPool *pool, void *UNUSED(taskdata), int UNUSED(threadid))
+{
+ OceanSimulateData *osd = BLI_task_pool_userdata(pool);
+ const Ocean *o = osd->o;
+ const float chop_amount = osd->chop_amount;
+ int i, j;
- add_comlex_c(o->_htilda[i * (1 + o->_N / 2) + j], exp_param1, exp_param2);
- mul_complex_f(o->_fft_in[i * (1 + o->_N / 2) + j], o->_htilda[i * (1 + o->_N / 2) + j], scale);
+ for (i = 0; i < o->_M; ++i) {
+ for (j = 0; j <= o->_N / 2; ++j) {
+ fftw_complex mul_param;
+
+ /* init_complex(mul_param, -scale, 0); */
+ init_complex(mul_param, -1, 0);
+
+ mul_complex_f(mul_param, mul_param, chop_amount);
+ mul_complex_c(mul_param, mul_param, o->_htilda[i * (1 + o->_N / 2) + j]);
+ mul_complex_f(mul_param, mul_param,
+ ((o->_k[i * (1 + o->_N / 2) + j] == 0.0f) ?
+ 0.0f :
+ o->_kz[j] * o->_kz[j] / o->_k[i * (1 + o->_N / 2) + j]));
+ init_complex(o->_fft_in_jzz[i * (1 + o->_N / 2) + j], real_c(mul_param), image_c(mul_param));
}
}
+ fftw_execute(o->_Jzz_plan);
-#pragma omp parallel sections private(i, j)
- {
+ for (i = 0; i < o->_M; ++i) {
+ for (j = 0; j < o->_N; ++j) {
+ o->_Jzz[i * o->_N + j] += 1.0;
+ }
+ }
+}
-#pragma omp section
- {
- if (o->_do_disp_y) {
- /* y displacement */
- fftw_execute(o->_disp_y_plan);
- }
- } /* section 1 */
-
-#pragma omp section
- {
- if (o->_do_chop) {
- /* x displacement */
- for (i = 0; i < o->_M; ++i) {
- for (j = 0; j <= o->_N / 2; ++j) {
- fftw_complex mul_param;
- fftw_complex minus_i;
-
- init_complex(minus_i, 0.0, -1.0);
- init_complex(mul_param, -scale, 0);
- mul_complex_f(mul_param, mul_param, chop_amount);
- mul_complex_c(mul_param, mul_param, minus_i);
- mul_complex_c(mul_param, mul_param, o->_htilda[i * (1 + o->_N / 2) + j]);
- mul_complex_f(mul_param, mul_param,
- ((o->_k[i * (1 + o->_N / 2) + j] == 0.0f) ?
- 0.0f :
- o->_kx[i] / o->_k[i * (1 + o->_N / 2) + j]));
- init_complex(o->_fft_in_x[i * (1 + o->_N / 2) + j], real_c(mul_param), image_c(mul_param));
- }
- }
- fftw_execute(o->_disp_x_plan);
- }
- } /* section 2 */
-
-#pragma omp section
- {
- if (o->_do_chop) {
- /* z displacement */
- for (i = 0; i < o->_M; ++i) {
- for (j = 0; j <= o->_N / 2; ++j) {
- fftw_complex mul_param;
- fftw_complex minus_i;
-
- init_complex(minus_i, 0.0, -1.0);
- init_complex(mul_param, -scale, 0);
- mul_complex_f(mul_param, mul_param, chop_amount);
- mul_complex_c(mul_param, mul_param, minus_i);
- mul_complex_c(mul_param, mul_param, o->_htilda[i * (1 + o->_N / 2) + j]);
- mul_complex_f(mul_param, mul_param,
- ((o->_k[i * (1 + o->_N / 2) + j] == 0.0f) ?
- 0.0f :
- o->_kz[j] / o->_k[i * (1 + o->_N / 2) + j]));
- init_complex(o->_fft_in_z[i * (1 + o->_N / 2) + j], real_c(mul_param), image_c(mul_param));
- }
- }
- fftw_execute(o->_disp_z_plan);
- }
- } /* section 3 */
-
-#pragma omp section
- {
- if (o->_do_jacobian) {
- /* Jxx */
- for (i = 0; i < o->_M; ++i) {
- for (j = 0; j <= o->_N / 2; ++j) {
- fftw_complex mul_param;
-
- /* init_complex(mul_param, -scale, 0); */
- init_complex(mul_param, -1, 0);
-
- mul_complex_f(mul_param, mul_param, chop_amount);
- mul_complex_c(mul_param, mul_param, o->_htilda[i * (1 + o->_N / 2) + j]);
- mul_complex_f(mul_param, mul_param,
- ((o->_k[i * (1 + o->_N / 2) + j] == 0.0f) ?
- 0.0f :
- o->_kx[i] * o->_kx[i] / o->_k[i * (1 + o->_N / 2) + j]));
- init_complex(o->_fft_in_jxx[i * (1 + o->_N / 2) + j], real_c(mul_param), image_c(mul_param));
- }
- }
- fftw_execute(o->_Jxx_plan);
+static void ocean_compute_jacobian_jxz(TaskPool *pool, void *UNUSED(taskdata), int UNUSED(threadid))
+{
+ OceanSimulateData *osd = BLI_task_pool_userdata(pool);
+ const Ocean *o = osd->o;
+ const float chop_amount = osd->chop_amount;
+ int i, j;
- for (i = 0; i < o->_M; ++i) {
- for (j = 0; j < o->_N; ++j) {
- o->_Jxx[i * o->_N + j] += 1.0;
- }
- }
- }
- } /* section 4 */
-
-#pragma omp section
- {
- if (o->_do_jacobian) {
- /* Jzz */
- for (i = 0; i < o->_M; ++i) {
- for (j = 0; j <= o->_N / 2; ++j) {
- fftw_complex mul_param;
-
- /* init_complex(mul_param, -scale, 0); */
- init_complex(mul_param, -1, 0);
-
- mul_complex_f(mul_param, mul_param, chop_amount);
- mul_complex_c(mul_param, mul_param, o->_htilda[i * (1 + o->_N / 2) + j]);
- mul_complex_f(mul_param, mul_param,
- ((o->_k[i * (1 + o->_N / 2) + j] == 0.0f) ?
- 0.0f :
- o->_kz[j] * o->_kz[j] / o->_k[i * (1 + o->_N / 2) + j]));
- init_complex(o->_fft_in_jzz[i * (1 + o->_N / 2) + j], real_c(mul_param), image_c(mul_param));
- }
- }
- fftw_execute(o->_Jzz_plan);
- for (i = 0; i < o->_M; ++i) {
- for (j = 0; j < o->_N; ++j) {
- o->_Jzz[i * o->_N + j] += 1.0;
- }
- }
- }
- } /* section 5 */
-
-#pragma omp section
- {
- if (o->_do_jacobian) {
- /* Jxz */
- for (i = 0; i < o->_M; ++i) {
- for (j = 0; j <= o->_N / 2; ++j) {
- fftw_complex mul_param;
-
- /* init_complex(mul_param, -scale, 0); */
- init_complex(mul_param, -1, 0);
-
- mul_complex_f(mul_param, mul_param, chop_amount);
- mul_complex_c(mul_param, mul_param, o->_htilda[i * (1 + o->_N / 2) + j]);
- mul_complex_f(mul_param, mul_param,
- ((o->_k[i * (1 + o->_N / 2) + j] == 0.0f) ?
- 0.0f :
- o->_kx[i] * o->_kz[j] / o->_k[i * (1 + o->_N / 2) + j]));
- init_complex(o->_fft_in_jxz[i * (1 + o->_N / 2) + j], real_c(mul_param), image_c(mul_param));
- }
- }
- fftw_execute(o->_Jxz_plan);
- }
- } /* section 6 */
-
-#pragma omp section
- {
- /* fft normals */
- if (o->_do_normals) {
- for (i = 0; i < o->_M; ++i) {
- for (j = 0; j <= o->_N / 2; ++j) {
- fftw_complex mul_param;
-
- init_complex(mul_param, 0.0, -1.0);
- mul_complex_c(mul_param, mul_param, o->_htilda[i * (1 + o->_N / 2) + j]);
- mul_complex_f(mul_param, mul_param, o->_kx[i]);
- init_complex(o->_fft_in_nx[i * (1 + o->_N / 2) + j], real_c(mul_param), image_c(mul_param));
- }
- }
- fftw_execute(o->_N_x_plan);
+ for (i = 0; i < o->_M; ++i) {
+ for (j = 0; j <= o->_N / 2; ++j) {
+ fftw_complex mul_param;
+
+ /* init_complex(mul_param, -scale, 0); */
+ init_complex(mul_param, -1, 0);
+
+ mul_complex_f(mul_param, mul_param, chop_amount);
+ mul_complex_c(mul_param, mul_param, o->_htilda[i * (1 + o->_N / 2) + j]);
+ mul_complex_f(mul_param, mul_param,
+ ((o->_k[i * (1 + o->_N / 2) + j] == 0.0f) ?
+ 0.0f :
+ o->_kx[i] * o->_kz[j] / o->_k[i * (1 + o->_N / 2) + j]));
+ init_complex(o->_fft_in_jxz[i * (1 + o->_N / 2) + j], real_c(mul_param), image_c(mul_param));
+ }
+ }
+ fftw_execute(o->_Jxz_plan);
+}
- }
- } /* section 7 */
-
-#pragma omp section
- {
- if (o->_do_normals) {
- for (i = 0; i < o->_M; ++i) {
- for (j = 0; j <= o->_N / 2; ++j) {
- fftw_complex mul_param;
-
- init_complex(mul_param, 0.0, -1.0);
- mul_complex_c(mul_param, mul_param, o->_htilda[i * (1 + o->_N / 2) + j]);
- mul_complex_f(mul_param, mul_param, o->_kz[i]);
- init_complex(o->_fft_in_nz[i * (1 + o->_N / 2) + j], real_c(mul_param), image_c(mul_param));
- }
- }
- fftw_execute(o->_N_z_plan);
+static void ocean_compute_normal_x(TaskPool *pool, void *UNUSED(taskdata), int UNUSED(threadid))
+{
+ OceanSimulateData *osd = BLI_task_pool_userdata(pool);
+ const Ocean *o = osd->o;
+ int i, j;
+
+ for (i = 0; i < o->_M; ++i) {
+ for (j = 0; j <= o->_N / 2; ++j) {
+ fftw_complex mul_param;
+
+ init_complex(mul_param, 0.0, -1.0);
+ mul_complex_c(mul_param, mul_param, o->_htilda[i * (1 + o->_N / 2) + j]);
+ mul_complex_f(mul_param, mul_param, o->_kx[i]);
+ init_complex(o->_fft_in_nx[i * (1 + o->_N / 2) + j], real_c(mul_param), image_c(mul_param));
+ }
+ }
+ fftw_execute(o->_N_x_plan);
+}
+
+static void ocean_compute_normal_z(TaskPool *pool, void *UNUSED(taskdata), int UNUSED(threadid))
+{
+ OceanSimulateData *osd = BLI_task_pool_userdata(pool);
+ const Ocean *o = osd->o;
+ int i, j;
+
+ for (i = 0; i < o->_M; ++i) {
+ for (j = 0; j <= o->_N / 2; ++j) {
+ fftw_complex mul_param;
+
+ init_complex(mul_param, 0.0, -1.0);
+ mul_complex_c(mul_param, mul_param, o->_htilda[i * (1 + o->_N / 2) + j]);
+ mul_complex_f(mul_param, mul_param, o->_kz[i]);
+ init_complex(o->_fft_in_nz[i * (1 + o->_N / 2) + j], real_c(mul_param), image_c(mul_param));
+ }
+ }
+ fftw_execute(o->_N_z_plan);
+}
+
+void BKE_ocean_simulate(struct Ocean *o, float t, float scale, float chop_amount)
+{
+ TaskScheduler *scheduler = BLI_task_scheduler_get();
+ TaskPool *pool;
+
+ OceanSimulateData osd;
+
+ scale *= o->normalize_factor;
+
+ osd.o = o;
+ osd.t = t;
+ osd.scale = scale;
+ osd.chop_amount = chop_amount;
+
+ pool = BLI_task_pool_create(scheduler, &osd);
+
+ BLI_rw_mutex_lock(&o->oceanmutex, THREAD_LOCK_WRITE);
+
+ /* Note about multi-threading here: we have to run a first set of computations (htilda one) before we can run
+ * all others, since they all depend on it.
+ * So we make a first parallelized forloop run for htilda, and then pack all other computations into
+ * a set of parallel tasks.
+ * This is not optimal in all cases, but remains reasonably simple and should be OK most of the time. */
+
+ /* compute a new htilda */
+ BLI_task_parallel_range(0, o->_M, &osd, ocean_compute_htilda_cb);
+
+ if (o->_do_disp_y) {
+ BLI_task_pool_push(pool, ocean_compute_displacement_y, NULL, false, TASK_PRIORITY_HIGH);
+ }
+
+ if (o->_do_chop) {
+ BLI_task_pool_push(pool, ocean_compute_displacement_x, NULL, false, TASK_PRIORITY_HIGH);
+ BLI_task_pool_push(pool, ocean_compute_displacement_z, NULL, false, TASK_PRIORITY_HIGH);
+ }
+
+ if (o->_do_jacobian) {
+ BLI_task_pool_push(pool, ocean_compute_jacobian_jxx, NULL, false, TASK_PRIORITY_HIGH);
+ BLI_task_pool_push(pool, ocean_compute_jacobian_jzz, NULL, false, TASK_PRIORITY_HIGH);
+ BLI_task_pool_push(pool, ocean_compute_jacobian_jxz, NULL, false, TASK_PRIORITY_HIGH);
+ }
+
+ if (o->_do_normals) {
+ BLI_task_pool_push(pool, ocean_compute_normal_x, NULL, false, TASK_PRIORITY_HIGH);
+ BLI_task_pool_push(pool, ocean_compute_normal_z, NULL, false, TASK_PRIORITY_HIGH);
#if 0
- for (i = 0; i < o->_M; ++i) {
- for (j = 0; j < o->_N; ++j) {
- o->_N_y[i * o->_N + j] = 1.0f / scale;
- }
- }
- (MEM01)
-#endif
- o->_N_y = 1.0f / scale;
+ for (i = 0; i < o->_M; ++i) {
+ for (j = 0; j < o->_N; ++j) {
+ o->_N_y[i * o->_N + j] = 1.0f / scale;
}
- } /* section 8 */
+ }
+ (MEM01)
+#endif
+ o->_N_y = 1.0f / scale;
+ }
- } /* omp sections */
+ BLI_task_pool_work_and_wait(pool);
BLI_rw_mutex_unlock(&o->oceanmutex);
+
+ BLI_task_pool_free(pool);
}
static void set_height_normalize_factor(struct Ocean *oc)