diff options
author | Jeroen Bakker <jbakker> | 2021-06-11 16:55:09 +0300 |
---|---|---|
committer | Jeroen Bakker <jeroen@blender.org> | 2021-06-11 16:55:22 +0300 |
commit | 7b30a3e98def6f9f158da0e315b7077655acfd20 (patch) | |
tree | 0299bf5e27e36e9f6c9b26ce8d2beb4452a95463 /source/blender/imbuf/intern/imageprocess.c | |
parent | 7b76a160a4647b17c1a55da4bd40e8f549225568 (diff) |
Performance: Use parallel range for ImBuf scanline processor.
Scanline processor did its own heuristic, which didn't scale well when
having multiple cores. Instead of using our own code, this patch will
leave it to TBB to determine how to split the scanlines over the
available threads.
Performance of the IMB_transform before this change was 0.002123s, with
this change 0.001601s. This change increases performance in other areas
as well including color management conversions.
Reviewed By: zeddb
Differential Revision: https://developer.blender.org/D11578
Diffstat (limited to 'source/blender/imbuf/intern/imageprocess.c')
-rw-r--r-- | source/blender/imbuf/intern/imageprocess.c | 89 |
1 files changed, 33 insertions, 56 deletions
diff --git a/source/blender/imbuf/intern/imageprocess.c b/source/blender/imbuf/intern/imageprocess.c index 4320f30884b..a9b6e2bbb88 100644 --- a/source/blender/imbuf/intern/imageprocess.c +++ b/source/blender/imbuf/intern/imageprocess.c @@ -412,42 +412,34 @@ static void imb_transform_calc_add_y(const float transform_matrix[3][3], typedef void (*InterpolationColorFunction)( struct ImBuf *in, unsigned char outI[4], float outF[4], float u, float v); BLI_INLINE void imb_transform_scanlines(const TransformUserData *user_data, - int start_scanline, - int num_scanlines, + int scanline, InterpolationColorFunction interpolation) { const int width = user_data->dst->x; - float next_line_start_uv[2]; - madd_v2_v2v2fl(next_line_start_uv, user_data->start_uv, user_data->add_y, start_scanline); + float uv[2]; + madd_v2_v2v2fl(uv, user_data->start_uv, user_data->add_y, scanline); unsigned char *outI = NULL; float *outF = NULL; - pixel_from_buffer(user_data->dst, &outI, &outF, 0, start_scanline); - - for (int yi = start_scanline; yi < start_scanline + num_scanlines; yi++) { - float uv[2]; - copy_v2_v2(uv, next_line_start_uv); - add_v2_v2(next_line_start_uv, user_data->add_y); - for (int xi = 0; xi < width; xi++) { - if (uv[0] >= user_data->src_crop.xmin && uv[0] < user_data->src_crop.xmax && - uv[1] >= user_data->src_crop.ymin && uv[1] < user_data->src_crop.ymax) { - interpolation(user_data->src, outI, outF, uv[0], uv[1]); - } - add_v2_v2(uv, user_data->add_x); - if (outI) { - outI += 4; - } - if (outF) { - outF += 4; - } + pixel_from_buffer(user_data->dst, &outI, &outF, 0, scanline); + + for (int xi = 0; xi < width; xi++) { + if (uv[0] >= user_data->src_crop.xmin && uv[0] < user_data->src_crop.xmax && + uv[1] >= user_data->src_crop.ymin && uv[1] < user_data->src_crop.ymax) { + interpolation(user_data->src, outI, outF, uv[0], uv[1]); + } + add_v2_v2(uv, user_data->add_x); + if (outI) { + outI += 4; + } + if (outF) { + outF += 4; } } } -static void 
imb_transform_nearest_scanlines(void *custom_data, - int start_scanline, - int num_scanlines) +static void imb_transform_nearest_scanlines(void *custom_data, int scanline) { const TransformUserData *user_data = custom_data; InterpolationColorFunction interpolation = NULL; @@ -457,12 +449,10 @@ static void imb_transform_nearest_scanlines(void *custom_data, else { interpolation = nearest_interpolation_color_char; } - imb_transform_scanlines(user_data, start_scanline, num_scanlines, interpolation); + imb_transform_scanlines(user_data, scanline, interpolation); } -static void imb_transform_bilinear_scanlines(void *custom_data, - int start_scanline, - int num_scanlines) +static void imb_transform_bilinear_scanlines(void *custom_data, int scanline) { const TransformUserData *user_data = custom_data; InterpolationColorFunction interpolation = NULL; @@ -472,7 +462,7 @@ static void imb_transform_bilinear_scanlines(void *custom_data, else if (user_data->dst->rect) { interpolation = bilinear_interpolation_color_char; } - imb_transform_scanlines(user_data, start_scanline, num_scanlines, interpolation); + imb_transform_scanlines(user_data, scanline, interpolation); } static ScanlineThreadFunc imb_transform_scanline_func(const eIMBInterpolationFilterMode filter) @@ -568,41 +558,28 @@ void IMB_processor_apply_threaded( typedef struct ScanlineGlobalData { void *custom_data; ScanlineThreadFunc do_thread; - int scanlines_per_task; - int total_scanlines; } ScanlineGlobalData; -static void processor_apply_scanline_func(TaskPool *__restrict pool, void *taskdata) +static void processor_apply_parallel(void *__restrict userdata, + const int scanline, + const TaskParallelTLS *__restrict UNUSED(tls)) { - ScanlineGlobalData *data = BLI_task_pool_user_data(pool); - int start_scanline = POINTER_AS_INT(taskdata); - int num_scanlines = min_ii(data->scanlines_per_task, data->total_scanlines - start_scanline); - data->do_thread(data->custom_data, start_scanline, num_scanlines); + 
ScanlineGlobalData *data = userdata; + data->do_thread(data->custom_data, scanline); } void IMB_processor_apply_threaded_scanlines(int total_scanlines, ScanlineThreadFunc do_thread, void *custom_data) { - const int scanlines_per_task = 64; - ScanlineGlobalData data; - data.custom_data = custom_data; - data.do_thread = do_thread; - data.scanlines_per_task = scanlines_per_task; - data.total_scanlines = total_scanlines; - const int total_tasks = (total_scanlines + scanlines_per_task - 1) / scanlines_per_task; - TaskPool *task_pool = BLI_task_pool_create(&data, TASK_PRIORITY_LOW, TASK_ISOLATION_ON); - for (int i = 0, start_line = 0; i < total_tasks; i++) { - BLI_task_pool_push( - task_pool, processor_apply_scanline_func, POINTER_FROM_INT(start_line), false, NULL); - start_line += scanlines_per_task; - } - - /* work and wait until tasks are done */ - BLI_task_pool_work_and_wait(task_pool); - - /* Free memory. */ - BLI_task_pool_free(task_pool); + TaskParallelSettings settings; + ScanlineGlobalData data = { + .do_thread = do_thread, + .custom_data = custom_data, + }; + + BLI_parallel_range_settings_defaults(&settings); + BLI_task_parallel_range(0, total_scanlines, &data, processor_apply_parallel, &settings); } /** \} */ |