From 1eca4371979b45f6443446f8c7483aca8d9dc3b0 Mon Sep 17 00:00:00 2001
From: Lukas Stockner
Date: Tue, 8 Nov 2022 20:39:40 +0100
Subject: Color Management: Parallelize ImBuf conversion to float

Motivated by long loading times in T101969, reduces render preparation time
from 14sec to 6sec.

Another possible improvement would be to use C++ and template based on OCIO
vs. sRGB, but moving the file to C++ seems nontrivial (and opens up the
question whether ocio_capi makes any sense then or we should just use OCIO
directly) so I left it at a direct 1:1 parallelization of the existing code
for now.

Reviewed By: brecht

Differential Revision: https://developer.blender.org/D16317
---
 source/blender/imbuf/intern/colormanagement.c | 75 +++++++++++++++++++--------
 1 file changed, 52 insertions(+), 23 deletions(-)

(limited to 'source/blender/imbuf/intern/colormanagement.c')

diff --git a/source/blender/imbuf/intern/colormanagement.c b/source/blender/imbuf/intern/colormanagement.c
index 5e132826a4c..0678c224e6b 100644
--- a/source/blender/imbuf/intern/colormanagement.c
+++ b/source/blender/imbuf/intern/colormanagement.c
@@ -31,6 +31,7 @@
 #include "BLI_math_color.h"
 #include "BLI_rect.h"
 #include "BLI_string.h"
+#include "BLI_task.h"
 #include "BLI_threads.h"

 #include "BKE_appdir.h"
@@ -2249,6 +2250,43 @@ void IMB_colormanagement_imbuf_to_byte_texture(uchar *out_buffer,
   }
 }

+typedef struct ImbufByteToFloatData {
+  OCIO_ConstCPUProcessorRcPtr *processor;
+  int width;
+  int offset, stride;
+  const uchar *in_buffer;
+  float *out_buffer;
+  bool use_premultiply;
+} ImbufByteToFloatData;
+
+static void imbuf_byte_to_float_cb(void *__restrict userdata,
+                                   const int y,
+                                   const TaskParallelTLS *__restrict UNUSED(tls))
+{
+  ImbufByteToFloatData *data = userdata;
+
+  const size_t in_offset = data->offset + y * data->stride;
+  const size_t out_offset = y * data->width;
+  const uchar *in = data->in_buffer + in_offset * 4;
+  float *out = data->out_buffer + out_offset * 4;
+
+  /* Convert to scene linear, to sRGB and premultiply. */
+  for (int x = 0; x < data->width; x++, in += 4, out += 4) {
+    float pixel[4];
+    rgba_uchar_to_float(pixel, in);
+    if (data->processor) {
+      OCIO_cpuProcessorApplyRGB(data->processor, pixel);
+    }
+    else {
+      srgb_to_linearrgb_v3_v3(pixel, pixel);
+    }
+    if (data->use_premultiply) {
+      mul_v3_fl(pixel, pixel[3]);
+    }
+    copy_v4_v4(out, pixel);
+  }
+}
+
 void IMB_colormanagement_imbuf_to_float_texture(float *out_buffer,
                                                 const int offset_x,
                                                 const int offset_y,
@@ -2307,34 +2345,25 @@ void IMB_colormanagement_imbuf_to_float_texture(float *out_buffer,
     const uchar *in_buffer = (uchar *)ibuf->rect;
     const bool use_premultiply = IMB_alpha_affects_rgb(ibuf) && store_premultiplied;

-    /* TODO(brecht): make this multi-threaded, or at least process in batches. */
     OCIO_ConstCPUProcessorRcPtr *processor = (ibuf->rect_colorspace) ?
                                                  colorspace_to_scene_linear_cpu_processor(
                                                      ibuf->rect_colorspace) :
                                                  NULL;

-    for (int y = 0; y < height; y++) {
-      const size_t in_offset = (offset_y + y) * ibuf->x + offset_x;
-      const size_t out_offset = y * width;
-      const uchar *in = in_buffer + in_offset * 4;
-      float *out = out_buffer + out_offset * 4;
-
-      /* Convert to scene linear, to sRGB and premultiply. */
-      for (int x = 0; x < width; x++, in += 4, out += 4) {
-        float pixel[4];
-        rgba_uchar_to_float(pixel, in);
-        if (processor) {
-          OCIO_cpuProcessorApplyRGB(processor, pixel);
-        }
-        else {
-          srgb_to_linearrgb_v3_v3(pixel, pixel);
-        }
-        if (use_premultiply) {
-          mul_v3_fl(pixel, pixel[3]);
-        }
-        copy_v4_v4(out, pixel);
-      }
-    }
+    ImbufByteToFloatData data = {
+        .processor = processor,
+        .width = width,
+        .offset = offset_y * ibuf->x + offset_x,
+        .stride = ibuf->x,
+        .in_buffer = in_buffer,
+        .out_buffer = out_buffer,
+        .use_premultiply = use_premultiply,
+    };
+
+    TaskParallelSettings settings;
+    BLI_parallel_range_settings_defaults(&settings);
+    settings.use_threading = (height > 128);
+    BLI_task_parallel_range(0, height, &data, imbuf_byte_to_float_cb, &settings);
   }
 }

-- 
cgit v1.2.3