diff options
author | Bastien Montagne <montagne29@wanadoo.fr> | 2016-05-26 15:30:14 +0300 |
---|---|---|
committer | Bastien Montagne <montagne29@wanadoo.fr> | 2016-05-26 15:33:26 +0300 |
commit | f87842a73ac298227e8f1456f7e7c67d6712330d (patch) | |
tree | a036eac52909ffdac233ec0b642d48c42f0a9205 /source/blender | |
parent | d5e0e681cea846facb4f2777921f6612be3ee193 (diff) |
Image viewer scopes update: OMP->BLI_task.
Gives over 50% faster scope update (from 4.5ms down to 2.2ms here with an SD shot)!
Probably mostly due to smarter usage of thread-local data (which avoids any locking,
whereas the OMP code had a rather stupid critical section for minmax)...
Diffstat (limited to 'source/blender')
-rw-r--r-- | source/blender/blenkernel/intern/colortools.c | 301 |
1 files changed, 164 insertions, 137 deletions
diff --git a/source/blender/blenkernel/intern/colortools.c b/source/blender/blenkernel/intern/colortools.c index bac59c8c62d..c1f1f0128f5 100644 --- a/source/blender/blenkernel/intern/colortools.c +++ b/source/blender/blenkernel/intern/colortools.c @@ -43,6 +43,7 @@ #include "BLI_blenlib.h" #include "BLI_math.h" #include "BLI_utildefines.h" +#include "BLI_task.h" #include "BLI_threads.h" #include "BKE_colortools.h" @@ -53,10 +54,6 @@ #include "IMB_colormanagement.h" #include "IMB_imbuf_types.h" -#ifdef _OPENMP -# include <omp.h> -#endif - /* ********************************* color curve ********************* */ /* ***************** operations on full struct ************* */ @@ -1089,31 +1086,170 @@ void BKE_histogram_update_sample_line(Histogram *hist, ImBuf *ibuf, const ColorM } /* if view_settings, it also applies this to byte buffers */ +typedef struct ScopesUpdateData { + Scopes *scopes; + const ImBuf *ibuf; + struct ColormanageProcessor *cm_processor; + const unsigned char *display_buffer; + const int ycc_mode; + + unsigned int *bin_lum, *bin_r, *bin_g, *bin_b, *bin_a; +} ScopesUpdateData; + +typedef struct ScopesUpdateDataChunk { + unsigned int bin_lum[256]; + unsigned int bin_r[256]; + unsigned int bin_g[256]; + unsigned int bin_b[256]; + unsigned int bin_a[256]; + float min[3], max[3]; +} ScopesUpdateDataChunk; + +static void scopes_update_cb(void *userdata, void *userdata_chunk, const int y, const int UNUSED(threadid)) +{ + const ScopesUpdateData *data = userdata; + + Scopes *scopes = data->scopes; + const ImBuf *ibuf = data->ibuf; + struct ColormanageProcessor *cm_processor = data->cm_processor; + const unsigned char *display_buffer = data->display_buffer; + const int ycc_mode = data->ycc_mode; + + ScopesUpdateDataChunk *data_chunk = userdata_chunk; + unsigned int *bin_lum = data_chunk->bin_lum; + unsigned int *bin_r = data_chunk->bin_r; + unsigned int *bin_g = data_chunk->bin_g; + unsigned int *bin_b = data_chunk->bin_b; + unsigned int *bin_a = 
data_chunk->bin_a; + float *min = data_chunk->min; + float *max = data_chunk->max; + + const float *rf = NULL; + const unsigned char *rc = NULL; + const int rows_per_sample_line = ibuf->y / scopes->sample_lines; + const int savedlines = y / rows_per_sample_line; + const bool do_sample_line = (savedlines < scopes->sample_lines) && (y % rows_per_sample_line) == 0; + const bool is_float = (ibuf->rect_float != NULL); + + if (is_float) + rf = ibuf->rect_float + ((size_t)y) * ibuf->x * ibuf->channels; + else { + rc = display_buffer + ((size_t)y) * ibuf->x * ibuf->channels; + } + + for (int x = 0; x < ibuf->x; x++) { + float rgba[4], ycc[3], luma; + + if (is_float) { + switch (ibuf->channels) { + case 4: + copy_v4_v4(rgba, rf); + IMB_colormanagement_processor_apply_v4(cm_processor, rgba); + break; + case 3: + copy_v3_v3(rgba, rf); + IMB_colormanagement_processor_apply_v3(cm_processor, rgba); + rgba[3] = 1.0f; + break; + case 2: + copy_v3_fl(rgba, rf[0]); + rgba[3] = rf[1]; + break; + case 1: + copy_v3_fl(rgba, rf[0]); + rgba[3] = 1.0f; + break; + default: + BLI_assert(0); + } + } + else { + for (int c = 4; c--;) + rgba[c] = rc[c] * INV_255; + } + + /* we still need luma for histogram */ + luma = IMB_colormanagement_get_luminance(rgba); + + /* check for min max */ + if (ycc_mode == -1) { + minmax_v3v3_v3(min, max, rgba); + } + else { + rgb_to_ycc(rgba[0], rgba[1], rgba[2], &ycc[0], &ycc[1], &ycc[2], ycc_mode); + mul_v3_fl(ycc, INV_255); + minmax_v3v3_v3(min, max, ycc); + } + /* increment count for histo*/ + bin_lum[get_bin_float(luma)]++; + bin_r[get_bin_float(rgba[0])]++; + bin_g[get_bin_float(rgba[1])]++; + bin_b[get_bin_float(rgba[2])]++; + bin_a[get_bin_float(rgba[3])]++; + + /* save sample if needed */ + if (do_sample_line) { + const float fx = (float)x / (float)ibuf->x; + const int idx = 2 * (ibuf->x * savedlines + x); + save_sample_line(scopes, idx, fx, rgba, ycc); + } + + rf += ibuf->channels; + rc += ibuf->channels; + } +} + +static void 
scopes_update_finalize(void *userdata, void *userdata_chunk) +{ + const ScopesUpdateData *data = userdata; + const ScopesUpdateDataChunk *data_chunk = userdata_chunk; + + unsigned int *bin_lum = data->bin_lum; + unsigned int *bin_r = data->bin_r; + unsigned int *bin_g = data->bin_g; + unsigned int *bin_b = data->bin_b; + unsigned int *bin_a = data->bin_a; + const unsigned int *bin_lum_c = data_chunk->bin_lum; + const unsigned int *bin_r_c = data_chunk->bin_r; + const unsigned int *bin_g_c = data_chunk->bin_g; + const unsigned int *bin_b_c = data_chunk->bin_b; + const unsigned int *bin_a_c = data_chunk->bin_a; + + float (*minmax)[2] = data->scopes->minmax; + const float *min = data_chunk->min; + const float *max = data_chunk->max; + + for (int b = 256; b--;) { + bin_lum[b] += bin_lum_c[b]; + bin_r[b] += bin_r_c[b]; + bin_g[b] += bin_g_c[b]; + bin_b[b] += bin_b_c[b]; + bin_a[b] += bin_a_c[b]; + } + + for (int c = 3; c--;) { + if (min[c] < minmax[c][0]) + minmax[c][0] = min[c]; + if (max[c] > minmax[c][1]) + minmax[c][1] = max[c]; + } +} + void scopes_update(Scopes *scopes, ImBuf *ibuf, const ColorManagedViewSettings *view_settings, const ColorManagedDisplaySettings *display_settings) { -#ifdef _OPENMP - const int num_threads = BLI_system_thread_count(); -#endif - int a, y; + int a; unsigned int nl, na, nr, ng, nb; double divl, diva, divr, divg, divb; - unsigned char *display_buffer; + const unsigned char *display_buffer = NULL; unsigned int bin_lum[256] = {0}, bin_r[256] = {0}, bin_g[256] = {0}, bin_b[256] = {0}, bin_a[256] = {0}; - unsigned int bin_lum_t[BLENDER_MAX_THREADS][256] = {{0}}, - bin_r_t[BLENDER_MAX_THREADS][256] = {{0}}, - bin_g_t[BLENDER_MAX_THREADS][256] = {{0}}, - bin_b_t[BLENDER_MAX_THREADS][256] = {{0}}, - bin_a_t[BLENDER_MAX_THREADS][256] = {{0}}; int ycc_mode = -1; - const bool is_float = (ibuf->rect_float != NULL); void *cache_handle = NULL; struct ColormanageProcessor *cm_processor = NULL; - int rows_per_sample_line; if (ibuf->rect == NULL && 
ibuf->rect_float == NULL) return; @@ -1151,7 +1287,6 @@ void scopes_update(Scopes *scopes, ImBuf *ibuf, const ColorManagedViewSettings * scopes->sample_lines = ibuf->y; /* scan the image */ - rows_per_sample_line = ibuf->y / scopes->sample_lines; for (a = 0; a < 3; a++) { scopes->minmax[a][0] = 25500.0f; scopes->minmax[a][1] = -25500.0f; @@ -1177,129 +1312,21 @@ void scopes_update(Scopes *scopes, ImBuf *ibuf, const ColorManagedViewSettings * cm_processor = IMB_colormanagement_display_processor_new(view_settings, display_settings); } else { - display_buffer = (unsigned char *)IMB_display_buffer_acquire(ibuf, - view_settings, - display_settings, - &cache_handle); + display_buffer = (const unsigned char *)IMB_display_buffer_acquire( + ibuf, view_settings, display_settings, &cache_handle); } /* Keep number of threads in sync with the merge parts below. */ -#pragma omp parallel for private(y) schedule(static) num_threads(num_threads) if (ibuf->y > 256) - for (y = 0; y < ibuf->y; y++) { -#ifdef _OPENMP - const int thread_idx = omp_get_thread_num(); -#else - const int thread_idx = 0; -#endif - const float *rf = NULL; - const unsigned char *rc = NULL; - const int savedlines = y / rows_per_sample_line; - const bool do_sample_line = (savedlines < scopes->sample_lines) && (y % rows_per_sample_line) == 0; - float min[3] = { FLT_MAX, FLT_MAX, FLT_MAX}, - max[3] = {-FLT_MAX, -FLT_MAX, -FLT_MAX}; - int x, c; - if (is_float) - rf = ibuf->rect_float + ((size_t)y) * ibuf->x * ibuf->channels; - else { - rc = display_buffer + ((size_t)y) * ibuf->x * ibuf->channels; - } - for (x = 0; x < ibuf->x; x++) { - float rgba[4], ycc[3], luma; - if (is_float) { - - switch (ibuf->channels) { - case 4: - copy_v4_v4(rgba, rf); - IMB_colormanagement_processor_apply_v4(cm_processor, rgba); - break; - case 3: - copy_v3_v3(rgba, rf); - IMB_colormanagement_processor_apply_v3(cm_processor, rgba); - rgba[3] = 1.0f; - break; - case 2: - copy_v3_fl(rgba, rf[0]); - rgba[3] = rf[1]; - break; - case 1: - 
copy_v3_fl(rgba, rf[0]); - rgba[3] = 1.0f; - break; - default: - BLI_assert(0); - } - } - else { - for (c = 0; c < 4; c++) - rgba[c] = rc[c] * INV_255; - } - - /* we still need luma for histogram */ - luma = IMB_colormanagement_get_luminance(rgba); - - /* check for min max */ - if (ycc_mode == -1) { - for (c = 0; c < 3; c++) { - if (rgba[c] < min[c]) min[c] = rgba[c]; - if (rgba[c] > max[c]) max[c] = rgba[c]; - } - } - else { - rgb_to_ycc(rgba[0], rgba[1], rgba[2], &ycc[0], &ycc[1], &ycc[2], ycc_mode); - for (c = 0; c < 3; c++) { - ycc[c] *= INV_255; - if (ycc[c] < min[c]) min[c] = ycc[c]; - if (ycc[c] > max[c]) max[c] = ycc[c]; - } - } - /* increment count for histo*/ - bin_lum_t[thread_idx][get_bin_float(luma)] += 1; - bin_r_t[thread_idx][get_bin_float(rgba[0])] += 1; - bin_g_t[thread_idx][get_bin_float(rgba[1])] += 1; - bin_b_t[thread_idx][get_bin_float(rgba[2])] += 1; - bin_a_t[thread_idx][get_bin_float(rgba[3])] += 1; - - /* save sample if needed */ - if (do_sample_line) { - const float fx = (float)x / (float)ibuf->x; - const int idx = 2 * (ibuf->x * savedlines + x); - save_sample_line(scopes, idx, fx, rgba, ycc); - } - - rf += ibuf->channels; - rc += ibuf->channels; - } -#pragma omp critical - { - for (c = 0; c < 3; c++) { - if (min[c] < scopes->minmax[c][0]) scopes->minmax[c][0] = min[c]; - if (max[c] > scopes->minmax[c][1]) scopes->minmax[c][1] = max[c]; - } - } - } - -#ifdef _OPENMP - if (ibuf->y > 256) { - for (a = 0; a < num_threads; a++) { - int b; - for (b = 0; b < 256; b++) { - bin_lum[b] += bin_lum_t[a][b]; - bin_r[b] += bin_r_t[a][b]; - bin_g[b] += bin_g_t[a][b]; - bin_b[b] += bin_b_t[a][b]; - bin_a[b] += bin_a_t[a][b]; - } - } - } - else -#endif - { - memcpy(bin_lum, bin_lum_t[0], sizeof(bin_lum)); - memcpy(bin_r, bin_r_t[0], sizeof(bin_r)); - memcpy(bin_g, bin_g_t[0], sizeof(bin_g)); - memcpy(bin_b, bin_b_t[0], sizeof(bin_b)); - memcpy(bin_a, bin_a_t[0], sizeof(bin_a)); - } + ScopesUpdateData data = { + .scopes = scopes, . 
ibuf = ibuf, + .cm_processor = cm_processor, .display_buffer = display_buffer, .ycc_mode = ycc_mode, + .bin_lum = bin_lum, .bin_r = bin_r, .bin_g = bin_g, .bin_b = bin_b, .bin_a = bin_a, + }; + ScopesUpdateDataChunk data_chunk = {0}; + INIT_MINMAX(data_chunk.min, data_chunk.max); + + BLI_task_parallel_range_finalize(0, ibuf->y, &data, &data_chunk, sizeof(data_chunk), + scopes_update_cb, scopes_update_finalize, ibuf->y > 256, false); /* test for nicer distribution even - non standard, leave it out for a while */ #if 0 |