diff options
author | Sergey Sharybin <sergey.vfx@gmail.com> | 2015-05-19 18:18:26 +0300 |
---|---|---|
committer | Sergey Sharybin <sergey.vfx@gmail.com> | 2015-06-12 00:37:49 +0300 |
commit | 774e034d40f9494a2523076992fc19e534ecfb17 (patch) | |
tree | 91c984869ea63a5c55de0e276d7e9e466edfef44 /source/blender/blenkernel/intern/colortools.c | |
parent | 9c5995c06259ee596dc80b5b5f69690679894357 (diff) |
Make scopes update multi-threaded
This commit makes scopes evaluation multithreaded by using OpenMP
for the outer loop of the pixel processor. It also makes all the changes
needed to keep performance as high as possible by keeping data
local to each thread for as long as possible.
Reviewers: campbellbarton
Differential Revision: https://developer.blender.org/D1334
Diffstat (limited to 'source/blender/blenkernel/intern/colortools.c')
-rw-r--r-- | source/blender/blenkernel/intern/colortools.c | 107 |
1 files changed, 77 insertions, 30 deletions
diff --git a/source/blender/blenkernel/intern/colortools.c b/source/blender/blenkernel/intern/colortools.c index 96488d021ce..0af809e79f8 100644 --- a/source/blender/blenkernel/intern/colortools.c +++ b/source/blender/blenkernel/intern/colortools.c @@ -43,6 +43,7 @@ #include "BLI_blenlib.h" #include "BLI_math.h" #include "BLI_utildefines.h" +#include "BLI_threads.h" #include "BKE_colortools.h" #include "BKE_curve.h" @@ -52,6 +53,10 @@ #include "IMB_colormanagement.h" #include "IMB_imbuf_types.h" +#ifdef _OPENMP +# include <omp.h> +#endif + /* ********************************* color curve ********************* */ /* ***************** operations on full struct ************* */ @@ -1029,11 +1034,25 @@ void BKE_histogram_update_sample_line(Histogram *hist, ImBuf *ibuf, const ColorM void scopes_update(Scopes *scopes, ImBuf *ibuf, const ColorManagedViewSettings *view_settings, const ColorManagedDisplaySettings *display_settings) { +#ifdef _OPENMP + const int num_threads = BLI_system_thread_count(); +#else + const int num_threads = 1; +#endif int a, y; unsigned int nl, na, nr, ng, nb; double divl, diva, divr, divg, divb; unsigned char *display_buffer; - unsigned int *bin_lum, *bin_r, *bin_g, *bin_b, *bin_a; + unsigned int bin_lum[256] = {0}, + bin_r[256] = {0}, + bin_g[256] = {0}, + bin_b[256] = {0}, + bin_a[256] = {0}; + unsigned int bin_lum_t[BLENDER_MAX_THREADS][256] = {{0}}, + bin_r_t[BLENDER_MAX_THREADS][256] = {{0}}, + bin_g_t[BLENDER_MAX_THREADS][256] = {{0}}, + bin_b_t[BLENDER_MAX_THREADS][256] = {{0}}, + bin_a_t[BLENDER_MAX_THREADS][256] = {{0}}; int ycc_mode = -1; const bool is_float = (ibuf->rect_float != NULL); void *cache_handle = NULL; @@ -1068,13 +1087,6 @@ void scopes_update(Scopes *scopes, ImBuf *ibuf, const ColorManagedViewSettings * break; } - /* temp table to count pix value for histogram */ - bin_r = MEM_callocN(256 * sizeof(unsigned int), "temp historgram bins"); - bin_g = MEM_callocN(256 * sizeof(unsigned int), "temp historgram bins"); - bin_b = 
MEM_callocN(256 * sizeof(unsigned int), "temp historgram bins"); - bin_a = MEM_callocN(256 * sizeof(unsigned int), "temp historgram bins"); - bin_lum = MEM_callocN(256 * sizeof(unsigned int), "temp historgram bins"); - /* convert to number of lines with logarithmic scale */ scopes->sample_lines = (scopes->accuracy * 0.01f) * (scopes->accuracy * 0.01f) * ibuf->y; @@ -1114,9 +1126,19 @@ void scopes_update(Scopes *scopes, ImBuf *ibuf, const ColorManagedViewSettings * &cache_handle); } + /* Keep number of threads in sync with the merge parts below. */ +#pragma omp parallel for private(y) schedule(static) num_threads(num_threads) if(ibuf->y > 256) for (y = 0; y < ibuf->y; y++) { +#ifdef _OPENMP + const int thread_idx = omp_get_thread_num(); +#else + const int thread_idx = 0; +#endif const float *rf = NULL; - unsigned char *rc = NULL; + const unsigned char *rc = NULL; + const bool do_sample_line = (y % rows_per_sample_line) == 0; + float min[3] = { FLT_MAX, FLT_MAX, FLT_MAX}, + max[3] = {-FLT_MAX, -FLT_MAX, -FLT_MAX}; int x, c; if (is_float) rf = ibuf->rect_float + ((size_t)y) * ibuf->x * ibuf->channels; @@ -1140,27 +1162,27 @@ void scopes_update(Scopes *scopes, ImBuf *ibuf, const ColorManagedViewSettings * /* check for min max */ if (ycc_mode == -1) { for (c = 0; c < 3; c++) { - if (rgba[c] < scopes->minmax[c][0]) scopes->minmax[c][0] = rgba[c]; - if (rgba[c] > scopes->minmax[c][1]) scopes->minmax[c][1] = rgba[c]; + if (rgba[c] < min[c]) min[c] = rgba[c]; + if (rgba[c] > max[c]) max[c] = rgba[c]; } } else { rgb_to_ycc(rgba[0], rgba[1], rgba[2], &ycc[0], &ycc[1], &ycc[2], ycc_mode); for (c = 0; c < 3; c++) { ycc[c] *= INV_255; - if (ycc[c] < scopes->minmax[c][0]) scopes->minmax[c][0] = ycc[c]; - if (ycc[c] > scopes->minmax[c][1]) scopes->minmax[c][1] = ycc[c]; + if (ycc[c] < min[c]) min[c] = ycc[c]; + if (ycc[c] > max[c]) max[c] = ycc[c]; } } /* increment count for histo*/ - bin_lum[get_bin_float(luma)] += 1; - bin_r[get_bin_float(rgba[0])] += 1; - 
bin_g[get_bin_float(rgba[1])] += 1; - bin_b[get_bin_float(rgba[2])] += 1; - bin_a[get_bin_float(rgba[3])] += 1; + bin_lum_t[thread_idx][get_bin_float(luma)] += 1; + bin_r_t[thread_idx][get_bin_float(rgba[0])] += 1; + bin_g_t[thread_idx][get_bin_float(rgba[1])] += 1; + bin_b_t[thread_idx][get_bin_float(rgba[2])] += 1; + bin_a_t[thread_idx][get_bin_float(rgba[3])] += 1; /* save sample if needed */ - if (y % rows_per_sample_line == 0) { + if (do_sample_line) { const float fx = (float)x / (float)ibuf->x; const int savedlines = y / rows_per_sample_line; const int idx = 2 * (ibuf->x * savedlines + x); @@ -1170,16 +1192,46 @@ void scopes_update(Scopes *scopes, ImBuf *ibuf, const ColorManagedViewSettings * rf += ibuf->channels; rc += ibuf->channels; } +#pragma omp critical + { + for (c = 0; c < 3; c++) { + if (min[c] < scopes->minmax[c][0]) scopes->minmax[c][0] = min[c]; + if (max[c] > scopes->minmax[c][1]) scopes->minmax[c][1] = max[c]; + } + } + } + +#ifdef _OPENMP + if (ibuf->y > 256) { + for (a = 0; a < num_threads; a++) { + int b; + for (b = 0; b < 256; b++) { + bin_lum[b] += bin_lum_t[a][b]; + bin_r[b] += bin_r_t[a][b]; + bin_g[b] += bin_g_t[a][b]; + bin_b[b] += bin_b_t[a][b]; + bin_a[b] += bin_a_t[a][b]; + } + } + } + else +#endif + { + memcpy(bin_lum, bin_lum_t[0], sizeof(bin_lum)); + memcpy(bin_r, bin_r_t[0], sizeof(bin_r)); + memcpy(bin_g, bin_g_t[0], sizeof(bin_g)); + memcpy(bin_b, bin_b_t[0], sizeof(bin_b)); + memcpy(bin_a, bin_a_t[0], sizeof(bin_a)); } /* test for nicer distribution even - non standard, leave it out for a while */ #if 0 - for (x = 0; x < 256; x++) { - bin_lum[x] = sqrt (bin_lum[x]); - bin_r[x] = sqrt(bin_r[x]); - bin_g[x] = sqrt(bin_g[x]); - bin_b[x] = sqrt(bin_b[x]); - bin_a[x] = sqrt(bin_a[x]); + for (a = 0; a < 256; a++) { + bin_lum[a] = sqrt (bin_lum[a]); + bin_r[a] = sqrt(bin_r[a]); + bin_g[a] = sqrt(bin_g[a]); + bin_b[a] = sqrt(bin_b[a]); + bin_a[a] = sqrt(bin_a[a]); } #endif @@ -1205,11 +1257,6 @@ void scopes_update(Scopes *scopes, 
ImBuf *ibuf, const ColorManagedViewSettings * scopes->hist.data_b[a] = bin_b[a] * divb; scopes->hist.data_a[a] = bin_a[a] * diva; } - MEM_freeN(bin_lum); - MEM_freeN(bin_r); - MEM_freeN(bin_g); - MEM_freeN(bin_b); - MEM_freeN(bin_a); if (cm_processor) IMB_colormanagement_processor_free(cm_processor); |