Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBastien Montagne <montagne29@wanadoo.fr>2016-05-26 15:30:14 +0300
committerBastien Montagne <montagne29@wanadoo.fr>2016-05-26 15:33:26 +0300
commitf87842a73ac298227e8f1456f7e7c67d6712330d (patch)
treea036eac52909ffdac233ec0b642d48c42f0a9205 /source/blender/blenkernel
parentd5e0e681cea846facb4f2777921f6612be3ee193 (diff)
Image viewer scopes update: OMP->BLI_task.
Gives over 50% faster scope update (from 4.5ms to 2.2ms here with SD shot)! Probably mostly due to more clever usage of thread-local data (which avoids any lock, when OMP code had a rather stupid critical section for minmax)...
Diffstat (limited to 'source/blender/blenkernel')
-rw-r--r--source/blender/blenkernel/intern/colortools.c301
1 files changed, 164 insertions, 137 deletions
diff --git a/source/blender/blenkernel/intern/colortools.c b/source/blender/blenkernel/intern/colortools.c
index bac59c8c62d..c1f1f0128f5 100644
--- a/source/blender/blenkernel/intern/colortools.c
+++ b/source/blender/blenkernel/intern/colortools.c
@@ -43,6 +43,7 @@
#include "BLI_blenlib.h"
#include "BLI_math.h"
#include "BLI_utildefines.h"
+#include "BLI_task.h"
#include "BLI_threads.h"
#include "BKE_colortools.h"
@@ -53,10 +54,6 @@
#include "IMB_colormanagement.h"
#include "IMB_imbuf_types.h"
-#ifdef _OPENMP
-# include <omp.h>
-#endif
-
/* ********************************* color curve ********************* */
/* ***************** operations on full struct ************* */
@@ -1089,31 +1086,170 @@ void BKE_histogram_update_sample_line(Histogram *hist, ImBuf *ibuf, const ColorM
}
/* if view_settings, it also applies this to byte buffers */
+typedef struct ScopesUpdateData {
+ Scopes *scopes;
+ const ImBuf *ibuf;
+ struct ColormanageProcessor *cm_processor;
+ const unsigned char *display_buffer;
+ const int ycc_mode;
+
+ unsigned int *bin_lum, *bin_r, *bin_g, *bin_b, *bin_a;
+} ScopesUpdateData;
+
+typedef struct ScopesUpdateDataChunk {
+ unsigned int bin_lum[256];
+ unsigned int bin_r[256];
+ unsigned int bin_g[256];
+ unsigned int bin_b[256];
+ unsigned int bin_a[256];
+ float min[3], max[3];
+} ScopesUpdateDataChunk;
+
+static void scopes_update_cb(void *userdata, void *userdata_chunk, const int y, const int UNUSED(threadid))
+{
+ const ScopesUpdateData *data = userdata;
+
+ Scopes *scopes = data->scopes;
+ const ImBuf *ibuf = data->ibuf;
+ struct ColormanageProcessor *cm_processor = data->cm_processor;
+ const unsigned char *display_buffer = data->display_buffer;
+ const int ycc_mode = data->ycc_mode;
+
+ ScopesUpdateDataChunk *data_chunk = userdata_chunk;
+ unsigned int *bin_lum = data_chunk->bin_lum;
+ unsigned int *bin_r = data_chunk->bin_r;
+ unsigned int *bin_g = data_chunk->bin_g;
+ unsigned int *bin_b = data_chunk->bin_b;
+ unsigned int *bin_a = data_chunk->bin_a;
+ float *min = data_chunk->min;
+ float *max = data_chunk->max;
+
+ const float *rf = NULL;
+ const unsigned char *rc = NULL;
+ const int rows_per_sample_line = ibuf->y / scopes->sample_lines;
+ const int savedlines = y / rows_per_sample_line;
+ const bool do_sample_line = (savedlines < scopes->sample_lines) && (y % rows_per_sample_line) == 0;
+ const bool is_float = (ibuf->rect_float != NULL);
+
+ if (is_float)
+ rf = ibuf->rect_float + ((size_t)y) * ibuf->x * ibuf->channels;
+ else {
+ rc = display_buffer + ((size_t)y) * ibuf->x * ibuf->channels;
+ }
+
+ for (int x = 0; x < ibuf->x; x++) {
+ float rgba[4], ycc[3], luma;
+
+ if (is_float) {
+ switch (ibuf->channels) {
+ case 4:
+ copy_v4_v4(rgba, rf);
+ IMB_colormanagement_processor_apply_v4(cm_processor, rgba);
+ break;
+ case 3:
+ copy_v3_v3(rgba, rf);
+ IMB_colormanagement_processor_apply_v3(cm_processor, rgba);
+ rgba[3] = 1.0f;
+ break;
+ case 2:
+ copy_v3_fl(rgba, rf[0]);
+ rgba[3] = rf[1];
+ break;
+ case 1:
+ copy_v3_fl(rgba, rf[0]);
+ rgba[3] = 1.0f;
+ break;
+ default:
+ BLI_assert(0);
+ }
+ }
+ else {
+ for (int c = 4; c--;)
+ rgba[c] = rc[c] * INV_255;
+ }
+
+ /* we still need luma for histogram */
+ luma = IMB_colormanagement_get_luminance(rgba);
+
+ /* check for min max */
+ if (ycc_mode == -1) {
+ minmax_v3v3_v3(min, max, rgba);
+ }
+ else {
+ rgb_to_ycc(rgba[0], rgba[1], rgba[2], &ycc[0], &ycc[1], &ycc[2], ycc_mode);
+ mul_v3_fl(ycc, INV_255);
+ minmax_v3v3_v3(min, max, ycc);
+ }
+ /* increment count for histo*/
+ bin_lum[get_bin_float(luma)]++;
+ bin_r[get_bin_float(rgba[0])]++;
+ bin_g[get_bin_float(rgba[1])]++;
+ bin_b[get_bin_float(rgba[2])]++;
+ bin_a[get_bin_float(rgba[3])]++;
+
+ /* save sample if needed */
+ if (do_sample_line) {
+ const float fx = (float)x / (float)ibuf->x;
+ const int idx = 2 * (ibuf->x * savedlines + x);
+ save_sample_line(scopes, idx, fx, rgba, ycc);
+ }
+
+ rf += ibuf->channels;
+ rc += ibuf->channels;
+ }
+}
+
+static void scopes_update_finalize(void *userdata, void *userdata_chunk)
+{
+ const ScopesUpdateData *data = userdata;
+ const ScopesUpdateDataChunk *data_chunk = userdata_chunk;
+
+ unsigned int *bin_lum = data->bin_lum;
+ unsigned int *bin_r = data->bin_r;
+ unsigned int *bin_g = data->bin_g;
+ unsigned int *bin_b = data->bin_b;
+ unsigned int *bin_a = data->bin_a;
+ const unsigned int *bin_lum_c = data_chunk->bin_lum;
+ const unsigned int *bin_r_c = data_chunk->bin_r;
+ const unsigned int *bin_g_c = data_chunk->bin_g;
+ const unsigned int *bin_b_c = data_chunk->bin_b;
+ const unsigned int *bin_a_c = data_chunk->bin_a;
+
+ float (*minmax)[2] = data->scopes->minmax;
+ const float *min = data_chunk->min;
+ const float *max = data_chunk->max;
+
+ for (int b = 256; b--;) {
+ bin_lum[b] += bin_lum_c[b];
+ bin_r[b] += bin_r_c[b];
+ bin_g[b] += bin_g_c[b];
+ bin_b[b] += bin_b_c[b];
+ bin_a[b] += bin_a_c[b];
+ }
+
+ for (int c = 3; c--;) {
+ if (min[c] < minmax[c][0])
+ minmax[c][0] = min[c];
+ if (max[c] > minmax[c][1])
+ minmax[c][1] = max[c];
+ }
+}
+
void scopes_update(Scopes *scopes, ImBuf *ibuf, const ColorManagedViewSettings *view_settings,
const ColorManagedDisplaySettings *display_settings)
{
-#ifdef _OPENMP
- const int num_threads = BLI_system_thread_count();
-#endif
- int a, y;
+ int a;
unsigned int nl, na, nr, ng, nb;
double divl, diva, divr, divg, divb;
- unsigned char *display_buffer;
+ const unsigned char *display_buffer = NULL;
unsigned int bin_lum[256] = {0},
bin_r[256] = {0},
bin_g[256] = {0},
bin_b[256] = {0},
bin_a[256] = {0};
- unsigned int bin_lum_t[BLENDER_MAX_THREADS][256] = {{0}},
- bin_r_t[BLENDER_MAX_THREADS][256] = {{0}},
- bin_g_t[BLENDER_MAX_THREADS][256] = {{0}},
- bin_b_t[BLENDER_MAX_THREADS][256] = {{0}},
- bin_a_t[BLENDER_MAX_THREADS][256] = {{0}};
int ycc_mode = -1;
- const bool is_float = (ibuf->rect_float != NULL);
void *cache_handle = NULL;
struct ColormanageProcessor *cm_processor = NULL;
- int rows_per_sample_line;
if (ibuf->rect == NULL && ibuf->rect_float == NULL) return;
@@ -1151,7 +1287,6 @@ void scopes_update(Scopes *scopes, ImBuf *ibuf, const ColorManagedViewSettings *
scopes->sample_lines = ibuf->y;
/* scan the image */
- rows_per_sample_line = ibuf->y / scopes->sample_lines;
for (a = 0; a < 3; a++) {
scopes->minmax[a][0] = 25500.0f;
scopes->minmax[a][1] = -25500.0f;
@@ -1177,129 +1312,21 @@ void scopes_update(Scopes *scopes, ImBuf *ibuf, const ColorManagedViewSettings *
cm_processor = IMB_colormanagement_display_processor_new(view_settings, display_settings);
}
else {
- display_buffer = (unsigned char *)IMB_display_buffer_acquire(ibuf,
- view_settings,
- display_settings,
- &cache_handle);
+ display_buffer = (const unsigned char *)IMB_display_buffer_acquire(
+ ibuf, view_settings, display_settings, &cache_handle);
}
/* Keep number of threads in sync with the merge parts below. */
-#pragma omp parallel for private(y) schedule(static) num_threads(num_threads) if (ibuf->y > 256)
- for (y = 0; y < ibuf->y; y++) {
-#ifdef _OPENMP
- const int thread_idx = omp_get_thread_num();
-#else
- const int thread_idx = 0;
-#endif
- const float *rf = NULL;
- const unsigned char *rc = NULL;
- const int savedlines = y / rows_per_sample_line;
- const bool do_sample_line = (savedlines < scopes->sample_lines) && (y % rows_per_sample_line) == 0;
- float min[3] = { FLT_MAX, FLT_MAX, FLT_MAX},
- max[3] = {-FLT_MAX, -FLT_MAX, -FLT_MAX};
- int x, c;
- if (is_float)
- rf = ibuf->rect_float + ((size_t)y) * ibuf->x * ibuf->channels;
- else {
- rc = display_buffer + ((size_t)y) * ibuf->x * ibuf->channels;
- }
- for (x = 0; x < ibuf->x; x++) {
- float rgba[4], ycc[3], luma;
- if (is_float) {
-
- switch (ibuf->channels) {
- case 4:
- copy_v4_v4(rgba, rf);
- IMB_colormanagement_processor_apply_v4(cm_processor, rgba);
- break;
- case 3:
- copy_v3_v3(rgba, rf);
- IMB_colormanagement_processor_apply_v3(cm_processor, rgba);
- rgba[3] = 1.0f;
- break;
- case 2:
- copy_v3_fl(rgba, rf[0]);
- rgba[3] = rf[1];
- break;
- case 1:
- copy_v3_fl(rgba, rf[0]);
- rgba[3] = 1.0f;
- break;
- default:
- BLI_assert(0);
- }
- }
- else {
- for (c = 0; c < 4; c++)
- rgba[c] = rc[c] * INV_255;
- }
-
- /* we still need luma for histogram */
- luma = IMB_colormanagement_get_luminance(rgba);
-
- /* check for min max */
- if (ycc_mode == -1) {
- for (c = 0; c < 3; c++) {
- if (rgba[c] < min[c]) min[c] = rgba[c];
- if (rgba[c] > max[c]) max[c] = rgba[c];
- }
- }
- else {
- rgb_to_ycc(rgba[0], rgba[1], rgba[2], &ycc[0], &ycc[1], &ycc[2], ycc_mode);
- for (c = 0; c < 3; c++) {
- ycc[c] *= INV_255;
- if (ycc[c] < min[c]) min[c] = ycc[c];
- if (ycc[c] > max[c]) max[c] = ycc[c];
- }
- }
- /* increment count for histo*/
- bin_lum_t[thread_idx][get_bin_float(luma)] += 1;
- bin_r_t[thread_idx][get_bin_float(rgba[0])] += 1;
- bin_g_t[thread_idx][get_bin_float(rgba[1])] += 1;
- bin_b_t[thread_idx][get_bin_float(rgba[2])] += 1;
- bin_a_t[thread_idx][get_bin_float(rgba[3])] += 1;
-
- /* save sample if needed */
- if (do_sample_line) {
- const float fx = (float)x / (float)ibuf->x;
- const int idx = 2 * (ibuf->x * savedlines + x);
- save_sample_line(scopes, idx, fx, rgba, ycc);
- }
-
- rf += ibuf->channels;
- rc += ibuf->channels;
- }
-#pragma omp critical
- {
- for (c = 0; c < 3; c++) {
- if (min[c] < scopes->minmax[c][0]) scopes->minmax[c][0] = min[c];
- if (max[c] > scopes->minmax[c][1]) scopes->minmax[c][1] = max[c];
- }
- }
- }
-
-#ifdef _OPENMP
- if (ibuf->y > 256) {
- for (a = 0; a < num_threads; a++) {
- int b;
- for (b = 0; b < 256; b++) {
- bin_lum[b] += bin_lum_t[a][b];
- bin_r[b] += bin_r_t[a][b];
- bin_g[b] += bin_g_t[a][b];
- bin_b[b] += bin_b_t[a][b];
- bin_a[b] += bin_a_t[a][b];
- }
- }
- }
- else
-#endif
- {
- memcpy(bin_lum, bin_lum_t[0], sizeof(bin_lum));
- memcpy(bin_r, bin_r_t[0], sizeof(bin_r));
- memcpy(bin_g, bin_g_t[0], sizeof(bin_g));
- memcpy(bin_b, bin_b_t[0], sizeof(bin_b));
- memcpy(bin_a, bin_a_t[0], sizeof(bin_a));
- }
+ ScopesUpdateData data = {
+ .scopes = scopes, . ibuf = ibuf,
+ .cm_processor = cm_processor, .display_buffer = display_buffer, .ycc_mode = ycc_mode,
+ .bin_lum = bin_lum, .bin_r = bin_r, .bin_g = bin_g, .bin_b = bin_b, .bin_a = bin_a,
+ };
+ ScopesUpdateDataChunk data_chunk = {0};
+ INIT_MINMAX(data_chunk.min, data_chunk.max);
+
+ BLI_task_parallel_range_finalize(0, ibuf->y, &data, &data_chunk, sizeof(data_chunk),
+ scopes_update_cb, scopes_update_finalize, ibuf->y > 256, false);
/* test for nicer distribution even - non standard, leave it out for a while */
#if 0