From 277fb1a31fc4b0c9691b3bbab43fd1a970d3e575 Mon Sep 17 00:00:00 2001 From: Jens Verwiebe Date: Mon, 31 Mar 2014 13:51:40 +0200 Subject: Sculpt/dyntopo: Make the omp threads configurable to overcome performance issues - autodetect optimal default, which typically avoids HT threads - can store setting in .blend per scene - this does not touch the general omp max threads, because I found other areas where the calculations are a good fit for a huge core count - Intel notes that some of the older generation processors with HyperThreading would not provide a significant performance boost for FPU-intensive applications. On those systems you might want to set OMP_NUM_THREADS = total number of cores (not total number of hardware threads). --- source/blender/blenkernel/BKE_scene.h | 2 ++ source/blender/blenkernel/intern/scene.c | 10 +++++++++ source/blender/blenlib/BLI_threads.h | 2 ++ source/blender/blenlib/intern/threads.c | 33 +++++++++++++++++++++++++++- source/blender/editors/sculpt_paint/sculpt.c | 21 +++++++++++++----- source/blender/makesdna/DNA_scene_types.h | 8 +++++++ source/blender/makesrna/intern/rna_scene.c | 29 ++++++++++++++++++++++++ 7 files changed, 98 insertions(+), 7 deletions(-) (limited to 'source/blender') diff --git a/source/blender/blenkernel/BKE_scene.h b/source/blender/blenkernel/BKE_scene.h index a10a3f3f59f..972db36d5a6 100644 --- a/source/blender/blenkernel/BKE_scene.h +++ b/source/blender/blenkernel/BKE_scene.h @@ -137,6 +137,8 @@ bool BKE_scene_check_rigidbody_active(const struct Scene *scene); int BKE_scene_num_threads(const struct Scene *scene); int BKE_render_num_threads(const struct RenderData *r); +int BKE_scene_num_omp_threads(const struct Scene *scene); +void BKE_scene_omp_threads_update(const struct Scene *scene); #ifdef __cplusplus } #endif diff --git a/source/blender/blenkernel/intern/scene.c b/source/blender/blenkernel/intern/scene.c index 28cc4305da8..02bc1fcb699 100644 --- a/source/blender/blenkernel/intern/scene.c +++ 
b/source/blender/blenkernel/intern/scene.c @@ -638,6 +638,9 @@ Scene *BKE_scene_add(Main *bmain, const char *name) sce->gm.exitkey = 218; // Blender key code for ESC + sce->omp_mode = SCE_OMP_AUTO; + sce->omp_num_threads = 1; + sound_create_scene(sce); /* color management */ @@ -1868,3 +1871,10 @@ int BKE_scene_num_threads(const Scene *scene) return BKE_render_num_threads(&scene->r); } +int BKE_scene_num_omp_threads(const struct Scene *scene) +{ /* Returns the OpenMP thread count for this scene: BLI_omp_thread_count() when omp_mode is SCE_OMP_AUTO, otherwise the stored scene->omp_num_threads. */ + if (scene->omp_mode == SCE_OMP_AUTO) /* auto mode ignores the stored per-scene value */ + return BLI_omp_thread_count(); + else + return scene->omp_num_threads; +} diff --git a/source/blender/blenlib/BLI_threads.h b/source/blender/blenlib/BLI_threads.h index 62eadb8a8b5..b522d95ddae 100644 --- a/source/blender/blenlib/BLI_threads.h +++ b/source/blender/blenlib/BLI_threads.h @@ -75,6 +75,8 @@ int BLI_system_thread_count(void); /* gets the number of threads the system void BLI_system_num_threads_override_set(int num); int BLI_system_num_threads_override_get(void); +int BLI_omp_thread_count(void); /* gets the number of openmp threads the system can make use of */ + /* Global Mutex Locks * * One custom lock available now. can be extended. 
*/ diff --git a/source/blender/blenlib/intern/threads.c b/source/blender/blenlib/intern/threads.c index ded2fd7e06d..78752fde608 100644 --- a/source/blender/blenlib/intern/threads.c +++ b/source/blender/blenlib/intern/threads.c @@ -54,10 +54,25 @@ # include #endif -#if defined(__APPLE__) && defined(_OPENMP) && (__GNUC__ == 4) && (__GNUC_MINOR__ == 2) && !defined(__clang__) +#ifdef _OPENMP +#include <omp.h> +#endif + +#if defined(__APPLE__) +#if defined(_OPENMP) && (__GNUC__ == 4) && (__GNUC_MINOR__ == 2) && !defined(__clang__) # define USE_APPLE_OMP_FIX #endif +/* number of physical cores (not counting HT threads), queried via "hw.physicalcpu"; returns 1 if the query fails or reports a non-positive count */ +static int system_physical_thread_count(void) +{ + int ptcount = 0; + size_t ptcount_len = sizeof(ptcount); + const int err = sysctlbyname("hw.physicalcpu", &ptcount, &ptcount_len, NULL, 0); + return (err == 0 && ptcount > 0) ? ptcount : 1; +} +#endif // __APPLE__ + #ifdef USE_APPLE_OMP_FIX /* ************** libgomp (Apple gcc 4.2.1) TLS bug workaround *************** */ extern pthread_key_t gomp_tls_key; @@ -335,6 +350,22 @@ void BLI_end_threads(ListBase *threadbase) /* System Information */ +/* gets the number of openmp threads the system can make use of: physical core count on Apple, omp_get_num_procs() elsewhere, 1 when built without OpenMP */ +int BLI_omp_thread_count(void) +{ + int t; +#ifdef _OPENMP +#ifdef __APPLE__ + t = system_physical_thread_count(); +#else + t = omp_get_num_procs(); +#endif +#else + t = 1; +#endif + return t; +} + /* how many threads are native on this system? 
*/ int BLI_system_thread_count(void) { diff --git a/source/blender/editors/sculpt_paint/sculpt.c b/source/blender/editors/sculpt_paint/sculpt.c index 8b65d2c9432..c04f8439fe3 100644 --- a/source/blender/editors/sculpt_paint/sculpt.c +++ b/source/blender/editors/sculpt_paint/sculpt.c @@ -67,6 +67,7 @@ #include "BKE_multires.h" #include "BKE_paint.h" #include "BKE_report.h" +#include "BKE_scene.h" #include "BKE_lattice.h" /* for armature_deform_verts */ #include "BKE_node.h" #include "BKE_object.h" @@ -1541,10 +1542,10 @@ static void do_multires_smooth_brush(Sculpt *sd, SculptSession *ss, PBVHNode *no grid_hidden = BKE_pbvh_grid_hidden(ss->pbvh); - thread_num = 0; #ifdef _OPENMP - if (sd->flags & SCULPT_USE_OPENMP) - thread_num = omp_get_thread_num(); + thread_num = omp_get_thread_num(); +#else + thread_num = 0; #endif tmpgrid_co = ss->cache->tmpgrid_co[thread_num]; tmprow_co = ss->cache->tmprow_co[thread_num]; @@ -3769,7 +3770,7 @@ static void sculpt_init_mirror_clipping(Object *ob, SculptSession *ss) } } -static void sculpt_omp_start(Sculpt *sd, SculptSession *ss) +static void sculpt_omp_start(Scene *scene, Sculpt *sd, SculptSession *ss) { StrokeCache *cache = ss->cache; @@ -3779,15 +3780,17 @@ static void sculpt_omp_start(Sculpt *sd, SculptSession *ss) * Justification: Empirically I've found that two threads per * processor gives higher throughput. 
*/ if (sd->flags & SCULPT_USE_OPENMP) { - cache->num_threads = omp_get_num_procs(); + cache->num_threads = BKE_scene_num_omp_threads(scene); } else { cache->num_threads = 1; } + omp_set_num_threads(cache->num_threads); #else (void)sd; cache->num_threads = 1; #endif +// printf("Sculpt omp threadcount: %d\n", cache->num_threads); if (ss->multires) { int i, gridsize, array_mem_size; BKE_pbvh_node_get_grids(ss->pbvh, NULL, NULL, NULL, NULL, @@ -4002,7 +4005,7 @@ static void sculpt_update_cache_invariants(bContext *C, Sculpt *sd, SculptSessio cache->previous_vertex_rotation = 0; cache->init_dir_set = false; - sculpt_omp_start(sd, ss); + sculpt_omp_start(scene, sd, ss); } static void sculpt_update_brush_delta(UnifiedPaintSettings *ups, Object *ob, Brush *brush) @@ -4626,6 +4629,12 @@ static void sculpt_stroke_done(const bContext *C, struct PaintStroke *UNUSED(str WM_event_add_notifier(C, NC_OBJECT | ND_DRAW, ob); } +#ifdef _OPENMP + if (!(sd->flags & SCULPT_USE_OPENMP)) + omp_set_num_threads(BLI_system_thread_count()); +// printf("Reseted to omp threadcount: %d\n", BLI_system_thread_count()); +#endif + sculpt_brush_exit_tex(sd); } diff --git a/source/blender/makesdna/DNA_scene_types.h b/source/blender/makesdna/DNA_scene_types.h index b9621b4753c..cc16ccd201d 100644 --- a/source/blender/makesdna/DNA_scene_types.h +++ b/source/blender/makesdna/DNA_scene_types.h @@ -1224,6 +1224,10 @@ typedef struct Scene { /* RigidBody simulation world+settings */ struct RigidBodyWorld *rigidbody_world; + + /* Openmp Global Settings */ + int omp_num_threads; + int omp_mode; } Scene; @@ -1769,6 +1773,10 @@ typedef enum SculptFlags { #define USER_UNIT_OPT_SPLIT 1 #define USER_UNIT_ROT_RADIANS 2 +/* OpenMP settings */ +#define SCE_OMP_AUTO 0 +#define SCE_OMP_MANUAL 1 + #ifdef __cplusplus } #endif diff --git a/source/blender/makesrna/intern/rna_scene.c b/source/blender/makesrna/intern/rna_scene.c index 0c70e332053..d1b04bdc1a9 100644 --- a/source/blender/makesrna/intern/rna_scene.c +++ 
b/source/blender/makesrna/intern/rna_scene.c @@ -43,6 +43,7 @@ #include "BKE_freestyle.h" #include "BKE_editmesh.h" #include "BKE_paint.h" +#include "BKE_scene.h" #include "RNA_define.h" #include "RNA_enum_types.h" @@ -680,6 +681,17 @@ static char *rna_RenderSettings_path(PointerRNA *UNUSED(ptr)) return BLI_sprintfN("render"); } +static void rna_omp_threads_update(Main *UNUSED(bmain), Scene *scene, PointerRNA *UNUSED(ptr)) +{ + BKE_scene_omp_threads_update(scene); /* NOTE(review): BKE_scene_omp_threads_update() is declared in BKE_scene.h by this patch but no definition is visible in it, and this callback is not registered on any property in the visible hunks - confirm */ +} + +static int rna_omp_threads_get(PointerRNA *ptr) +{ + Scene *scene = (Scene *)ptr->data; + return BKE_scene_num_omp_threads(scene); /* reports the effective count, i.e. the auto-detected value when omp_mode is SCE_OMP_AUTO */ +} + static int rna_RenderSettings_threads_get(PointerRNA *ptr) { RenderData *rd = (RenderData *)ptr->data; @@ -5088,6 +5100,12 @@ void RNA_def_scene(BlenderRNA *brna) {0, NULL, 0, NULL, NULL} }; + static EnumPropertyItem omp_threads_mode_items[] = { + {SCE_OMP_AUTO, "AUTO", 0, "Auto-detect", "Automatically determine the number of threads, based on CPUs"}, + {SCE_OMP_MANUAL, "MANUAL", 0, "Manual", "Manually determine the number of threads"}, + {0, NULL, 0, NULL, NULL} + }; + /* Struct definition */ srna = RNA_def_struct(brna, "Scene", "ID"); RNA_def_struct_ui_text(srna, "Scene", @@ -5450,6 +5468,17 @@ void RNA_def_scene(BlenderRNA *brna) RNA_def_property_struct_type(prop, "ColorManagedSequencerColorspaceSettings"); RNA_def_property_ui_text(prop, "Sequencer Color Space Settings", "Settings of color space sequencer is working in"); + prop = RNA_def_property(srna, "omp_num_threads", PROP_INT, PROP_NONE); + RNA_def_property_range(prop, 1, BLENDER_MAX_THREADS); + RNA_def_property_int_funcs(prop, "rna_omp_threads_get", NULL, NULL); + RNA_def_property_ui_text(prop, "OpenMP Threads", + "Number of CPU threads to use simultaneously for openmp " + "(for multi-core/CPU systems)"); + + prop = RNA_def_property(srna, "omp_mode", PROP_ENUM, PROP_NONE); + RNA_def_property_enum_items(prop, omp_threads_mode_items); + RNA_def_property_ui_text(prop, "OpenMP Mode", "Determine the amount of 
openmp threads used"); + /* Nestled Data */ /* *** Non-Animated *** */ RNA_define_animate_sdna(false); -- cgit v1.2.3