From 277fb1a31fc4b0c9691b3bbab43fd1a970d3e575 Mon Sep 17 00:00:00 2001
From: Jens Verwiebe <info@jensverwiebe.de>
Date: Mon, 31 Mar 2014 13:51:40 +0200
Subject: Sculpt/dyntopo: Make the omp threads configurable to overcome
 performance issues - autodetect optimal default, which typically avoids HT
 threads - can store setting in .blend per scene - this does not touch general
 omp max threads, because I found other areas where the calculations are well
 suited to a huge core count - Intel notes that some of the older generation
 processors with HyperThreading would not provide a significant performance
 boost for FPU intensive applications. On those systems you might want to set
 OMP_NUM_THREADS = total number of cores (not total number of hardware
 threads).

---
 source/blender/blenkernel/BKE_scene.h        |  2 ++
 source/blender/blenkernel/intern/scene.c     | 10 +++++++++
 source/blender/blenlib/BLI_threads.h         |  2 ++
 source/blender/blenlib/intern/threads.c      | 33 +++++++++++++++++++++++++++-
 source/blender/editors/sculpt_paint/sculpt.c | 21 +++++++++++++-----
 source/blender/makesdna/DNA_scene_types.h    |  8 +++++++
 source/blender/makesrna/intern/rna_scene.c   | 29 ++++++++++++++++++++++++
 7 files changed, 98 insertions(+), 7 deletions(-)

(limited to 'source/blender')

diff --git a/source/blender/blenkernel/BKE_scene.h b/source/blender/blenkernel/BKE_scene.h
index a10a3f3f59f..972db36d5a6 100644
--- a/source/blender/blenkernel/BKE_scene.h
+++ b/source/blender/blenkernel/BKE_scene.h
@@ -137,6 +137,8 @@ bool BKE_scene_check_rigidbody_active(const struct Scene *scene);
 int BKE_scene_num_threads(const struct Scene *scene);
 int BKE_render_num_threads(const struct RenderData *r);
 
+int BKE_scene_num_omp_threads(const struct Scene *scene);
+void BKE_scene_omp_threads_update(const struct Scene *scene);
 #ifdef __cplusplus
 }
 #endif
diff --git a/source/blender/blenkernel/intern/scene.c b/source/blender/blenkernel/intern/scene.c
index 28cc4305da8..02bc1fcb699 100644
--- a/source/blender/blenkernel/intern/scene.c
+++ b/source/blender/blenkernel/intern/scene.c
@@ -638,6 +638,9 @@ Scene *BKE_scene_add(Main *bmain, const char *name)
 
 	sce->gm.exitkey = 218; // Blender key code for ESC
 
+	sce->omp_mode = SCE_OMP_AUTO;
+	sce->omp_num_threads = 1;
+
 	sound_create_scene(sce);
 
 	/* color management */
@@ -1868,3 +1871,10 @@ int BKE_scene_num_threads(const Scene *scene)
 	return BKE_render_num_threads(&scene->r);
 }
 
+int BKE_scene_num_omp_threads(const struct Scene *scene)
+{
+	if (scene->omp_mode == SCE_OMP_AUTO)
+		return BLI_omp_thread_count();
+	else
+		return scene->omp_num_threads;
+}
diff --git a/source/blender/blenlib/BLI_threads.h b/source/blender/blenlib/BLI_threads.h
index 62eadb8a8b5..b522d95ddae 100644
--- a/source/blender/blenlib/BLI_threads.h
+++ b/source/blender/blenlib/BLI_threads.h
@@ -75,6 +75,8 @@ int     BLI_system_thread_count(void); /* gets the number of threads the system
 void    BLI_system_num_threads_override_set(int num);
 int     BLI_system_num_threads_override_get(void);
 
+int     BLI_omp_thread_count(void); /* gets the number of openmp threads the system can make use of */
+	
 /* Global Mutex Locks
  * 
  * One custom lock available now. can be extended. */
diff --git a/source/blender/blenlib/intern/threads.c b/source/blender/blenlib/intern/threads.c
index ded2fd7e06d..78752fde608 100644
--- a/source/blender/blenlib/intern/threads.c
+++ b/source/blender/blenlib/intern/threads.c
@@ -54,10 +54,25 @@
 #  include <sys/time.h>
 #endif
 
-#if defined(__APPLE__) && defined(_OPENMP) && (__GNUC__ == 4) && (__GNUC_MINOR__ == 2) && !defined(__clang__)
+#ifdef _OPENMP
+#include <omp.h>
+#endif
+
+#if defined(__APPLE__)
+#if defined(_OPENMP) && (__GNUC__ == 4) && (__GNUC_MINOR__ == 2) && !defined(__clang__)
 #  define USE_APPLE_OMP_FIX
 #endif
 
+/* how many cores, not counting HT, aka physical cores */
+static int system_physical_thread_count(void)
+{
+	int ptcount;
+	size_t ptcount_len = sizeof(ptcount);
+	sysctlbyname("hw.physicalcpu", &ptcount, &ptcount_len, NULL, 0);
+	return ptcount;
+}
+#endif // __APPLE__
+
 #ifdef USE_APPLE_OMP_FIX
 /* ************** libgomp (Apple gcc 4.2.1) TLS bug workaround *************** */
 extern pthread_key_t gomp_tls_key;
@@ -335,6 +350,22 @@ void BLI_end_threads(ListBase *threadbase)
 
 /* System Information */
 
+/* gets the number of openmp threads the system can make use of */
+int BLI_omp_thread_count(void)
+{
+	int t;
+#ifdef _OPENMP
+#ifdef __APPLE__
+	t = system_physical_thread_count();
+#else
+	t = omp_get_num_procs();
+#endif
+#else
+	t = 1;
+#endif
+	return t;
+}
+
 /* how many threads are native on this system? */
 int BLI_system_thread_count(void)
 {
diff --git a/source/blender/editors/sculpt_paint/sculpt.c b/source/blender/editors/sculpt_paint/sculpt.c
index 8b65d2c9432..c04f8439fe3 100644
--- a/source/blender/editors/sculpt_paint/sculpt.c
+++ b/source/blender/editors/sculpt_paint/sculpt.c
@@ -67,6 +67,7 @@
 #include "BKE_multires.h"
 #include "BKE_paint.h"
 #include "BKE_report.h"
+#include "BKE_scene.h"
 #include "BKE_lattice.h" /* for armature_deform_verts */
 #include "BKE_node.h"
 #include "BKE_object.h"
@@ -1541,10 +1542,10 @@ static void do_multires_smooth_brush(Sculpt *sd, SculptSession *ss, PBVHNode *no
 
 	grid_hidden = BKE_pbvh_grid_hidden(ss->pbvh);
 
-	thread_num = 0;
 #ifdef _OPENMP
-	if (sd->flags & SCULPT_USE_OPENMP)
-		thread_num = omp_get_thread_num();
+	thread_num = omp_get_thread_num();
+#else
+	thread_num = 0;
 #endif
 	tmpgrid_co = ss->cache->tmpgrid_co[thread_num];
 	tmprow_co = ss->cache->tmprow_co[thread_num];
@@ -3769,7 +3770,7 @@ static void sculpt_init_mirror_clipping(Object *ob, SculptSession *ss)
 	}
 }
 
-static void sculpt_omp_start(Sculpt *sd, SculptSession *ss)
+static void sculpt_omp_start(Scene *scene, Sculpt *sd, SculptSession *ss)
 {
 	StrokeCache *cache = ss->cache;
 
@@ -3779,15 +3780,17 @@ static void sculpt_omp_start(Sculpt *sd, SculptSession *ss)
 	 * Justification: Empirically I've found that two threads per
 	 * processor gives higher throughput. */
 	if (sd->flags & SCULPT_USE_OPENMP) {
-		cache->num_threads = omp_get_num_procs();
+		cache->num_threads = BKE_scene_num_omp_threads(scene);
 	}
 	else {
 		cache->num_threads = 1;
 	}
+	omp_set_num_threads(cache->num_threads);
 #else
 	(void)sd;
 	cache->num_threads = 1;
 #endif
+//	printf("Sculpt omp threadcount: %d\n", cache->num_threads);
 	if (ss->multires) {
 		int i, gridsize, array_mem_size;
 		BKE_pbvh_node_get_grids(ss->pbvh, NULL, NULL, NULL, NULL,
@@ -4002,7 +4005,7 @@ static void sculpt_update_cache_invariants(bContext *C, Sculpt *sd, SculptSessio
 	cache->previous_vertex_rotation = 0;
 	cache->init_dir_set = false;
 
-	sculpt_omp_start(sd, ss);
+	sculpt_omp_start(scene, sd, ss);
 }
 
 static void sculpt_update_brush_delta(UnifiedPaintSettings *ups, Object *ob, Brush *brush)
@@ -4626,6 +4629,12 @@ static void sculpt_stroke_done(const bContext *C, struct PaintStroke *UNUSED(str
 		WM_event_add_notifier(C, NC_OBJECT | ND_DRAW, ob);
 	}
 
+#ifdef _OPENMP
+	if (!(sd->flags & SCULPT_USE_OPENMP))
+		omp_set_num_threads(BLI_system_thread_count());
+//		printf("Reset to omp threadcount: %d\n", BLI_system_thread_count());
+#endif
+
 	sculpt_brush_exit_tex(sd);
 }
 
diff --git a/source/blender/makesdna/DNA_scene_types.h b/source/blender/makesdna/DNA_scene_types.h
index b9621b4753c..cc16ccd201d 100644
--- a/source/blender/makesdna/DNA_scene_types.h
+++ b/source/blender/makesdna/DNA_scene_types.h
@@ -1224,6 +1224,10 @@ typedef struct Scene {
 	
 	/* RigidBody simulation world+settings */
 	struct RigidBodyWorld *rigidbody_world;
+
+	/* Openmp Global Settings */
+	int omp_num_threads;
+	int omp_mode;
 } Scene;
 
 
@@ -1769,6 +1773,10 @@ typedef enum SculptFlags {
 #define	USER_UNIT_OPT_SPLIT		1
 #define USER_UNIT_ROT_RADIANS	2
 
+/* OpenMP settings */
+#define SCE_OMP_AUTO 0
+#define SCE_OMP_MANUAL 1
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/source/blender/makesrna/intern/rna_scene.c b/source/blender/makesrna/intern/rna_scene.c
index 0c70e332053..d1b04bdc1a9 100644
--- a/source/blender/makesrna/intern/rna_scene.c
+++ b/source/blender/makesrna/intern/rna_scene.c
@@ -43,6 +43,7 @@
 #include "BKE_freestyle.h"
 #include "BKE_editmesh.h"
 #include "BKE_paint.h"
+#include "BKE_scene.h"
 
 #include "RNA_define.h"
 #include "RNA_enum_types.h"
@@ -680,6 +681,17 @@ static char *rna_RenderSettings_path(PointerRNA *UNUSED(ptr))
 	return BLI_sprintfN("render");
 }
 
+static void rna_omp_threads_update(Main *UNUSED(bmain), Scene *scene, PointerRNA *UNUSED(ptr))
+{
+	BKE_scene_omp_threads_update(scene);
+}
+
+static int rna_omp_threads_get(PointerRNA *ptr)
+{
+	Scene *scene = (Scene *)ptr->data;
+	return BKE_scene_num_omp_threads(scene);
+}
+
 static int rna_RenderSettings_threads_get(PointerRNA *ptr)
 {
 	RenderData *rd = (RenderData *)ptr->data;
@@ -5088,6 +5100,12 @@ void RNA_def_scene(BlenderRNA *brna)
 		{0, NULL, 0, NULL, NULL}
 	};
 
+	static EnumPropertyItem omp_threads_mode_items[] = {
+		{SCE_OMP_AUTO, "AUTO", 0, "Auto-detect", "Automatically determine the number of threads, based on CPUs"},
+		{SCE_OMP_MANUAL, "MANUAL", 0, "Manual", "Manually determine the number of threads"},
+		{0, NULL, 0, NULL, NULL}
+	};
+
 	/* Struct definition */
 	srna = RNA_def_struct(brna, "Scene", "ID");
 	RNA_def_struct_ui_text(srna, "Scene",
@@ -5450,6 +5468,17 @@ void RNA_def_scene(BlenderRNA *brna)
 	RNA_def_property_struct_type(prop, "ColorManagedSequencerColorspaceSettings");
 	RNA_def_property_ui_text(prop, "Sequencer Color Space Settings", "Settings of color space sequencer is working in");
 
+	prop = RNA_def_property(srna, "omp_num_threads", PROP_INT, PROP_NONE);
+	RNA_def_property_range(prop, 1, BLENDER_MAX_THREADS);
+	RNA_def_property_int_funcs(prop, "rna_omp_threads_get", NULL, NULL);
+	RNA_def_property_ui_text(prop, "OpenMP Threads",
+							 "Number of CPU threads to use simultaneously for openmp"
+							 "(for multi-core/CPU systems)");
+
+	prop = RNA_def_property(srna, "omp_mode", PROP_ENUM, PROP_NONE);
+	RNA_def_property_enum_items(prop, omp_threads_mode_items);
+	RNA_def_property_ui_text(prop, "OpenMP Mode", "Determine the amount of openmp threads used");
+
 	/* Nestled Data  */
 	/* *** Non-Animated *** */
 	RNA_define_animate_sdna(false);
-- 
cgit v1.2.3