Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Jens Verwiebe <info@jensverwiebe.de>, 2014-03-31 15:51:40 +0400
committer: Jens Verwiebe <info@jensverwiebe.de>, 2014-03-31 15:51:49 +0400
commit: 277fb1a31fc4b0c9691b3bbab43fd1a970d3e575 (patch)
tree: 14fdf30a1783d0d4b17ca83f4de8b4e0c4276c66 /source/blender
parent: e05d35bfaffec69ed4d990f6a90a1b9244970aa4 (diff)
Sculpt/dyntopo: Make the omp threads configurable to overcome performance issues
- Autodetect an optimal default, which typically avoids HT (Hyper-Threading) threads.
- The setting can be stored in the .blend file, per scene.
- This does not touch the general OpenMP max threads, because I found other areas where the calculations are a good fit for a huge core count.
- Intel notes that some of the older-generation processors with Hyper-Threading would not provide a significant performance boost for FPU-intensive applications. On those systems you might want to set OMP_NUM_THREADS = total number of cores (not total number of hardware threads).
Diffstat (limited to 'source/blender')
-rw-r--r-- source/blender/blenkernel/BKE_scene.h | 2
-rw-r--r-- source/blender/blenkernel/intern/scene.c | 10
-rw-r--r-- source/blender/blenlib/BLI_threads.h | 2
-rw-r--r-- source/blender/blenlib/intern/threads.c | 33
-rw-r--r-- source/blender/editors/sculpt_paint/sculpt.c | 21
-rw-r--r-- source/blender/makesdna/DNA_scene_types.h | 8
-rw-r--r-- source/blender/makesrna/intern/rna_scene.c | 29
7 files changed, 98 insertions, 7 deletions
diff --git a/source/blender/blenkernel/BKE_scene.h b/source/blender/blenkernel/BKE_scene.h
index a10a3f3f59f..972db36d5a6 100644
--- a/source/blender/blenkernel/BKE_scene.h
+++ b/source/blender/blenkernel/BKE_scene.h
@@ -137,6 +137,8 @@ bool BKE_scene_check_rigidbody_active(const struct Scene *scene);
int BKE_scene_num_threads(const struct Scene *scene);
int BKE_render_num_threads(const struct RenderData *r);
+int BKE_scene_num_omp_threads(const struct Scene *scene);
+void BKE_scene_omp_threads_update(const struct Scene *scene);
#ifdef __cplusplus
}
#endif
diff --git a/source/blender/blenkernel/intern/scene.c b/source/blender/blenkernel/intern/scene.c
index 28cc4305da8..02bc1fcb699 100644
--- a/source/blender/blenkernel/intern/scene.c
+++ b/source/blender/blenkernel/intern/scene.c
@@ -638,6 +638,9 @@ Scene *BKE_scene_add(Main *bmain, const char *name)
sce->gm.exitkey = 218; // Blender key code for ESC
+ sce->omp_mode = SCE_OMP_AUTO;
+ sce->omp_num_threads = 1;
+
sound_create_scene(sce);
/* color management */
@@ -1868,3 +1871,10 @@ int BKE_scene_num_threads(const Scene *scene)
return BKE_render_num_threads(&scene->r);
}
+int BKE_scene_num_omp_threads(const struct Scene *scene)
+{
+ if (scene->omp_mode == SCE_OMP_AUTO)
+ return BLI_omp_thread_count();
+ else
+ return scene->omp_num_threads;
+}
diff --git a/source/blender/blenlib/BLI_threads.h b/source/blender/blenlib/BLI_threads.h
index 62eadb8a8b5..b522d95ddae 100644
--- a/source/blender/blenlib/BLI_threads.h
+++ b/source/blender/blenlib/BLI_threads.h
@@ -75,6 +75,8 @@ int BLI_system_thread_count(void); /* gets the number of threads the system
void BLI_system_num_threads_override_set(int num);
int BLI_system_num_threads_override_get(void);
+int BLI_omp_thread_count(void); /* gets the number of openmp threads the system can make use of */
+
/* Global Mutex Locks
*
* One custom lock available now. can be extended. */
diff --git a/source/blender/blenlib/intern/threads.c b/source/blender/blenlib/intern/threads.c
index ded2fd7e06d..78752fde608 100644
--- a/source/blender/blenlib/intern/threads.c
+++ b/source/blender/blenlib/intern/threads.c
@@ -54,10 +54,25 @@
# include <sys/time.h>
#endif
-#if defined(__APPLE__) && defined(_OPENMP) && (__GNUC__ == 4) && (__GNUC_MINOR__ == 2) && !defined(__clang__)
+#ifdef _OPENMP
+#include <omp.h>
+#endif
+
+#if defined(__APPLE__)
+#if defined(_OPENMP) && (__GNUC__ == 4) && (__GNUC_MINOR__ == 2) && !defined(__clang__)
# define USE_APPLE_OMP_FIX
#endif
+/* how many cores not counting HT aka pysical cores */
+static int system_physical_thread_count(void)
+{
+ int ptcount;
+ size_t ptcount_len = sizeof(ptcount);
+ sysctlbyname("hw.physicalcpu", &ptcount, &ptcount_len, NULL, 0);
+ return ptcount;
+}
+#endif // __APPLE__
+
#ifdef USE_APPLE_OMP_FIX
/* ************** libgomp (Apple gcc 4.2.1) TLS bug workaround *************** */
extern pthread_key_t gomp_tls_key;
@@ -335,6 +350,22 @@ void BLI_end_threads(ListBase *threadbase)
/* System Information */
+/* gets the number of openmp threads the system can make use of */
+int BLI_omp_thread_count(void)
+{
+ int t;
+#ifdef _OPENMP
+#ifdef __APPLE__
+ t = system_physical_thread_count();
+#else
+ t = omp_get_num_procs();
+#endif
+#else
+ t = 1;
+#endif
+ return t;
+}
+
/* how many threads are native on this system? */
int BLI_system_thread_count(void)
{
diff --git a/source/blender/editors/sculpt_paint/sculpt.c b/source/blender/editors/sculpt_paint/sculpt.c
index 8b65d2c9432..c04f8439fe3 100644
--- a/source/blender/editors/sculpt_paint/sculpt.c
+++ b/source/blender/editors/sculpt_paint/sculpt.c
@@ -67,6 +67,7 @@
#include "BKE_multires.h"
#include "BKE_paint.h"
#include "BKE_report.h"
+#include "BKE_scene.h"
#include "BKE_lattice.h" /* for armature_deform_verts */
#include "BKE_node.h"
#include "BKE_object.h"
@@ -1541,10 +1542,10 @@ static void do_multires_smooth_brush(Sculpt *sd, SculptSession *ss, PBVHNode *no
grid_hidden = BKE_pbvh_grid_hidden(ss->pbvh);
- thread_num = 0;
#ifdef _OPENMP
- if (sd->flags & SCULPT_USE_OPENMP)
- thread_num = omp_get_thread_num();
+ thread_num = omp_get_thread_num();
+#else
+ thread_num = 0;
#endif
tmpgrid_co = ss->cache->tmpgrid_co[thread_num];
tmprow_co = ss->cache->tmprow_co[thread_num];
@@ -3769,7 +3770,7 @@ static void sculpt_init_mirror_clipping(Object *ob, SculptSession *ss)
}
}
-static void sculpt_omp_start(Sculpt *sd, SculptSession *ss)
+static void sculpt_omp_start(Scene *scene, Sculpt *sd, SculptSession *ss)
{
StrokeCache *cache = ss->cache;
@@ -3779,15 +3780,17 @@ static void sculpt_omp_start(Sculpt *sd, SculptSession *ss)
* Justification: Empirically I've found that two threads per
* processor gives higher throughput. */
if (sd->flags & SCULPT_USE_OPENMP) {
- cache->num_threads = omp_get_num_procs();
+ cache->num_threads = BKE_scene_num_omp_threads(scene);
}
else {
cache->num_threads = 1;
}
+ omp_set_num_threads(cache->num_threads);
#else
(void)sd;
cache->num_threads = 1;
#endif
+// printf("Sculpt omp threadcount: %d\n", cache->num_threads);
if (ss->multires) {
int i, gridsize, array_mem_size;
BKE_pbvh_node_get_grids(ss->pbvh, NULL, NULL, NULL, NULL,
@@ -4002,7 +4005,7 @@ static void sculpt_update_cache_invariants(bContext *C, Sculpt *sd, SculptSessio
cache->previous_vertex_rotation = 0;
cache->init_dir_set = false;
- sculpt_omp_start(sd, ss);
+ sculpt_omp_start(scene, sd, ss);
}
static void sculpt_update_brush_delta(UnifiedPaintSettings *ups, Object *ob, Brush *brush)
@@ -4626,6 +4629,12 @@ static void sculpt_stroke_done(const bContext *C, struct PaintStroke *UNUSED(str
WM_event_add_notifier(C, NC_OBJECT | ND_DRAW, ob);
}
+#ifdef _OPENMP
+ if (!(sd->flags & SCULPT_USE_OPENMP))
+ omp_set_num_threads(BLI_system_thread_count());
+// printf("Reseted to omp threadcount: %d\n", BLI_system_thread_count());
+#endif
+
sculpt_brush_exit_tex(sd);
}
diff --git a/source/blender/makesdna/DNA_scene_types.h b/source/blender/makesdna/DNA_scene_types.h
index b9621b4753c..cc16ccd201d 100644
--- a/source/blender/makesdna/DNA_scene_types.h
+++ b/source/blender/makesdna/DNA_scene_types.h
@@ -1224,6 +1224,10 @@ typedef struct Scene {
/* RigidBody simulation world+settings */
struct RigidBodyWorld *rigidbody_world;
+
+ /* Openmp Global Settings */
+ int omp_num_threads;
+ int omp_mode;
} Scene;
@@ -1769,6 +1773,10 @@ typedef enum SculptFlags {
#define USER_UNIT_OPT_SPLIT 1
#define USER_UNIT_ROT_RADIANS 2
+/* OpenMP settings */
+#define SCE_OMP_AUTO 0
+#define SCE_OMP_MANUAL 1
+
#ifdef __cplusplus
}
#endif
diff --git a/source/blender/makesrna/intern/rna_scene.c b/source/blender/makesrna/intern/rna_scene.c
index 0c70e332053..d1b04bdc1a9 100644
--- a/source/blender/makesrna/intern/rna_scene.c
+++ b/source/blender/makesrna/intern/rna_scene.c
@@ -43,6 +43,7 @@
#include "BKE_freestyle.h"
#include "BKE_editmesh.h"
#include "BKE_paint.h"
+#include "BKE_scene.h"
#include "RNA_define.h"
#include "RNA_enum_types.h"
@@ -680,6 +681,17 @@ static char *rna_RenderSettings_path(PointerRNA *UNUSED(ptr))
return BLI_sprintfN("render");
}
+static void rna_omp_threads_update(Main *UNUSED(bmain), Scene *scene, PointerRNA *UNUSED(ptr))
+{
+ BKE_scene_omp_threads_update(scene);
+}
+
+static int rna_omp_threads_get(PointerRNA *ptr)
+{
+ Scene *scene = (Scene *)ptr->data;
+ return BKE_scene_num_omp_threads(scene);
+}
+
static int rna_RenderSettings_threads_get(PointerRNA *ptr)
{
RenderData *rd = (RenderData *)ptr->data;
@@ -5088,6 +5100,12 @@ void RNA_def_scene(BlenderRNA *brna)
{0, NULL, 0, NULL, NULL}
};
+ static EnumPropertyItem omp_threads_mode_items[] = {
+ {SCE_OMP_AUTO, "AUTO", 0, "Auto-detect", "Automatically determine the number of threads, based on CPUs"},
+ {SCE_OMP_MANUAL, "MANUAL", 0, "Manual", "Manually determine the number of threads"},
+ {0, NULL, 0, NULL, NULL}
+ };
+
/* Struct definition */
srna = RNA_def_struct(brna, "Scene", "ID");
RNA_def_struct_ui_text(srna, "Scene",
@@ -5450,6 +5468,17 @@ void RNA_def_scene(BlenderRNA *brna)
RNA_def_property_struct_type(prop, "ColorManagedSequencerColorspaceSettings");
RNA_def_property_ui_text(prop, "Sequencer Color Space Settings", "Settings of color space sequencer is working in");
+ prop = RNA_def_property(srna, "omp_num_threads", PROP_INT, PROP_NONE);
+ RNA_def_property_range(prop, 1, BLENDER_MAX_THREADS);
+ RNA_def_property_int_funcs(prop, "rna_omp_threads_get", NULL, NULL);
+ RNA_def_property_ui_text(prop, "OpenMP Threads",
+ "Number of CPU threads to use simultaneously for openmp"
+ "(for multi-core/CPU systems)");
+
+ prop = RNA_def_property(srna, "omp_mode", PROP_ENUM, PROP_NONE);
+ RNA_def_property_enum_items(prop, omp_threads_mode_items);
+ RNA_def_property_ui_text(prop, "OpenMP Mode", "Determine the amount of openmp threads used");
+
/* Nestled Data */
/* *** Non-Animated *** */
RNA_define_animate_sdna(false);