Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
path: root/intern
diff options
context:
space:
mode:
authorSergey Sharybin <sergey.vfx@gmail.com>2019-01-11 20:09:05 +0300
committerSergey Sharybin <sergey.vfx@gmail.com>2019-01-11 20:09:05 +0300
commit1c7695b8483dc9bbcfd9dac26a652922062ea2b7 (patch)
tree0e7fc3c9dc7de3338c49bebb1aba568d717154d9 /intern
parentba4e6d73af0a125c319cd087ff5db68a914bbabe (diff)
parent48506a3431fb5b4396f7cf2d9c6a8a208b3c0df5 (diff)
Merge branch 'blender2.7'
Diffstat (limited to 'intern')
-rw-r--r--intern/cycles/util/util_system.cpp22
-rw-r--r--intern/cycles/util/util_system.h4
-rw-r--r--intern/cycles/util/util_task.cpp18
-rw-r--r--intern/numaapi/README.blender2
-rw-r--r--intern/numaapi/include/numaapi.h10
-rw-r--r--intern/numaapi/source/numaapi_linux.c24
-rw-r--r--intern/numaapi/source/numaapi_stub.c7
-rw-r--r--intern/numaapi/source/numaapi_win32.c53
8 files changed, 118 insertions, 22 deletions
diff --git a/intern/cycles/util/util_system.cpp b/intern/cycles/util/util_system.cpp
index cc2d7017fd8..a22bd25ce77 100644
--- a/intern/cycles/util/util_system.cpp
+++ b/intern/cycles/util/util_system.cpp
@@ -40,7 +40,7 @@ bool system_cpu_ensure_initialized()
{
static bool is_initialized = false;
static bool result = false;
- if (is_initialized) {
+ if(is_initialized) {
return result;
}
is_initialized = true;
@@ -71,8 +71,8 @@ int system_cpu_thread_count()
{
const int num_nodes = system_cpu_num_numa_nodes();
int num_threads = 0;
- for (int node = 0; node < num_nodes; ++node) {
- if (!system_cpu_is_numa_node_available(node)) {
+ for(int node = 0; node < num_nodes; ++node) {
+ if(!system_cpu_is_numa_node_available(node)) {
continue;
}
num_threads += system_cpu_num_numa_node_processors(node);
@@ -82,7 +82,7 @@ int system_cpu_thread_count()
int system_cpu_num_numa_nodes()
{
- if (!system_cpu_ensure_initialized()) {
+ if(!system_cpu_ensure_initialized()) {
/* Fallback to a single node with all the threads. */
return 1;
}
@@ -91,7 +91,7 @@ int system_cpu_num_numa_nodes()
bool system_cpu_is_numa_node_available(int node)
{
- if (!system_cpu_ensure_initialized()) {
+ if(!system_cpu_ensure_initialized()) {
return true;
}
return numaAPI_IsNodeAvailable(node);
@@ -99,7 +99,7 @@ bool system_cpu_is_numa_node_available(int node)
int system_cpu_num_numa_node_processors(int node)
{
- if (!system_cpu_ensure_initialized()) {
+ if(!system_cpu_ensure_initialized()) {
return system_cpu_thread_count_fallback();
}
return numaAPI_GetNumNodeProcessors(node);
@@ -107,12 +107,20 @@ int system_cpu_num_numa_node_processors(int node)
bool system_cpu_run_thread_on_node(int node)
{
- if (!system_cpu_ensure_initialized()) {
+ if(!system_cpu_ensure_initialized()) {
return true;
}
return numaAPI_RunThreadOnNode(node);
}
+/* Number of processors reported by numaAPI for the nodes the current thread
+ * runs on. When system_cpu_ensure_initialized() reports failure the value of
+ * system_cpu_thread_count_fallback() is returned instead. */
+int system_cpu_num_active_group_processors()
+{
+	if(system_cpu_ensure_initialized()) {
+		return numaAPI_GetNumCurrentNodesProcessors();
+	}
+	/* Initialization failed: use the thread count fallback. */
+	return system_cpu_thread_count_fallback();
+}
+
#if !defined(_WIN32) || defined(FREE_WINDOWS)
static void __cpuid(int data[4], int selector)
{
diff --git a/intern/cycles/util/util_system.h b/intern/cycles/util/util_system.h
index 15f69bcf153..0c001f11f0e 100644
--- a/intern/cycles/util/util_system.h
+++ b/intern/cycles/util/util_system.h
@@ -44,6 +44,10 @@ int system_cpu_num_numa_node_processors(int node);
* Returns true if affinity has successfully changed. */
bool system_cpu_run_thread_on_node(int node);
+/* Number of processors within the current CPU group (or within the active
+ * thread affinity). */
+int system_cpu_num_active_group_processors();
+
string system_cpu_brand_string();
int system_cpu_bits();
bool system_cpu_support_sse2();
diff --git a/intern/cycles/util/util_task.cpp b/intern/cycles/util/util_task.cpp
index 7e9f7313fba..4241c4aa8cc 100644
--- a/intern/cycles/util/util_task.cpp
+++ b/intern/cycles/util/util_task.cpp
@@ -228,9 +228,21 @@ int get_num_total_processors(const vector<int>& num_per_node_processors)
void distribute_threads_on_nodes(const vector<thread*>& threads)
{
const int num_threads = threads.size();
- /* TODO(sergey): Skip overriding affinity if threads fits into the current
- * nodes/CPU group. This will allow user to tweak affinity for weird and
- * wonderful reasons. */
+ const int num_active_group_processors =
+ system_cpu_num_active_group_processors();
+ VLOG(1) << "Detected " << num_active_group_processors << " processors "
+ << "in active group.";
+ if(num_active_group_processors >= num_threads) {
+ /* If the affinity of the current thread already allows using at least
+ * the requested number of threads, we do not explicitly set the affinity
+ * of the worker threads.
+ * This way users can manually edit the affinity of the parent thread,
+ * and here we follow that affinity. This makes it possible to have two
+ * Cycles/Blender instances running, each manually set to a different
+ * die of a CPU. */
+ VLOG(1) << "Not setting thread group affinity.";
+ return;
+ }
vector<int> num_per_node_processors;
get_per_node_num_processors(&num_per_node_processors);
if(num_per_node_processors.size() == 0) {
diff --git a/intern/numaapi/README.blender b/intern/numaapi/README.blender
index 661073712b9..6f71d5f8807 100644
--- a/intern/numaapi/README.blender
+++ b/intern/numaapi/README.blender
@@ -1,5 +1,5 @@
Project: LibNumaAPI
URL: https://github.com/Nazg-Gul/libNumaAPI
License: MIT
-Upstream version: f83d41ec4d7
+Upstream version: 4e7206befce
Local modifications: None
diff --git a/intern/numaapi/include/numaapi.h b/intern/numaapi/include/numaapi.h
index 7b5b50fdf39..bddb51448f8 100644
--- a/intern/numaapi/include/numaapi.h
+++ b/intern/numaapi/include/numaapi.h
@@ -71,6 +71,16 @@ bool numaAPI_IsNodeAvailable(int node);
int numaAPI_GetNumNodeProcessors(int node);
////////////////////////////////////////////////////////////////////////////////
+// Topology helpers.
+//
+// These are somewhat higher-level queries, but they are still rather
+// platform-specific and generally useful.
+
+// Get the number of processors within the NUMA nodes that the current
+// thread's affinity is set to.
+int numaAPI_GetNumCurrentNodesProcessors(void);
+
+////////////////////////////////////////////////////////////////////////////////
// Affinities.
// Runs the current process and its children on a specific node.
diff --git a/intern/numaapi/source/numaapi_linux.c b/intern/numaapi/source/numaapi_linux.c
index 62e9dcdfadf..9750f1c17df 100644
--- a/intern/numaapi/source/numaapi_linux.c
+++ b/intern/numaapi/source/numaapi_linux.c
@@ -34,8 +34,6 @@
# include <dlfcn.h>
#endif
-#include <stdio.h>
-
#ifdef WITH_DYNLOAD
// Descriptor numa library.
@@ -64,6 +62,7 @@ typedef void tnuma_free_cpumask(struct bitmask* bitmask);
typedef void tnuma_free_nodemask(struct bitmask* bitmask);
typedef int tnuma_run_on_node_mask(struct bitmask *nodemask);
typedef int tnuma_run_on_node_mask_all(struct bitmask *nodemask);
+typedef struct bitmask *tnuma_get_run_node_mask(void);
typedef void tnuma_set_interleave_mask(struct bitmask *nodemask);
typedef void tnuma_set_localalloc(void);
@@ -87,6 +86,7 @@ static tnuma_free_nodemask* numa_free_nodemask;
static tnuma_free_cpumask* numa_free_cpumask;
static tnuma_run_on_node_mask* numa_run_on_node_mask;
static tnuma_run_on_node_mask_all* numa_run_on_node_mask_all;
+static tnuma_get_run_node_mask* numa_get_run_node_mask;
static tnuma_set_interleave_mask* numa_set_interleave_mask;
static tnuma_set_localalloc* numa_set_localalloc;
@@ -162,6 +162,7 @@ static NUMAAPI_Result loadNumaSymbols(void) {
NUMA_LIBRARY_FIND(numa_free_nodemask);
NUMA_LIBRARY_FIND(numa_run_on_node_mask);
NUMA_LIBRARY_FIND(numa_run_on_node_mask_all);
+ NUMA_LIBRARY_FIND(numa_get_run_node_mask);
NUMA_LIBRARY_FIND(numa_set_interleave_mask);
NUMA_LIBRARY_FIND(numa_set_localalloc);
@@ -204,7 +205,7 @@ int numaAPI_GetNumNodeProcessors(int node) {
struct bitmask* cpu_mask = numa_allocate_cpumask();
numa_node_to_cpus(node, cpu_mask);
const unsigned int num_bytes = numa_bitmask_nbytes(cpu_mask);
- const unsigned int num_bits = num_bytes *8;
+ const unsigned int num_bits = num_bytes * 8;
// TODO(sergey): There might be faster way calculating number of set bits.
int num_processors = 0;
for (unsigned int bit = 0; bit < num_bits; ++bit) {
@@ -225,6 +226,23 @@ int numaAPI_GetNumNodeProcessors(int node) {
}
////////////////////////////////////////////////////////////////////////////////
+// Topology helpers.
+
+// Sums numaAPI_GetNumNodeProcessors() over every node whose bit is set in the
+// mask returned by numa_get_run_node_mask(). The mask is released with
+// numa_bitmask_free() before returning.
+int numaAPI_GetNumCurrentNodesProcessors(void) {
+  struct bitmask* run_nodes = numa_get_run_node_mask();
+  // The mask size is reported in bytes; walk it bit by bit.
+  const unsigned int total_bits = numa_bitmask_nbytes(run_nodes) * 8;
+  int total_processors = 0;
+  unsigned int node = 0;
+  while (node < total_bits) {
+    if (numa_bitmask_isbitset(run_nodes, node)) {
+      total_processors += numaAPI_GetNumNodeProcessors(node);
+    }
+    ++node;
+  }
+  numa_bitmask_free(run_nodes);
+  return total_processors;
+}
+
+////////////////////////////////////////////////////////////////////////////////
// Affinities.
bool numaAPI_RunProcessOnNode(int node) {
diff --git a/intern/numaapi/source/numaapi_stub.c b/intern/numaapi/source/numaapi_stub.c
index e054d71018c..6ac41136c8f 100644
--- a/intern/numaapi/source/numaapi_stub.c
+++ b/intern/numaapi/source/numaapi_stub.c
@@ -53,6 +53,13 @@ int numaAPI_GetNumNodeProcessors(int node) {
}
////////////////////////////////////////////////////////////////////////////////
+// Topology helpers.
+
+// Stub implementation (numaapi_stub.c): always reports zero processors.
+int numaAPI_GetNumCurrentNodesProcessors(void) {
+ return 0;
+}
+
+////////////////////////////////////////////////////////////////////////////////
// Affinities.
bool numaAPI_RunProcessOnNode(int node) {
diff --git a/intern/numaapi/source/numaapi_win32.c b/intern/numaapi/source/numaapi_win32.c
index 33cbc797bd0..e278ef612fd 100644
--- a/intern/numaapi/source/numaapi_win32.c
+++ b/intern/numaapi/source/numaapi_win32.c
@@ -47,8 +47,6 @@
# include <VersionHelpers.h>
#endif
-#include <stdio.h>
-
////////////////////////////////////////////////////////////////////////////////
// Initialization.
@@ -74,9 +72,14 @@ typedef BOOL t_VirtualFree(void* address, SIZE_T size, DWORD free_type);
typedef BOOL t_SetProcessAffinityMask(HANDLE process_handle,
DWORD_PTR process_affinity_mask);
typedef BOOL t_SetThreadGroupAffinity(HANDLE thread_handle,
- const GROUP_AFFINITY* GroupAffinity,
+ const GROUP_AFFINITY* group_affinity,
GROUP_AFFINITY* PreviousGroupAffinity);
+typedef BOOL t_GetThreadGroupAffinity(HANDLE thread_handle,
+ GROUP_AFFINITY* group_affinity);
typedef DWORD t_GetCurrentProcessorNumber(void);
+typedef void t_GetCurrentProcessorNumberEx(PROCESSOR_NUMBER* proc_number);
+typedef DWORD t_GetActiveProcessorCount(WORD group_number);
+
// NUMA symbols.
static t_GetNumaHighestNodeNumber* _GetNumaHighestNodeNumber;
@@ -88,7 +91,10 @@ static t_VirtualFree* _VirtualFree;
// Threading symbols.
static t_SetProcessAffinityMask* _SetProcessAffinityMask;
static t_SetThreadGroupAffinity* _SetThreadGroupAffinity;
+static t_GetThreadGroupAffinity* _GetThreadGroupAffinity;
static t_GetCurrentProcessorNumber* _GetCurrentProcessorNumber;
+static t_GetCurrentProcessorNumberEx* _GetCurrentProcessorNumberEx;
+static t_GetActiveProcessorCount* _GetActiveProcessorCount;
static void numaExit(void) {
// TODO(sergey): Consider closing library here.
@@ -128,7 +134,10 @@ static NUMAAPI_Result loadNumaSymbols(void) {
// Threading.
KERNEL_LIBRARY_FIND(SetProcessAffinityMask);
KERNEL_LIBRARY_FIND(SetThreadGroupAffinity);
+ KERNEL_LIBRARY_FIND(GetThreadGroupAffinity);
KERNEL_LIBRARY_FIND(GetCurrentProcessorNumber);
+ KERNEL_LIBRARY_FIND(GetCurrentProcessorNumberEx);
+ KERNEL_LIBRARY_FIND(GetActiveProcessorCount);
#undef KERNEL_LIBRARY_FIND
#undef _LIBRARY_FIND
@@ -152,6 +161,19 @@ NUMAAPI_Result numaAPI_Initialize(void) {
}
////////////////////////////////////////////////////////////////////////////////
+// Internal helpers.
+
+// Count the number of set bits in the given 64-bit mask.
+//
+// The mask is copied into an unsigned value before it is manipulated:
+// right-shifting a negative signed integer is implementation-defined, and
+// with an arithmetic shift a mask with bit 63 set (e.g. an affinity mask
+// covering all 64 processors of a group) would never become zero, so the
+// counting loop would never terminate.
+static int countNumSetBits(int64_t mask) {
+  uint64_t bits = (uint64_t)mask;
+  int num_bits = 0;
+  while (bits != 0) {
+    // Clear the lowest set bit; the loop runs once per set bit.
+    bits &= bits - 1;
+    ++num_bits;
+  }
+  return num_bits;
+}
+
+////////////////////////////////////////////////////////////////////////////////
// Topology query.
int numaAPI_GetNumNodes(void) {
@@ -185,11 +207,26 @@ int numaAPI_GetNumNodeProcessors(int node) {
if (!_GetNumaNodeProcessorMask(node, &processor_mask)) {
return 0;
}
- // TODO(sergey): There might be faster way calculating number of set bits.
- int num_processors = 0;
- while (processor_mask != 0) {
- num_processors += (processor_mask & 1);
- processor_mask = (processor_mask >> 1);
+ return countNumSetBits(processor_mask);
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Topology helpers.
+
+// Returns the number of set bits in the group affinity mask of the calling
+// thread, capped by the active processor count of that thread's processor
+// group. Returns 0 when the thread group affinity can not be queried.
+int numaAPI_GetNumCurrentNodesProcessors(void) {
+ HANDLE thread_handle = GetCurrentThread();
+ GROUP_AFFINITY group_affinity;
+ if (!_GetThreadGroupAffinity(thread_handle, &group_affinity)) {
+ return 0;
+ }
+ // First, count number of possible bits in the affinity mask.
+ const int num_processors = countNumSetBits(group_affinity.Mask);
+ // Then check that it's not exceeding number of processors in the group.
+ const int num_group_processors =
+ _GetActiveProcessorCount(group_affinity.Group);
+ if (num_group_processors < num_processors) {
+ return num_group_processors;
}
return num_processors;
}