diff options
author | Sergey Sharybin <sergey.vfx@gmail.com> | 2019-01-11 20:09:05 +0300 |
---|---|---|
committer | Sergey Sharybin <sergey.vfx@gmail.com> | 2019-01-11 20:09:05 +0300 |
commit | 1c7695b8483dc9bbcfd9dac26a652922062ea2b7 (patch) | |
tree | 0e7fc3c9dc7de3338c49bebb1aba568d717154d9 /intern | |
parent | ba4e6d73af0a125c319cd087ff5db68a914bbabe (diff) | |
parent | 48506a3431fb5b4396f7cf2d9c6a8a208b3c0df5 (diff) |
Merge branch 'blender2.7'
Diffstat (limited to 'intern')
-rw-r--r-- | intern/cycles/util/util_system.cpp | 22 | ||||
-rw-r--r-- | intern/cycles/util/util_system.h | 4 | ||||
-rw-r--r-- | intern/cycles/util/util_task.cpp | 18 | ||||
-rw-r--r-- | intern/numaapi/README.blender | 2 | ||||
-rw-r--r-- | intern/numaapi/include/numaapi.h | 10 | ||||
-rw-r--r-- | intern/numaapi/source/numaapi_linux.c | 24 | ||||
-rw-r--r-- | intern/numaapi/source/numaapi_stub.c | 7 | ||||
-rw-r--r-- | intern/numaapi/source/numaapi_win32.c | 53 |
8 files changed, 118 insertions, 22 deletions
diff --git a/intern/cycles/util/util_system.cpp b/intern/cycles/util/util_system.cpp index cc2d7017fd8..a22bd25ce77 100644 --- a/intern/cycles/util/util_system.cpp +++ b/intern/cycles/util/util_system.cpp @@ -40,7 +40,7 @@ bool system_cpu_ensure_initialized() { static bool is_initialized = false; static bool result = false; - if (is_initialized) { + if(is_initialized) { return result; } is_initialized = true; @@ -71,8 +71,8 @@ int system_cpu_thread_count() { const int num_nodes = system_cpu_num_numa_nodes(); int num_threads = 0; - for (int node = 0; node < num_nodes; ++node) { - if (!system_cpu_is_numa_node_available(node)) { + for(int node = 0; node < num_nodes; ++node) { + if(!system_cpu_is_numa_node_available(node)) { continue; } num_threads += system_cpu_num_numa_node_processors(node); @@ -82,7 +82,7 @@ int system_cpu_thread_count() int system_cpu_num_numa_nodes() { - if (!system_cpu_ensure_initialized()) { + if(!system_cpu_ensure_initialized()) { /* Fallback to a single node with all the threads. 
*/ return 1; } @@ -91,7 +91,7 @@ int system_cpu_num_numa_nodes() bool system_cpu_is_numa_node_available(int node) { - if (!system_cpu_ensure_initialized()) { + if(!system_cpu_ensure_initialized()) { return true; } return numaAPI_IsNodeAvailable(node); @@ -99,7 +99,7 @@ bool system_cpu_is_numa_node_available(int node) int system_cpu_num_numa_node_processors(int node) { - if (!system_cpu_ensure_initialized()) { + if(!system_cpu_ensure_initialized()) { return system_cpu_thread_count_fallback(); } return numaAPI_GetNumNodeProcessors(node); @@ -107,12 +107,20 @@ int system_cpu_num_numa_node_processors(int node) bool system_cpu_run_thread_on_node(int node) { - if (!system_cpu_ensure_initialized()) { + if(!system_cpu_ensure_initialized()) { return true; } return numaAPI_RunThreadOnNode(node); } +int system_cpu_num_active_group_processors() +{ + if(!system_cpu_ensure_initialized()) { + return system_cpu_thread_count_fallback(); + } + return numaAPI_GetNumCurrentNodesProcessors(); +} + #if !defined(_WIN32) || defined(FREE_WINDOWS) static void __cpuid(int data[4], int selector) { diff --git a/intern/cycles/util/util_system.h b/intern/cycles/util/util_system.h index 15f69bcf153..0c001f11f0e 100644 --- a/intern/cycles/util/util_system.h +++ b/intern/cycles/util/util_system.h @@ -44,6 +44,10 @@ int system_cpu_num_numa_node_processors(int node); * Returns truth if affinity has successfully changed. */ bool system_cpu_run_thread_on_node(int node); +/* Number of processors within the current CPU group (or within active thread + * affinity). 
*/ +int system_cpu_num_active_group_processors(); + string system_cpu_brand_string(); int system_cpu_bits(); bool system_cpu_support_sse2(); diff --git a/intern/cycles/util/util_task.cpp b/intern/cycles/util/util_task.cpp index 7e9f7313fba..4241c4aa8cc 100644 --- a/intern/cycles/util/util_task.cpp +++ b/intern/cycles/util/util_task.cpp @@ -228,9 +228,21 @@ int get_num_total_processors(const vector<int>& num_per_node_processors) void distribute_threads_on_nodes(const vector<thread*>& threads) { const int num_threads = threads.size(); - /* TODO(sergey): Skip overriding affinity if threads fits into the current - * nodes/CPU group. This will allow user to tweak affinity for weird and - * wonderful reasons. */ + const int num_active_group_processors = + system_cpu_num_active_group_processors(); + VLOG(1) << "Detected " << num_active_group_processors << " processors " + << "in active group."; + if(num_active_group_processors >= num_threads) { + /* If the current thread is set up in a way that its affinity allows to + * use at least requested number of threads we do not explicitly set + * affinity to the worker threads. + * This way we allow users to manually edit affinity of the parent + * thread, and here we follow that affinity. This way it's possible to + * have two Cycles/Blender instances running manually set to a different + * dies on a CPU. 
*/ + VLOG(1) << "Not setting thread group affinity."; + return; + } vector<int> num_per_node_processors; get_per_node_num_processors(&num_per_node_processors); if(num_per_node_processors.size() == 0) { diff --git a/intern/numaapi/README.blender b/intern/numaapi/README.blender index 661073712b9..6f71d5f8807 100644 --- a/intern/numaapi/README.blender +++ b/intern/numaapi/README.blender @@ -1,5 +1,5 @@ Project: LibNumaAPI URL: https://github.com/Nazg-Gul/libNumaAPI License: MIT -Upstream version: f83d41ec4d7 +Upstream version: 4e7206befce Local modifications: None diff --git a/intern/numaapi/include/numaapi.h b/intern/numaapi/include/numaapi.h index 7b5b50fdf39..bddb51448f8 100644 --- a/intern/numaapi/include/numaapi.h +++ b/intern/numaapi/include/numaapi.h @@ -71,6 +71,16 @@ bool numaAPI_IsNodeAvailable(int node); int numaAPI_GetNumNodeProcessors(int node); //////////////////////////////////////////////////////////////////////////////// +// Topology helpers. +// +// Those are a bit higher level queries, but is still rather platform-specific +// and generally useful. + +// Get number of processors within the NUMA nodes on which current thread is +// set affinity on. +int numaAPI_GetNumCurrentNodesProcessors(void); + +//////////////////////////////////////////////////////////////////////////////// // Affinities. // Runs the current process and its children on a specific node. diff --git a/intern/numaapi/source/numaapi_linux.c b/intern/numaapi/source/numaapi_linux.c index 62e9dcdfadf..9750f1c17df 100644 --- a/intern/numaapi/source/numaapi_linux.c +++ b/intern/numaapi/source/numaapi_linux.c @@ -34,8 +34,6 @@ # include <dlfcn.h> #endif -#include <stdio.h> - #ifdef WITH_DYNLOAD // Descriptor numa library. 
@@ -64,6 +62,7 @@ typedef void tnuma_free_cpumask(struct bitmask* bitmask); typedef void tnuma_free_nodemask(struct bitmask* bitmask); typedef int tnuma_run_on_node_mask(struct bitmask *nodemask); typedef int tnuma_run_on_node_mask_all(struct bitmask *nodemask); +typedef struct bitmask *tnuma_get_run_node_mask(void); typedef void tnuma_set_interleave_mask(struct bitmask *nodemask); typedef void tnuma_set_localalloc(void); @@ -87,6 +86,7 @@ static tnuma_free_nodemask* numa_free_nodemask; static tnuma_free_cpumask* numa_free_cpumask; static tnuma_run_on_node_mask* numa_run_on_node_mask; static tnuma_run_on_node_mask_all* numa_run_on_node_mask_all; +static tnuma_get_run_node_mask* numa_get_run_node_mask; static tnuma_set_interleave_mask* numa_set_interleave_mask; static tnuma_set_localalloc* numa_set_localalloc; @@ -162,6 +162,7 @@ static NUMAAPI_Result loadNumaSymbols(void) { NUMA_LIBRARY_FIND(numa_free_nodemask); NUMA_LIBRARY_FIND(numa_run_on_node_mask); NUMA_LIBRARY_FIND(numa_run_on_node_mask_all); + NUMA_LIBRARY_FIND(numa_get_run_node_mask); NUMA_LIBRARY_FIND(numa_set_interleave_mask); NUMA_LIBRARY_FIND(numa_set_localalloc); @@ -204,7 +205,7 @@ int numaAPI_GetNumNodeProcessors(int node) { struct bitmask* cpu_mask = numa_allocate_cpumask(); numa_node_to_cpus(node, cpu_mask); const unsigned int num_bytes = numa_bitmask_nbytes(cpu_mask); - const unsigned int num_bits = num_bytes *8; + const unsigned int num_bits = num_bytes * 8; // TODO(sergey): There might be faster way calculating number of set bits. int num_processors = 0; for (unsigned int bit = 0; bit < num_bits; ++bit) { @@ -225,6 +226,23 @@ int numaAPI_GetNumNodeProcessors(int node) { } //////////////////////////////////////////////////////////////////////////////// +// Topology helpers. 
+ +int numaAPI_GetNumCurrentNodesProcessors(void) { + struct bitmask* node_mask = numa_get_run_node_mask(); + const unsigned int num_bytes = numa_bitmask_nbytes(node_mask); + const unsigned int num_bits = num_bytes * 8; + int num_processors = 0; + for (unsigned int bit = 0; bit < num_bits; ++bit) { + if (numa_bitmask_isbitset(node_mask, bit)) { + num_processors += numaAPI_GetNumNodeProcessors(bit); + } + } + numa_bitmask_free(node_mask); + return num_processors; +} + +//////////////////////////////////////////////////////////////////////////////// // Affinities. bool numaAPI_RunProcessOnNode(int node) { diff --git a/intern/numaapi/source/numaapi_stub.c b/intern/numaapi/source/numaapi_stub.c index e054d71018c..6ac41136c8f 100644 --- a/intern/numaapi/source/numaapi_stub.c +++ b/intern/numaapi/source/numaapi_stub.c @@ -53,6 +53,13 @@ int numaAPI_GetNumNodeProcessors(int node) { } //////////////////////////////////////////////////////////////////////////////// +// Topology helpers. + +int numaAPI_GetNumCurrentNodesProcessors(void) { + return 0; +} + +//////////////////////////////////////////////////////////////////////////////// // Affinities. bool numaAPI_RunProcessOnNode(int node) { diff --git a/intern/numaapi/source/numaapi_win32.c b/intern/numaapi/source/numaapi_win32.c index 33cbc797bd0..e278ef612fd 100644 --- a/intern/numaapi/source/numaapi_win32.c +++ b/intern/numaapi/source/numaapi_win32.c @@ -47,8 +47,6 @@ # include <VersionHelpers.h> #endif -#include <stdio.h> - //////////////////////////////////////////////////////////////////////////////// // Initialization. 
@@ -74,9 +72,14 @@ typedef BOOL t_VirtualFree(void* address, SIZE_T size, DWORD free_type); typedef BOOL t_SetProcessAffinityMask(HANDLE process_handle, DWORD_PTR process_affinity_mask); typedef BOOL t_SetThreadGroupAffinity(HANDLE thread_handle, - const GROUP_AFFINITY* GroupAffinity, + const GROUP_AFFINITY* group_affinity, GROUP_AFFINITY* PreviousGroupAffinity); +typedef BOOL t_GetThreadGroupAffinity(HANDLE thread_handle, + GROUP_AFFINITY* group_affinity); typedef DWORD t_GetCurrentProcessorNumber(void); +typedef void t_GetCurrentProcessorNumberEx(PROCESSOR_NUMBER* proc_number); +typedef DWORD t_GetActiveProcessorCount(WORD group_number); + // NUMA symbols. static t_GetNumaHighestNodeNumber* _GetNumaHighestNodeNumber; @@ -88,7 +91,10 @@ static t_VirtualFree* _VirtualFree; // Threading symbols. static t_SetProcessAffinityMask* _SetProcessAffinityMask; static t_SetThreadGroupAffinity* _SetThreadGroupAffinity; +static t_GetThreadGroupAffinity* _GetThreadGroupAffinity; static t_GetCurrentProcessorNumber* _GetCurrentProcessorNumber; +static t_GetCurrentProcessorNumberEx* _GetCurrentProcessorNumberEx; +static t_GetActiveProcessorCount* _GetActiveProcessorCount; static void numaExit(void) { // TODO(sergey): Consider closing library here. @@ -128,7 +134,10 @@ static NUMAAPI_Result loadNumaSymbols(void) { // Threading. KERNEL_LIBRARY_FIND(SetProcessAffinityMask); KERNEL_LIBRARY_FIND(SetThreadGroupAffinity); + KERNEL_LIBRARY_FIND(GetThreadGroupAffinity); KERNEL_LIBRARY_FIND(GetCurrentProcessorNumber); + KERNEL_LIBRARY_FIND(GetCurrentProcessorNumberEx); + KERNEL_LIBRARY_FIND(GetActiveProcessorCount); #undef KERNEL_LIBRARY_FIND #undef _LIBRARY_FIND @@ -152,6 +161,19 @@ NUMAAPI_Result numaAPI_Initialize(void) { } //////////////////////////////////////////////////////////////////////////////// +// Internal helpers. + +static int countNumSetBits(int64_t mask) { + // TODO(sergey): There might be faster way calculating number of set bits. 
+ int num_bits = 0; + while (mask != 0) { + num_bits += (mask & 1); + mask = (mask >> 1); + } + return num_bits; +} + +//////////////////////////////////////////////////////////////////////////////// // Topology query. int numaAPI_GetNumNodes(void) { @@ -185,11 +207,26 @@ int numaAPI_GetNumNodeProcessors(int node) { if (!_GetNumaNodeProcessorMask(node, &processor_mask)) { return 0; } - // TODO(sergey): There might be faster way calculating number of set bits. - int num_processors = 0; - while (processor_mask != 0) { - num_processors += (processor_mask & 1); - processor_mask = (processor_mask >> 1); + return countNumSetBits(processor_mask); +} + +//////////////////////////////////////////////////////////////////////////////// +// Topology helpers. + +int numaAPI_GetNumCurrentNodesProcessors(void) { + HANDLE thread_handle = GetCurrentThread(); + GROUP_AFFINITY group_affinity; + // TODO(sergey): Needs implementation. + if (!_GetThreadGroupAffinity(thread_handle, &group_affinity)) { + return 0; + } + // First, count number of possible bits in the affinity mask. + const int num_processors = countNumSetBits(group_affinity.Mask); + // Then check that it's not exceeding number of processors in the group. + const int num_group_processors = + _GetActiveProcessorCount(group_affinity.Group); + if (num_group_processors < num_processors) { + return num_group_processors; } return num_processors; } |