diff options
author | Sergey Sharybin <sergey.vfx@gmail.com> | 2019-01-11 19:47:10 +0300 |
---|---|---|
committer | Sergey Sharybin <sergey.vfx@gmail.com> | 2019-01-11 19:47:10 +0300 |
commit | ff44a9957ee553a71585e66ffea615503075313a (patch) | |
tree | 4188c0aca8644026cdbc5368febf7cda72b899d8 /intern | |
parent | c1dd74580ed8352b9f6c96d816a604ebb4f3c39d (diff) |
Update bundled version of NUMA API library
Diffstat (limited to 'intern')
-rw-r--r-- | intern/numaapi/README.blender | 2 | ||||
-rw-r--r-- | intern/numaapi/include/numaapi.h | 10 | ||||
-rw-r--r-- | intern/numaapi/source/numaapi_linux.c | 24 | ||||
-rw-r--r-- | intern/numaapi/source/numaapi_stub.c | 7 | ||||
-rw-r--r-- | intern/numaapi/source/numaapi_win32.c | 53 |
5 files changed, 84 insertions, 12 deletions
diff --git a/intern/numaapi/README.blender b/intern/numaapi/README.blender index 661073712b9..6f71d5f8807 100644 --- a/intern/numaapi/README.blender +++ b/intern/numaapi/README.blender @@ -1,5 +1,5 @@ Project: LibNumaAPI URL: https://github.com/Nazg-Gul/libNumaAPI License: MIT -Upstream version: f83d41ec4d7 +Upstream version: 4e7206befce Local modifications: None diff --git a/intern/numaapi/include/numaapi.h b/intern/numaapi/include/numaapi.h index 7b5b50fdf39..bddb51448f8 100644 --- a/intern/numaapi/include/numaapi.h +++ b/intern/numaapi/include/numaapi.h @@ -71,6 +71,16 @@ bool numaAPI_IsNodeAvailable(int node); int numaAPI_GetNumNodeProcessors(int node); //////////////////////////////////////////////////////////////////////////////// +// Topology helpers. +// +// Those are a bit higher level queries, but is still rather platform-specific +// and generally useful. + +// Get number of processors within the NUMA nodes on which current thread is +// set affinity on. +int numaAPI_GetNumCurrentNodesProcessors(void); + +//////////////////////////////////////////////////////////////////////////////// // Affinities. // Runs the current process and its children on a specific node. diff --git a/intern/numaapi/source/numaapi_linux.c b/intern/numaapi/source/numaapi_linux.c index 62e9dcdfadf..9750f1c17df 100644 --- a/intern/numaapi/source/numaapi_linux.c +++ b/intern/numaapi/source/numaapi_linux.c @@ -34,8 +34,6 @@ # include <dlfcn.h> #endif -#include <stdio.h> - #ifdef WITH_DYNLOAD // Descriptor numa library. 
@@ -64,6 +62,7 @@ typedef void tnuma_free_cpumask(struct bitmask* bitmask); typedef void tnuma_free_nodemask(struct bitmask* bitmask); typedef int tnuma_run_on_node_mask(struct bitmask *nodemask); typedef int tnuma_run_on_node_mask_all(struct bitmask *nodemask); +typedef struct bitmask *tnuma_get_run_node_mask(void); typedef void tnuma_set_interleave_mask(struct bitmask *nodemask); typedef void tnuma_set_localalloc(void); @@ -87,6 +86,7 @@ static tnuma_free_nodemask* numa_free_nodemask; static tnuma_free_cpumask* numa_free_cpumask; static tnuma_run_on_node_mask* numa_run_on_node_mask; static tnuma_run_on_node_mask_all* numa_run_on_node_mask_all; +static tnuma_get_run_node_mask* numa_get_run_node_mask; static tnuma_set_interleave_mask* numa_set_interleave_mask; static tnuma_set_localalloc* numa_set_localalloc; @@ -162,6 +162,7 @@ static NUMAAPI_Result loadNumaSymbols(void) { NUMA_LIBRARY_FIND(numa_free_nodemask); NUMA_LIBRARY_FIND(numa_run_on_node_mask); NUMA_LIBRARY_FIND(numa_run_on_node_mask_all); + NUMA_LIBRARY_FIND(numa_get_run_node_mask); NUMA_LIBRARY_FIND(numa_set_interleave_mask); NUMA_LIBRARY_FIND(numa_set_localalloc); @@ -204,7 +205,7 @@ int numaAPI_GetNumNodeProcessors(int node) { struct bitmask* cpu_mask = numa_allocate_cpumask(); numa_node_to_cpus(node, cpu_mask); const unsigned int num_bytes = numa_bitmask_nbytes(cpu_mask); - const unsigned int num_bits = num_bytes *8; + const unsigned int num_bits = num_bytes * 8; // TODO(sergey): There might be faster way calculating number of set bits. int num_processors = 0; for (unsigned int bit = 0; bit < num_bits; ++bit) { @@ -225,6 +226,23 @@ int numaAPI_GetNumNodeProcessors(int node) { } //////////////////////////////////////////////////////////////////////////////// +// Topology helpers. 
+ +int numaAPI_GetNumCurrentNodesProcessors(void) { + struct bitmask* node_mask = numa_get_run_node_mask(); + const unsigned int num_bytes = numa_bitmask_nbytes(node_mask); + const unsigned int num_bits = num_bytes * 8; + int num_processors = 0; + for (unsigned int bit = 0; bit < num_bits; ++bit) { + if (numa_bitmask_isbitset(node_mask, bit)) { + num_processors += numaAPI_GetNumNodeProcessors(bit); + } + } + numa_bitmask_free(node_mask); + return num_processors; +} + +//////////////////////////////////////////////////////////////////////////////// // Affinities. bool numaAPI_RunProcessOnNode(int node) { diff --git a/intern/numaapi/source/numaapi_stub.c b/intern/numaapi/source/numaapi_stub.c index e054d71018c..6ac41136c8f 100644 --- a/intern/numaapi/source/numaapi_stub.c +++ b/intern/numaapi/source/numaapi_stub.c @@ -53,6 +53,13 @@ int numaAPI_GetNumNodeProcessors(int node) { } //////////////////////////////////////////////////////////////////////////////// +// Topology helpers. + +int numaAPI_GetNumCurrentNodesProcessors(void) { + return 0; +} + +//////////////////////////////////////////////////////////////////////////////// // Affinities. bool numaAPI_RunProcessOnNode(int node) { diff --git a/intern/numaapi/source/numaapi_win32.c b/intern/numaapi/source/numaapi_win32.c index 33cbc797bd0..e278ef612fd 100644 --- a/intern/numaapi/source/numaapi_win32.c +++ b/intern/numaapi/source/numaapi_win32.c @@ -47,8 +47,6 @@ # include <VersionHelpers.h> #endif -#include <stdio.h> - //////////////////////////////////////////////////////////////////////////////// // Initialization. 
@@ -74,9 +72,14 @@ typedef BOOL t_VirtualFree(void* address, SIZE_T size, DWORD free_type); typedef BOOL t_SetProcessAffinityMask(HANDLE process_handle, DWORD_PTR process_affinity_mask); typedef BOOL t_SetThreadGroupAffinity(HANDLE thread_handle, - const GROUP_AFFINITY* GroupAffinity, + const GROUP_AFFINITY* group_affinity, GROUP_AFFINITY* PreviousGroupAffinity); +typedef BOOL t_GetThreadGroupAffinity(HANDLE thread_handle, + GROUP_AFFINITY* group_affinity); typedef DWORD t_GetCurrentProcessorNumber(void); +typedef void t_GetCurrentProcessorNumberEx(PROCESSOR_NUMBER* proc_number); +typedef DWORD t_GetActiveProcessorCount(WORD group_number); + // NUMA symbols. static t_GetNumaHighestNodeNumber* _GetNumaHighestNodeNumber; @@ -88,7 +91,10 @@ static t_VirtualFree* _VirtualFree; // Threading symbols. static t_SetProcessAffinityMask* _SetProcessAffinityMask; static t_SetThreadGroupAffinity* _SetThreadGroupAffinity; +static t_GetThreadGroupAffinity* _GetThreadGroupAffinity; static t_GetCurrentProcessorNumber* _GetCurrentProcessorNumber; +static t_GetCurrentProcessorNumberEx* _GetCurrentProcessorNumberEx; +static t_GetActiveProcessorCount* _GetActiveProcessorCount; static void numaExit(void) { // TODO(sergey): Consider closing library here. @@ -128,7 +134,10 @@ static NUMAAPI_Result loadNumaSymbols(void) { // Threading. KERNEL_LIBRARY_FIND(SetProcessAffinityMask); KERNEL_LIBRARY_FIND(SetThreadGroupAffinity); + KERNEL_LIBRARY_FIND(GetThreadGroupAffinity); KERNEL_LIBRARY_FIND(GetCurrentProcessorNumber); + KERNEL_LIBRARY_FIND(GetCurrentProcessorNumberEx); + KERNEL_LIBRARY_FIND(GetActiveProcessorCount); #undef KERNEL_LIBRARY_FIND #undef _LIBRARY_FIND @@ -152,6 +161,19 @@ NUMAAPI_Result numaAPI_Initialize(void) { } //////////////////////////////////////////////////////////////////////////////// +// Internal helpers. + +static int countNumSetBits(int64_t mask) { + // TODO(sergey): There might be faster way calculating number of set bits. 
+ int num_bits = 0; + while (mask != 0) { + num_bits += (mask & 1); + mask = (mask >> 1); + } + return num_bits; +} + +//////////////////////////////////////////////////////////////////////////////// // Topology query. int numaAPI_GetNumNodes(void) { @@ -185,11 +207,26 @@ int numaAPI_GetNumNodeProcessors(int node) { if (!_GetNumaNodeProcessorMask(node, &processor_mask)) { return 0; } - // TODO(sergey): There might be faster way calculating number of set bits. - int num_processors = 0; - while (processor_mask != 0) { - num_processors += (processor_mask & 1); - processor_mask = (processor_mask >> 1); + return countNumSetBits(processor_mask); +} + +//////////////////////////////////////////////////////////////////////////////// +// Topology helpers. + +int numaAPI_GetNumCurrentNodesProcessors(void) { + HANDLE thread_handle = GetCurrentThread(); + GROUP_AFFINITY group_affinity; + // TODO(sergey): Needs implementation. + if (!_GetThreadGroupAffinity(thread_handle, &group_affinity)) { + return 0; + } + // First, count number of possible bits in the affinity mask. + const int num_processors = countNumSetBits(group_affinity.Mask); + // Then check that it's not exceeding number of processors in the group. + const int num_group_processors = + _GetActiveProcessorCount(group_affinity.Group); + if (num_group_processors < num_processors) { + return num_group_processors; } return num_processors; } |