From ff44a9957ee553a71585e66ffea615503075313a Mon Sep 17 00:00:00 2001 From: Sergey Sharybin Date: Fri, 11 Jan 2019 17:47:10 +0100 Subject: Update bundled version of NUMA API library --- intern/numaapi/README.blender | 2 +- intern/numaapi/include/numaapi.h | 10 +++++++ intern/numaapi/source/numaapi_linux.c | 24 ++++++++++++++-- intern/numaapi/source/numaapi_stub.c | 7 +++++ intern/numaapi/source/numaapi_win32.c | 53 +++++++++++++++++++++++++++++------ 5 files changed, 84 insertions(+), 12 deletions(-) (limited to 'intern') diff --git a/intern/numaapi/README.blender b/intern/numaapi/README.blender index 661073712b9..6f71d5f8807 100644 --- a/intern/numaapi/README.blender +++ b/intern/numaapi/README.blender @@ -1,5 +1,5 @@ Project: LibNumaAPI URL: https://github.com/Nazg-Gul/libNumaAPI License: MIT -Upstream version: f83d41ec4d7 +Upstream version: 4e7206befce Local modifications: None diff --git a/intern/numaapi/include/numaapi.h b/intern/numaapi/include/numaapi.h index 7b5b50fdf39..bddb51448f8 100644 --- a/intern/numaapi/include/numaapi.h +++ b/intern/numaapi/include/numaapi.h @@ -70,6 +70,16 @@ bool numaAPI_IsNodeAvailable(int node); // Get number of available processors on a given node. int numaAPI_GetNumNodeProcessors(int node); +//////////////////////////////////////////////////////////////////////////////// +// Topology helpers. +// +// These are somewhat higher-level queries, but they are still rather +// platform-specific and generally useful. + +// Get the number of processors within the NUMA nodes to which the current +// thread's affinity is set. +int numaAPI_GetNumCurrentNodesProcessors(void); + //////////////////////////////////////////////////////////////////////////////// // Affinities. 
diff --git a/intern/numaapi/source/numaapi_linux.c b/intern/numaapi/source/numaapi_linux.c index 62e9dcdfadf..9750f1c17df 100644 --- a/intern/numaapi/source/numaapi_linux.c +++ b/intern/numaapi/source/numaapi_linux.c @@ -34,8 +34,6 @@ # include #endif -#include - #ifdef WITH_DYNLOAD // Descriptor numa library. @@ -64,6 +62,7 @@ typedef void tnuma_free_cpumask(struct bitmask* bitmask); typedef void tnuma_free_nodemask(struct bitmask* bitmask); typedef int tnuma_run_on_node_mask(struct bitmask *nodemask); typedef int tnuma_run_on_node_mask_all(struct bitmask *nodemask); +typedef struct bitmask *tnuma_get_run_node_mask(void); typedef void tnuma_set_interleave_mask(struct bitmask *nodemask); typedef void tnuma_set_localalloc(void); @@ -87,6 +86,7 @@ static tnuma_free_nodemask* numa_free_nodemask; static tnuma_free_cpumask* numa_free_cpumask; static tnuma_run_on_node_mask* numa_run_on_node_mask; static tnuma_run_on_node_mask_all* numa_run_on_node_mask_all; +static tnuma_get_run_node_mask* numa_get_run_node_mask; static tnuma_set_interleave_mask* numa_set_interleave_mask; static tnuma_set_localalloc* numa_set_localalloc; @@ -162,6 +162,7 @@ static NUMAAPI_Result loadNumaSymbols(void) { NUMA_LIBRARY_FIND(numa_free_nodemask); NUMA_LIBRARY_FIND(numa_run_on_node_mask); NUMA_LIBRARY_FIND(numa_run_on_node_mask_all); + NUMA_LIBRARY_FIND(numa_get_run_node_mask); NUMA_LIBRARY_FIND(numa_set_interleave_mask); NUMA_LIBRARY_FIND(numa_set_localalloc); @@ -204,7 +205,7 @@ int numaAPI_GetNumNodeProcessors(int node) { struct bitmask* cpu_mask = numa_allocate_cpumask(); numa_node_to_cpus(node, cpu_mask); const unsigned int num_bytes = numa_bitmask_nbytes(cpu_mask); - const unsigned int num_bits = num_bytes *8; + const unsigned int num_bits = num_bytes * 8; // TODO(sergey): There might be faster way calculating number of set bits. 
int num_processors = 0; for (unsigned int bit = 0; bit < num_bits; ++bit) { @@ -224,6 +225,23 @@ int numaAPI_GetNumNodeProcessors(int node) { return num_processors; } +//////////////////////////////////////////////////////////////////////////////// +// Topology helpers. + +int numaAPI_GetNumCurrentNodesProcessors(void) { + struct bitmask* node_mask = numa_get_run_node_mask(); + const unsigned int num_bytes = numa_bitmask_nbytes(node_mask); + const unsigned int num_bits = num_bytes * 8; + int num_processors = 0; + for (unsigned int bit = 0; bit < num_bits; ++bit) { + if (numa_bitmask_isbitset(node_mask, bit)) { + num_processors += numaAPI_GetNumNodeProcessors(bit); + } + } + numa_bitmask_free(node_mask); + return num_processors; +} + //////////////////////////////////////////////////////////////////////////////// // Affinities. diff --git a/intern/numaapi/source/numaapi_stub.c b/intern/numaapi/source/numaapi_stub.c index e054d71018c..6ac41136c8f 100644 --- a/intern/numaapi/source/numaapi_stub.c +++ b/intern/numaapi/source/numaapi_stub.c @@ -52,6 +52,13 @@ int numaAPI_GetNumNodeProcessors(int node) { return 0; } +//////////////////////////////////////////////////////////////////////////////// +// Topology helpers. + +int numaAPI_GetNumCurrentNodesProcessors(void) { + return 0; +} + //////////////////////////////////////////////////////////////////////////////// // Affinities. diff --git a/intern/numaapi/source/numaapi_win32.c b/intern/numaapi/source/numaapi_win32.c index 33cbc797bd0..e278ef612fd 100644 --- a/intern/numaapi/source/numaapi_win32.c +++ b/intern/numaapi/source/numaapi_win32.c @@ -47,8 +47,6 @@ # include #endif -#include - //////////////////////////////////////////////////////////////////////////////// // Initialization. 
@@ -74,9 +72,14 @@ typedef BOOL t_VirtualFree(void* address, SIZE_T size, DWORD free_type); typedef BOOL t_SetProcessAffinityMask(HANDLE process_handle, DWORD_PTR process_affinity_mask); typedef BOOL t_SetThreadGroupAffinity(HANDLE thread_handle, - const GROUP_AFFINITY* GroupAffinity, + const GROUP_AFFINITY* group_affinity, GROUP_AFFINITY* PreviousGroupAffinity); +typedef BOOL t_GetThreadGroupAffinity(HANDLE thread_handle, + GROUP_AFFINITY* group_affinity); typedef DWORD t_GetCurrentProcessorNumber(void); +typedef void t_GetCurrentProcessorNumberEx(PROCESSOR_NUMBER* proc_number); +typedef DWORD t_GetActiveProcessorCount(WORD group_number); + // NUMA symbols. static t_GetNumaHighestNodeNumber* _GetNumaHighestNodeNumber; @@ -88,7 +91,10 @@ static t_VirtualFree* _VirtualFree; // Threading symbols. static t_SetProcessAffinityMask* _SetProcessAffinityMask; static t_SetThreadGroupAffinity* _SetThreadGroupAffinity; +static t_GetThreadGroupAffinity* _GetThreadGroupAffinity; static t_GetCurrentProcessorNumber* _GetCurrentProcessorNumber; +static t_GetCurrentProcessorNumberEx* _GetCurrentProcessorNumberEx; +static t_GetActiveProcessorCount* _GetActiveProcessorCount; static void numaExit(void) { // TODO(sergey): Consider closing library here. @@ -128,7 +134,10 @@ static NUMAAPI_Result loadNumaSymbols(void) { // Threading. KERNEL_LIBRARY_FIND(SetProcessAffinityMask); KERNEL_LIBRARY_FIND(SetThreadGroupAffinity); + KERNEL_LIBRARY_FIND(GetThreadGroupAffinity); KERNEL_LIBRARY_FIND(GetCurrentProcessorNumber); + KERNEL_LIBRARY_FIND(GetCurrentProcessorNumberEx); + KERNEL_LIBRARY_FIND(GetActiveProcessorCount); #undef KERNEL_LIBRARY_FIND #undef _LIBRARY_FIND @@ -151,6 +160,19 @@ NUMAAPI_Result numaAPI_Initialize(void) { #endif } +//////////////////////////////////////////////////////////////////////////////// +// Internal helpers. + +static int countNumSetBits(int64_t mask) { + // TODO(sergey): There might be faster way calculating number of set bits. 
+ int num_bits = 0; + while (mask != 0) { + num_bits += (mask & 1); + mask = (mask >> 1); + } + return num_bits; +} + //////////////////////////////////////////////////////////////////////////////// // Topology query. @@ -185,11 +207,26 @@ int numaAPI_GetNumNodeProcessors(int node) { if (!_GetNumaNodeProcessorMask(node, &processor_mask)) { return 0; } - // TODO(sergey): There might be faster way calculating number of set bits. - int num_processors = 0; - while (processor_mask != 0) { - num_processors += (processor_mask & 1); - processor_mask = (processor_mask >> 1); + return countNumSetBits(processor_mask); +} + +//////////////////////////////////////////////////////////////////////////////// +// Topology helpers. + +int numaAPI_GetNumCurrentNodesProcessors(void) { + HANDLE thread_handle = GetCurrentThread(); + GROUP_AFFINITY group_affinity; + // TODO(sergey): Needs implementation. + if (!_GetThreadGroupAffinity(thread_handle, &group_affinity)) { + return 0; + } + // First, count number of possible bits in the affinity mask. + const int num_processors = countNumSetBits(group_affinity.Mask); + // Then check that it's not exceeding number of processors in the group. + const int num_group_processors = + _GetActiveProcessorCount(group_affinity.Group); + if (num_group_processors < num_processors) { + return num_group_processors; } return num_processors; } -- cgit v1.2.3