diff options
author | Sergey Sharybin <sergey.vfx@gmail.com> | 2018-12-27 21:01:19 +0300 |
---|---|---|
committer | Sergey Sharybin <sergey.vfx@gmail.com> | 2018-12-27 21:12:59 +0300 |
commit | 826d7adde79216d271b78059c05abd10b7559899 (patch) | |
tree | 5d8b334bf5603208e12b579e5a6cd1314d29cfdd /intern/numaapi | |
parent | de14ddf3a141cb0e2baff00b06dc7d86d3088916 (diff) |
Fix T59874: Cycles CPU 25% load only during rendering
The issue was introduced by a Threadripper2 commit back in
ce927e15e0e3. This boils down to threads inheriting affinity
from the parent thread. It is a question how this slipped
through the review (we definitely ran a benchmark round).
A quick fix could have been to always set CPU group affinity
in Cycles, and it would have worked for Windows. On other
platforms the CPU groups API was not finished.
Ended up making Cycles aware of NUMA topology, so now we
bind threads to a specific NUMA node. This required adding
an external dependency to Cycles, but made some code there
shorter.
Diffstat (limited to 'intern/numaapi')
-rw-r--r-- | intern/numaapi/include/numaapi.h | 2 | ||||
-rw-r--r-- | intern/numaapi/source/numaapi_linux.c | 18 |
2 files changed, 12 insertions, 8 deletions
diff --git a/intern/numaapi/include/numaapi.h b/intern/numaapi/include/numaapi.h index a4f32d88458..7b5b50fdf39 100644 --- a/intern/numaapi/include/numaapi.h +++ b/intern/numaapi/include/numaapi.h @@ -67,7 +67,7 @@ int numaAPI_GetNumNodes(void); // Returns truth if the given node is available for compute. bool numaAPI_IsNodeAvailable(int node); -// Getnumber of available processors on a given node. +// Get number of available processors on a given node. int numaAPI_GetNumNodeProcessors(int node); //////////////////////////////////////////////////////////////////////////////// diff --git a/intern/numaapi/source/numaapi_linux.c b/intern/numaapi/source/numaapi_linux.c index 559e97b67d3..62e9dcdfadf 100644 --- a/intern/numaapi/source/numaapi_linux.c +++ b/intern/numaapi/source/numaapi_linux.c @@ -34,6 +34,8 @@ # include <dlfcn.h> #endif +#include <stdio.h> + #ifdef WITH_DYNLOAD // Descriptor numa library. @@ -61,6 +63,7 @@ typedef struct bitmask* tnuma_allocate_nodemask(void); typedef void tnuma_free_cpumask(struct bitmask* bitmask); typedef void tnuma_free_nodemask(struct bitmask* bitmask); typedef int tnuma_run_on_node_mask(struct bitmask *nodemask); +typedef int tnuma_run_on_node_mask_all(struct bitmask *nodemask); typedef void tnuma_set_interleave_mask(struct bitmask *nodemask); typedef void tnuma_set_localalloc(void); @@ -83,6 +86,7 @@ static tnuma_allocate_nodemask* numa_allocate_nodemask; static tnuma_free_nodemask* numa_free_nodemask; static tnuma_free_cpumask* numa_free_cpumask; static tnuma_run_on_node_mask* numa_run_on_node_mask; +static tnuma_run_on_node_mask_all* numa_run_on_node_mask_all; static tnuma_set_interleave_mask* numa_set_interleave_mask; static tnuma_set_localalloc* numa_set_localalloc; @@ -157,6 +161,7 @@ static NUMAAPI_Result loadNumaSymbols(void) { NUMA_LIBRARY_FIND(numa_free_cpumask); NUMA_LIBRARY_FIND(numa_free_nodemask); NUMA_LIBRARY_FIND(numa_run_on_node_mask); + NUMA_LIBRARY_FIND(numa_run_on_node_mask_all); 
NUMA_LIBRARY_FIND(numa_set_interleave_mask); NUMA_LIBRARY_FIND(numa_set_localalloc); @@ -192,10 +197,7 @@ int numaAPI_GetNumNodes(void) { } bool numaAPI_IsNodeAvailable(int node) { - if (numa_node_size(node, NULL) > 0) { - return true; - } - return false; + return numaAPI_GetNumNodeProcessors(node) > 0; } int numaAPI_GetNumNodeProcessors(int node) { @@ -235,13 +237,15 @@ bool numaAPI_RunThreadOnNode(int node) { struct bitmask* node_mask = numa_allocate_nodemask(); numa_bitmask_clearall(node_mask); numa_bitmask_setbit(node_mask, node); - numa_run_on_node_mask(node_mask); + numa_run_on_node_mask_all(node_mask); // TODO(sergey): The following commands are based on x265 code, we might want // to make those optional, or require to call those explicitly. // // Current assumption is that this is similar to SetThreadGroupAffinity(). - numa_set_interleave_mask(node_mask); - numa_set_localalloc(); + if (numa_node_size(node, NULL) > 0) { + numa_set_interleave_mask(node_mask); + numa_set_localalloc(); + } #ifdef WITH_DYNLOAD if (numa_free_nodemask != NULL) { numa_free_nodemask(node_mask); |