diff options
author | Sergey Sharybin <sergey.vfx@gmail.com> | 2018-12-27 21:01:19 +0300 |
---|---|---|
committer | Sergey Sharybin <sergey.vfx@gmail.com> | 2018-12-27 21:12:59 +0300 |
commit | 826d7adde79216d271b78059c05abd10b7559899 (patch) | |
tree | 5d8b334bf5603208e12b579e5a6cd1314d29cfdd /intern/numaapi | |
parent | de14ddf3a141cb0e2baff00b06dc7d86d3088916 (diff) |
Fix T59874: Cycles CPU 25% load only during rendering
The issue was introduced by a Threadripper2 commit back in
ce927e15e0e3. This boils down to threads inheriting affinity
from the parent thread. It is a question how this slipped
through the review (we definitely ran a benchmark round).
A quick fix could have been to always set CPU group affinity
in Cycles, and it would have worked for Windows. On other
platforms the CPU groups API was not finished.
Ended up making Cycles aware of NUMA topology, so now we
bind threads to a specific NUMA node. This required adding
an external dependency to Cycles, but made some code there
shorter.
Diffstat (limited to 'intern/numaapi')
-rw-r--r-- | intern/numaapi/include/numaapi.h | 2 | ||||
-rw-r--r-- | intern/numaapi/source/numaapi_linux.c | 18 |
2 files changed, 12 insertions, 8 deletions
diff --git a/intern/numaapi/include/numaapi.h b/intern/numaapi/include/numaapi.h index a4f32d88458..7b5b50fdf39 100644 --- a/intern/numaapi/include/numaapi.h +++ b/intern/numaapi/include/numaapi.h @@ -67,7 +67,7 @@ int numaAPI_GetNumNodes(void); // Returns truth if the given node is available for compute. bool numaAPI_IsNodeAvailable(int node); -// Getnumber of available processors on a given node. +// Get number of available processors on a given node. int numaAPI_GetNumNodeProcessors(int node); //////////////////////////////////////////////////////////////////////////////// diff --git a/intern/numaapi/source/numaapi_linux.c b/intern/numaapi/source/numaapi_linux.c index 559e97b67d3..62e9dcdfadf 100644 --- a/intern/numaapi/source/numaapi_linux.c +++ b/intern/numaapi/source/numaapi_linux.c @@ -34,6 +34,8 @@ # include <dlfcn.h> #endif +#include <stdio.h> + #ifdef WITH_DYNLOAD // Descriptor numa library. @@ -61,6 +63,7 @@ typedef struct bitmask* tnuma_allocate_nodemask(void); typedef void tnuma_free_cpumask(struct bitmask* bitmask); typedef void tnuma_free_nodemask(struct bitmask* bitmask); typedef int tnuma_run_on_node_mask(struct bitmask *nodemask); +typedef int tnuma_run_on_node_mask_all(struct bitmask *nodemask); typedef void tnuma_set_interleave_mask(struct bitmask *nodemask); typedef void tnuma_set_localalloc(void); @@ -83,6 +86,7 @@ static tnuma_allocate_nodemask* numa_allocate_nodemask; static tnuma_free_nodemask* numa_free_nodemask; static tnuma_free_cpumask* numa_free_cpumask; static tnuma_run_on_node_mask* numa_run_on_node_mask; +static tnuma_run_on_node_mask_all* numa_run_on_node_mask_all; static tnuma_set_interleave_mask* numa_set_interleave_mask; static tnuma_set_localalloc* numa_set_localalloc; @@ -157,6 +161,7 @@ static NUMAAPI_Result loadNumaSymbols(void) { NUMA_LIBRARY_FIND(numa_free_cpumask); NUMA_LIBRARY_FIND(numa_free_nodemask); NUMA_LIBRARY_FIND(numa_run_on_node_mask); + NUMA_LIBRARY_FIND(numa_run_on_node_mask_all); 
NUMA_LIBRARY_FIND(numa_set_interleave_mask); NUMA_LIBRARY_FIND(numa_set_localalloc); @@ -192,10 +197,7 @@ int numaAPI_GetNumNodes(void) { } bool numaAPI_IsNodeAvailable(int node) { - if (numa_node_size(node, NULL) > 0) { - return true; - } - return false; + return numaAPI_GetNumNodeProcessors(node) > 0; } int numaAPI_GetNumNodeProcessors(int node) { @@ -235,13 +237,15 @@ bool numaAPI_RunThreadOnNode(int node) { struct bitmask* node_mask = numa_allocate_nodemask(); numa_bitmask_clearall(node_mask); numa_bitmask_setbit(node_mask, node); - numa_run_on_node_mask(node_mask); + numa_run_on_node_mask_all(node_mask); // TODO(sergey): The following commands are based on x265 code, we might want // to make those optional, or require to call those explicitly. // // Current assumption is that this is similar to SetThreadGroupAffinity(). - numa_set_interleave_mask(node_mask); - numa_set_localalloc(); + if (numa_node_size(node, NULL) > 0) { + numa_set_interleave_mask(node_mask); + numa_set_localalloc(); + } #ifdef WITH_DYNLOAD if (numa_free_nodemask != NULL) { numa_free_nodemask(node_mask); |