Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Sergey Sharybin <sergey.vfx@gmail.com> 2018-12-27 21:01:19 +0300
committer: Sergey Sharybin <sergey.vfx@gmail.com> 2018-12-27 21:12:59 +0300
commit: 826d7adde79216d271b78059c05abd10b7559899 (patch)
tree: 5d8b334bf5603208e12b579e5a6cd1314d29cfdd /intern/cycles/util/util_system.cpp
parent: de14ddf3a141cb0e2baff00b06dc7d86d3088916 (diff)
Fix T59874: Cycles CPU 25% load only during rendering
The issue was introduced by a Threadripper2 commit back in ce927e15e0e3. It boils down to threads inheriting affinity from the parent thread. It is an open question how this slipped through review (we definitely ran a benchmark round). A quick fix could have been to always set CPU group affinity in Cycles, which would have worked on Windows. On other platforms the CPU groups API was not finished. We ended up making Cycles aware of NUMA topology, so threads are now bound to a specific NUMA node. This required adding an external dependency to Cycles, but made some of the code there shorter.
Diffstat (limited to 'intern/cycles/util/util_system.cpp')
-rw-r--r--intern/cycles/util/util_system.cpp89
1 files changed, 49 insertions, 40 deletions
diff --git a/intern/cycles/util/util_system.cpp b/intern/cycles/util/util_system.cpp
index 34f428f111c..cc2d7017fd8 100644
--- a/intern/cycles/util/util_system.cpp
+++ b/intern/cycles/util/util_system.cpp
@@ -20,6 +20,8 @@
#include "util/util_types.h"
#include "util/util_string.h"
+#include <numaapi.h>
+
#ifdef _WIN32
# if(!defined(FREE_WINDOWS))
# include <intrin.h>
@@ -34,74 +36,81 @@
CCL_NAMESPACE_BEGIN
-int system_cpu_group_count()
+bool system_cpu_ensure_initialized()
{
-#ifdef _WIN32
- util_windows_init_numa_groups();
- return GetActiveProcessorGroupCount();
-#else
- /* TODO(sergey): Need to adopt for other platforms. */
- return 1;
-#endif
+ static bool is_initialized = false;
+ static bool result = false;
+ if (is_initialized) {
+ return result;
+ }
+ is_initialized = true;
+ const NUMAAPI_Result numa_result = numaAPI_Initialize();
+ result = (numa_result == NUMAAPI_SUCCESS);
+ return result;
}
-int system_cpu_group_thread_count(int group)
+/* Fallback solution, which doesn't use NUMA/CPU groups. */
+static int system_cpu_thread_count_fallback()
{
- /* TODO(sergey): Need make other platforms aware of groups. */
#ifdef _WIN32
- util_windows_init_numa_groups();
- return GetActiveProcessorCount(group);
+ SYSTEM_INFO info;
+ GetSystemInfo(&info);
+ return info.dwNumberOfProcessors;
#elif defined(__APPLE__)
- (void) group;
int count;
size_t len = sizeof(count);
int mib[2] = { CTL_HW, HW_NCPU };
sysctl(mib, 2, &count, &len, NULL, 0);
return count;
#else
- (void) group;
return sysconf(_SC_NPROCESSORS_ONLN);
#endif
}
int system_cpu_thread_count()
{
- static uint count = 0;
-
- if(count > 0) {
- return count;
+ const int num_nodes = system_cpu_num_numa_nodes();
+ int num_threads = 0;
+ for (int node = 0; node < num_nodes; ++node) {
+ if (!system_cpu_is_numa_node_available(node)) {
+ continue;
+ }
+ num_threads += system_cpu_num_numa_node_processors(node);
}
+ return num_threads;
+}
- int max_group = system_cpu_group_count();
- VLOG(1) << "Detected " << max_group << " CPU groups.";
- for(int group = 0; group < max_group; ++group) {
- int num_threads = system_cpu_group_thread_count(group);
- VLOG(1) << "Group " << group
- << " has " << num_threads << " threads.";
- count += num_threads;
+int system_cpu_num_numa_nodes()
+{
+ if (!system_cpu_ensure_initialized()) {
+ /* Fallback to a single node with all the threads. */
+ return 1;
}
+ return numaAPI_GetNumNodes();
+}
- if(count < 1) {
- count = 1;
+bool system_cpu_is_numa_node_available(int node)
+{
+ if (!system_cpu_ensure_initialized()) {
+ return true;
}
+ return numaAPI_IsNodeAvailable(node);
+}
- return count;
+int system_cpu_num_numa_node_processors(int node)
+{
+ if (!system_cpu_ensure_initialized()) {
+ return system_cpu_thread_count_fallback();
+ }
+ return numaAPI_GetNumNodeProcessors(node);
}
-unsigned short system_cpu_process_groups(unsigned short max_groups,
- unsigned short *groups)
+bool system_cpu_run_thread_on_node(int node)
{
-#ifdef _WIN32
- unsigned short group_count = max_groups;
- if(!GetProcessGroupAffinity(GetCurrentProcess(), &group_count, groups)) {
- return 0;
+ if (!system_cpu_ensure_initialized()) {
+ return true;
}
- return group_count;
-#else
- (void) max_groups;
- (void) groups;
- return 0;
-#endif
+ return numaAPI_RunThreadOnNode(node);
}
#if !defined(_WIN32) || defined(FREE_WINDOWS)