git.blender.org/blender.git

Diffstat (limited to 'intern')
 intern/cycles/util/CMakeLists.txt     |  14
 intern/cycles/util/util_system.cpp    |  89
 intern/cycles/util/util_system.h      |  28
 intern/cycles/util/util_task.cpp      |  60
 intern/cycles/util/util_thread.cpp    |  19
 intern/cycles/util/util_thread.h      |   4
 intern/cycles/util/util_windows.cpp   | 103
 intern/cycles/util/util_windows.h     |  25
 intern/numaapi/include/numaapi.h      |   2
 intern/numaapi/source/numaapi_linux.c |  18
 10 files changed, 114 insertions(+), 248 deletions(-)
diff --git a/intern/cycles/util/CMakeLists.txt b/intern/cycles/util/CMakeLists.txt
index 92dfc9fa85d..42626d05cf9 100644
--- a/intern/cycles/util/CMakeLists.txt
+++ b/intern/cycles/util/CMakeLists.txt
@@ -25,13 +25,17 @@ set(SRC
util_thread.cpp
util_time.cpp
util_transform.cpp
- util_windows.cpp
)
-if(WITH_CYCLES_STANDALONE AND WITH_CYCLES_STANDALONE_GUI)
- list(APPEND SRC
- util_view.cpp
- )
+if(WITH_CYCLES_STANDALONE)
+ if (WITH_CYCLES_STANDALONE_GUI)
+ list(APPEND SRC
+ util_view.cpp
+ )
+ endif()
+ list(APPEND INC_SYS ../../third_party/numaapi/include)
+else()
+ list(APPEND INC_SYS ../../numaapi/include)
endif()
set(SRC_HEADERS
diff --git a/intern/cycles/util/util_system.cpp b/intern/cycles/util/util_system.cpp
index 34f428f111c..cc2d7017fd8 100644
--- a/intern/cycles/util/util_system.cpp
+++ b/intern/cycles/util/util_system.cpp
@@ -20,6 +20,8 @@
#include "util/util_types.h"
#include "util/util_string.h"
+#include <numaapi.h>
+
#ifdef _WIN32
# if(!defined(FREE_WINDOWS))
# include <intrin.h>
@@ -34,74 +36,81 @@
CCL_NAMESPACE_BEGIN
-int system_cpu_group_count()
+bool system_cpu_ensure_initialized()
{
-#ifdef _WIN32
- util_windows_init_numa_groups();
- return GetActiveProcessorGroupCount();
-#else
- /* TODO(sergey): Need to adopt for other platforms. */
- return 1;
-#endif
+ static bool is_initialized = false;
+ static bool result = false;
+ if (is_initialized) {
+ return result;
+ }
+ is_initialized = true;
+ const NUMAAPI_Result numa_result = numaAPI_Initialize();
+ result = (numa_result == NUMAAPI_SUCCESS);
+ return result;
}
-int system_cpu_group_thread_count(int group)
+/* Fallback solution, which doesn't use NUMA/CPU groups. */
+static int system_cpu_thread_count_fallback()
{
- /* TODO(sergey): Need make other platforms aware of groups. */
#ifdef _WIN32
- util_windows_init_numa_groups();
- return GetActiveProcessorCount(group);
+ SYSTEM_INFO info;
+ GetSystemInfo(&info);
+ return info.dwNumberOfProcessors;
#elif defined(__APPLE__)
- (void) group;
int count;
size_t len = sizeof(count);
int mib[2] = { CTL_HW, HW_NCPU };
sysctl(mib, 2, &count, &len, NULL, 0);
return count;
#else
- (void) group;
return sysconf(_SC_NPROCESSORS_ONLN);
#endif
}
int system_cpu_thread_count()
{
- static uint count = 0;
-
- if(count > 0) {
- return count;
+ const int num_nodes = system_cpu_num_numa_nodes();
+ int num_threads = 0;
+ for (int node = 0; node < num_nodes; ++node) {
+ if (!system_cpu_is_numa_node_available(node)) {
+ continue;
+ }
+ num_threads += system_cpu_num_numa_node_processors(node);
}
+ return num_threads;
+}
- int max_group = system_cpu_group_count();
- VLOG(1) << "Detected " << max_group << " CPU groups.";
- for(int group = 0; group < max_group; ++group) {
- int num_threads = system_cpu_group_thread_count(group);
- VLOG(1) << "Group " << group
- << " has " << num_threads << " threads.";
- count += num_threads;
+int system_cpu_num_numa_nodes()
+{
+ if (!system_cpu_ensure_initialized()) {
+ /* Fallback to a single node with all the threads. */
+ return 1;
}
+ return numaAPI_GetNumNodes();
+}
- if(count < 1) {
- count = 1;
+bool system_cpu_is_numa_node_available(int node)
+{
+ if (!system_cpu_ensure_initialized()) {
+ return true;
}
+ return numaAPI_IsNodeAvailable(node);
+}
- return count;
+int system_cpu_num_numa_node_processors(int node)
+{
+ if (!system_cpu_ensure_initialized()) {
+ return system_cpu_thread_count_fallback();
+ }
+ return numaAPI_GetNumNodeProcessors(node);
}
-unsigned short system_cpu_process_groups(unsigned short max_groups,
- unsigned short *groups)
+bool system_cpu_run_thread_on_node(int node)
{
-#ifdef _WIN32
- unsigned short group_count = max_groups;
- if(!GetProcessGroupAffinity(GetCurrentProcess(), &group_count, groups)) {
- return 0;
+ if (!system_cpu_ensure_initialized()) {
+ return true;
}
- return group_count;
-#else
- (void) max_groups;
- (void) groups;
- return 0;
-#endif
+ return numaAPI_RunThreadOnNode(node);
}
#if !defined(_WIN32) || defined(FREE_WINDOWS)
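
The rewritten util_system.cpp reduces to one idea: initialize the NUMA API exactly once, and fall back to a flat processor count when it is not available. Below is a minimal standalone sketch of that pattern, assuming only the numaAPI_* calls that appear in this diff (intern/numaapi's <numaapi.h>); it is an illustration, not the Cycles code itself.

    #include <numaapi.h> /* intern/numaapi, as added to INC_SYS above */

    #include <cstdio>

    /* Initialize the NUMA API once and remember whether it is usable. */
    static bool numa_ready()
    {
      static bool initialized = false;
      static bool available = false;
      if (!initialized) {
        initialized = true;
        available = (numaAPI_Initialize() == NUMAAPI_SUCCESS);
      }
      return available;
    }

    int main()
    {
      if (!numa_ready()) {
        std::printf("NUMA API unavailable, treating the machine as one node\n");
        return 0;
      }
      int total = 0;
      const int num_nodes = numaAPI_GetNumNodes();
      for (int node = 0; node < num_nodes; ++node) {
        if (!numaAPI_IsNodeAvailable(node)) {
          continue; /* Node indices are not guaranteed to be contiguous. */
        }
        const int num_procs = numaAPI_GetNumNodeProcessors(node);
        std::printf("node %d: %d processors\n", node, num_procs);
        total += num_procs;
      }
      std::printf("total threads: %d\n", total);
      return 0;
    }
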
diff --git a/intern/cycles/util/util_system.h b/intern/cycles/util/util_system.h
index 241ac897157..15f69bcf153 100644
--- a/intern/cycles/util/util_system.h
+++ b/intern/cycles/util/util_system.h
@@ -21,18 +21,28 @@
CCL_NAMESPACE_BEGIN
-/* Get number of available CPU groups. */
-int system_cpu_group_count();
+/* Make sure CPU groups / NUMA API is initialized. */
+bool system_cpu_ensure_initialized();
-/* Get number of threads/processors in the specified group. */
-int system_cpu_group_thread_count(int group);
-
-/* Get total number of threads in all groups. */
+/* Get total number of threads in all NUMA nodes / CPU groups. */
int system_cpu_thread_count();
-/* Get current process groups. */
-unsigned short system_cpu_process_groups(unsigned short max_groups,
- unsigned short *grpups);
+/* Get number of available nodes.
+ *
+ * This is in fact an index of last node plus one and it's not guaranteed
+ * that all nodes up to this one are available. */
+int system_cpu_num_numa_nodes();
+
+/* Returns truth if the given node is available for compute. */
+bool system_cpu_is_numa_node_available(int node);
+
+/* Get number of available processors on a given node. */
+int system_cpu_num_numa_node_processors(int node);
+
+/* Runs the current thread and its children on a specific node.
+ *
+ * Returns truth if affinity has successfully changed. */
+bool system_cpu_run_thread_on_node(int node);
string system_cpu_brand_string();
int system_cpu_bits();
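
For callers, the header above replaces the Windows processor-group queries with NUMA-node queries. A hedged usage sketch, assuming the declarations shown here and that CCL_NAMESPACE_BEGIN/END wrap namespace ccl:

    #include "util/util_system.h" /* the header shown above */

    #include <cstdio>

    using namespace ccl; /* assumption: CCL_NAMESPACE_* expands to namespace ccl */

    int main()
    {
      std::printf("total threads: %d\n", system_cpu_thread_count());
      const int num_nodes = system_cpu_num_numa_nodes();
      for (int node = 0; node < num_nodes; ++node) {
        if (!system_cpu_is_numa_node_available(node)) {
          continue; /* Not every node up to the last index needs to exist. */
        }
        std::printf("node %d: %d processors\n",
                    node, system_cpu_num_numa_node_processors(node));
        /* Pin the calling thread (and its children) to the first usable node. */
        if (system_cpu_run_thread_on_node(node)) {
          std::printf("pinned to node %d\n", node);
        }
        break;
      }
      return 0;
    }
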
diff --git a/intern/cycles/util/util_task.cpp b/intern/cycles/util/util_task.cpp
index 2d21d6b5a18..50a2bb160ff 100644
--- a/intern/cycles/util/util_task.cpp
+++ b/intern/cycles/util/util_task.cpp
@@ -204,50 +204,26 @@ void TaskScheduler::init(int num_threads)
/* launch threads that will be waiting for work */
threads.resize(num_threads);
- const int num_groups = system_cpu_group_count();
- unsigned short num_process_groups = 0;
- vector<unsigned short> process_groups;
- int current_group_threads = 0;
- if(num_groups > 1) {
- process_groups.resize(num_groups);
- num_process_groups = system_cpu_process_groups(num_groups,
- &process_groups[0]);
- if(num_process_groups == 1) {
- current_group_threads = system_cpu_group_thread_count(process_groups[0]);
- }
- }
+ const int num_nodes = system_cpu_num_numa_nodes();
int thread_index = 0;
- for(int group = 0; group < num_groups; ++group) {
- /* NOTE: That's not really efficient from threading point of view,
- * but it is simple to read and it doesn't make sense to use more
- * user-specified threads than logical threads anyway.
- */
- int num_group_threads = (group == num_groups - 1)
- ? (threads.size() - thread_index)
- : system_cpu_group_thread_count(group);
- for(int group_thread = 0;
- group_thread < num_group_threads && thread_index < threads.size();
- ++group_thread, ++thread_index)
+ for (int node = 0;
+ node < num_nodes && thread_index < threads.size();
+ ++node)
+ {
+ if (!system_cpu_is_numa_node_available(node)) {
+ continue;
+ }
+ const int num_node_processors =
+ system_cpu_num_numa_node_processors(node);
+ for (int i = 0;
+ i < num_node_processors && thread_index < threads.size();
+ ++i)
{
- /* NOTE: Thread group of -1 means we would not force thread affinity. */
- int thread_group;
- if(num_groups == 1) {
- /* Use default affinity if there's only one CPU group in the system. */
- thread_group = -1;
- }
- else if(use_auto_threads &&
- num_process_groups == 1 &&
- num_threads <= current_group_threads)
- {
- /* If we fit into curent CPU group we also don't force any affinity. */
- thread_group = -1;
- }
- else {
- thread_group = group;
- }
- threads[thread_index] = new thread(function_bind(&TaskScheduler::thread_run,
- thread_index + 1),
- thread_group);
+ threads[thread_index] = new thread(
+ function_bind(&TaskScheduler::thread_run,
+ thread_index + 1),
+ node);
+ thread_index++;
}
}
}
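
The rewritten TaskScheduler::init simply walks the nodes and pins every worker to the node it was created for; the old special cases (no affinity when everything fits into a single processor group) are gone. A standalone sketch of that distribution with plain std::thread, using the same system_cpu_* calls (again assuming namespace ccl):

    #include "util/util_system.h"

    #include <thread>
    #include <vector>

    using namespace ccl; /* assumption, as in the previous sketch */

    /* Spread at most `pool_size` workers across the available NUMA nodes,
     * pinning each worker to its node before it starts working. */
    static void spawn_pool(std::vector<std::thread> &workers,
                           const int pool_size,
                           void (*work)())
    {
      const int num_nodes = system_cpu_num_numa_nodes();
      for (int node = 0;
           node < num_nodes && (int)workers.size() < pool_size;
           ++node)
      {
        if (!system_cpu_is_numa_node_available(node)) {
          continue;
        }
        const int num_procs = system_cpu_num_numa_node_processors(node);
        for (int i = 0;
             i < num_procs && (int)workers.size() < pool_size;
             ++i)
        {
          workers.emplace_back([node, work]() {
            system_cpu_run_thread_on_node(node); /* Best effort, may fail. */
            work();
          });
        }
      }
    }

Unlike the removed code, every worker now requests affinity to its node; whether that request is honored is left to system_cpu_run_thread_on_node().
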
diff --git a/intern/cycles/util/util_thread.cpp b/intern/cycles/util/util_thread.cpp
index 37d8bdbd4b0..4d30e3f564f 100644
--- a/intern/cycles/util/util_thread.cpp
+++ b/intern/cycles/util/util_thread.cpp
@@ -21,10 +21,10 @@
CCL_NAMESPACE_BEGIN
-thread::thread(function<void()> run_cb, int group)
+thread::thread(function<void()> run_cb, int node)
: run_cb_(run_cb),
joined_(false),
- group_(group)
+ node_(node)
{
thread_ = std::thread(&thread::run, this);
}
@@ -39,19 +39,8 @@ thread::~thread()
void *thread::run(void *arg)
{
thread *self = (thread*)(arg);
- if(self->group_ != -1) {
-#ifdef _WIN32
- HANDLE thread_handle = GetCurrentThread();
- GROUP_AFFINITY group_affinity = { 0 };
- int num_threads = system_cpu_group_thread_count(self->group_);
- group_affinity.Group = self->group_;
- group_affinity.Mask = (num_threads == 64)
- ? -1
- : (1ull << num_threads) - 1;
- if(SetThreadGroupAffinity(thread_handle, &group_affinity, NULL) == 0) {
- fprintf(stderr, "Error setting thread affinity.\n");
- }
-#endif
+ if (self->node_ != -1) {
+ system_cpu_run_thread_on_node(self->node_);
}
self->run_cb_();
return NULL;
diff --git a/intern/cycles/util/util_thread.h b/intern/cycles/util/util_thread.h
index 6250bb95dcf..d54199a37fc 100644
--- a/intern/cycles/util/util_thread.h
+++ b/intern/cycles/util/util_thread.h
@@ -46,7 +46,7 @@ typedef std::condition_variable thread_condition_variable;
class thread {
public:
- thread(function<void()> run_cb, int group = -1);
+ thread(function<void()> run_cb, int node = -1);
~thread();
static void *run(void *arg);
@@ -56,7 +56,7 @@ protected:
function<void()> run_cb_;
std::thread thread_;
bool joined_;
- int group_;
+ int node_;
};
/* Own wrapper around pthread's spin lock to make it's use easier. */
diff --git a/intern/cycles/util/util_windows.cpp b/intern/cycles/util/util_windows.cpp
deleted file mode 100644
index 073db2a27db..00000000000
--- a/intern/cycles/util/util_windows.cpp
+++ /dev/null
@@ -1,103 +0,0 @@
-/*
- * Copyright 2011-2016 Blender Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "util/util_windows.h"
-
-#ifdef _WIN32
-
-CCL_NAMESPACE_BEGIN
-
-#ifdef _M_X64
-# include <VersionHelpers.h>
-#endif
-
-#if _WIN32_WINNT < 0x0601
-tGetActiveProcessorGroupCount *GetActiveProcessorGroupCount;
-tGetActiveProcessorCount *GetActiveProcessorCount;
-tSetThreadGroupAffinity *SetThreadGroupAffinity;
-tGetProcessGroupAffinity *GetProcessGroupAffinity;
-#endif
-
-static WORD GetActiveProcessorGroupCount_stub()
-{
- return 1;
-}
-
-static DWORD GetActiveProcessorCount_stub(WORD /*GroupNumber*/)
-{
- SYSTEM_INFO info;
- GetSystemInfo(&info);
- return info.dwNumberOfProcessors;
-}
-
-static BOOL SetThreadGroupAffinity_stub(
- HANDLE /*hThread*/,
- const GROUP_AFFINITY * /*GroupAffinity*/,
- PGROUP_AFFINITY /*PreviousGroupAffinity*/)
-{
- return TRUE;
-}
-
-static BOOL GetProcessGroupAffinity_stub(HANDLE hProcess,
- PUSHORT GroupCount,
- PUSHORT GroupArray)
-{
- if(*GroupCount < 1) {
- return FALSE;
- }
- *GroupCount = 1;
- GroupArray[0] = 0;
- return TRUE;
-}
-
-static bool supports_numa()
-{
-#ifndef _M_X64
- return false;
-#else
- return IsWindows7OrGreater();
-#endif
-}
-
-void util_windows_init_numa_groups()
-{
- static bool initialized = false;
- if(initialized) {
- return;
- }
- initialized = true;
-#if _WIN32_WINNT < 0x0601
- if(!supports_numa()) {
- /* Use stubs on platforms which doesn't have rean NUMA/Groups. */
- GetActiveProcessorGroupCount = GetActiveProcessorGroupCount_stub;
- GetActiveProcessorCount = GetActiveProcessorCount_stub;
- SetThreadGroupAffinity = SetThreadGroupAffinity_stub;
- GetProcessGroupAffinity = GetProcessGroupAffinity_stub;
- return;
- }
- HMODULE kernel = GetModuleHandleA("kernel32.dll");
-# define READ_SYMBOL(sym) sym = (t##sym*)GetProcAddress(kernel, #sym)
- READ_SYMBOL(GetActiveProcessorGroupCount);
- READ_SYMBOL(GetActiveProcessorCount);
- READ_SYMBOL(SetThreadGroupAffinity);
- READ_SYMBOL(GetProcessGroupAffinity);
-# undef READ_SUMBOL
-#endif
-}
-
-CCL_NAMESPACE_END
-
-#endif /* _WIN32 */
diff --git a/intern/cycles/util/util_windows.h b/intern/cycles/util/util_windows.h
index 9b9268fed7a..bd1bc85adff 100644
--- a/intern/cycles/util/util_windows.h
+++ b/intern/cycles/util/util_windows.h
@@ -31,29 +31,6 @@
#include <windows.h>
-CCL_NAMESPACE_BEGIN
-
-#if _WIN32_WINNT < 0x0601
-typedef WORD tGetActiveProcessorGroupCount();
-typedef DWORD tGetActiveProcessorCount(WORD GroupNumber);
-typedef BOOL tSetThreadGroupAffinity(HANDLE hThread,
- const GROUP_AFFINITY *GroupAffinity,
- PGROUP_AFFINITY PreviousGroupAffinity);
-typedef BOOL tGetProcessGroupAffinity(HANDLE hProcess,
- PUSHORT GroupCount,
- PUSHORT GroupArray);
-
-extern tGetActiveProcessorGroupCount *GetActiveProcessorGroupCount;
-extern tGetActiveProcessorCount *GetActiveProcessorCount;
-extern tSetThreadGroupAffinity *SetThreadGroupAffinity;
-extern tGetProcessGroupAffinity *GetProcessGroupAffinity;
-#endif
-
-/* Make sure NUMA and processor groups API is initialized. */
-void util_windows_init_numa_groups();
-
-CCL_NAMESPACE_END
-
-#endif /* WIN32 */
+#endif /* _WIN32 */
#endif /* __UTIL_WINDOWS_H__ */
diff --git a/intern/numaapi/include/numaapi.h b/intern/numaapi/include/numaapi.h
index a4f32d88458..7b5b50fdf39 100644
--- a/intern/numaapi/include/numaapi.h
+++ b/intern/numaapi/include/numaapi.h
@@ -67,7 +67,7 @@ int numaAPI_GetNumNodes(void);
// Returns truth if the given node is available for compute.
bool numaAPI_IsNodeAvailable(int node);
-// Getnumber of available processors on a given node.
+// Get number of available processors on a given node.
int numaAPI_GetNumNodeProcessors(int node);
////////////////////////////////////////////////////////////////////////////////
diff --git a/intern/numaapi/source/numaapi_linux.c b/intern/numaapi/source/numaapi_linux.c
index 559e97b67d3..62e9dcdfadf 100644
--- a/intern/numaapi/source/numaapi_linux.c
+++ b/intern/numaapi/source/numaapi_linux.c
@@ -34,6 +34,8 @@
# include <dlfcn.h>
#endif
+#include <stdio.h>
+
#ifdef WITH_DYNLOAD
// Descriptor numa library.
@@ -61,6 +63,7 @@ typedef struct bitmask* tnuma_allocate_nodemask(void);
typedef void tnuma_free_cpumask(struct bitmask* bitmask);
typedef void tnuma_free_nodemask(struct bitmask* bitmask);
typedef int tnuma_run_on_node_mask(struct bitmask *nodemask);
+typedef int tnuma_run_on_node_mask_all(struct bitmask *nodemask);
typedef void tnuma_set_interleave_mask(struct bitmask *nodemask);
typedef void tnuma_set_localalloc(void);
@@ -83,6 +86,7 @@ static tnuma_allocate_nodemask* numa_allocate_nodemask;
static tnuma_free_nodemask* numa_free_nodemask;
static tnuma_free_cpumask* numa_free_cpumask;
static tnuma_run_on_node_mask* numa_run_on_node_mask;
+static tnuma_run_on_node_mask_all* numa_run_on_node_mask_all;
static tnuma_set_interleave_mask* numa_set_interleave_mask;
static tnuma_set_localalloc* numa_set_localalloc;
@@ -157,6 +161,7 @@ static NUMAAPI_Result loadNumaSymbols(void) {
NUMA_LIBRARY_FIND(numa_free_cpumask);
NUMA_LIBRARY_FIND(numa_free_nodemask);
NUMA_LIBRARY_FIND(numa_run_on_node_mask);
+ NUMA_LIBRARY_FIND(numa_run_on_node_mask_all);
NUMA_LIBRARY_FIND(numa_set_interleave_mask);
NUMA_LIBRARY_FIND(numa_set_localalloc);
@@ -192,10 +197,7 @@ int numaAPI_GetNumNodes(void) {
}
bool numaAPI_IsNodeAvailable(int node) {
- if (numa_node_size(node, NULL) > 0) {
- return true;
- }
- return false;
+ return numaAPI_GetNumNodeProcessors(node) > 0;
}
int numaAPI_GetNumNodeProcessors(int node) {
@@ -235,13 +237,15 @@ bool numaAPI_RunThreadOnNode(int node) {
struct bitmask* node_mask = numa_allocate_nodemask();
numa_bitmask_clearall(node_mask);
numa_bitmask_setbit(node_mask, node);
- numa_run_on_node_mask(node_mask);
+ numa_run_on_node_mask_all(node_mask);
// TODO(sergey): The following commands are based on x265 code, we might want
// to make those optional, or require to call those explicitly.
//
// Current assumption is that this is similar to SetThreadGroupAffinity().
- numa_set_interleave_mask(node_mask);
- numa_set_localalloc();
+ if (numa_node_size(node, NULL) > 0) {
+ numa_set_interleave_mask(node_mask);
+ numa_set_localalloc();
+ }
#ifdef WITH_DYNLOAD
if (numa_free_nodemask != NULL) {
numa_free_nodemask(node_mask);
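
At the libnuma level, the change above switches the pinning call to numa_run_on_node_mask_all() and only touches the interleave/local-allocation policy when the node reports attached memory. A hedged sketch of that sequence against libnuma directly, assuming a numa.h recent enough to declare numa_run_on_node_mask_all (link with -lnuma); unlike the code above, it links the library statically instead of going through WITH_DYNLOAD:

    #include <numa.h> /* libnuma; link with -lnuma */

    #include <cstdio>

    /* Ask the kernel to run the calling thread (and its children) on `node`. */
    static bool run_on_node(const int node)
    {
      if (numa_available() < 0) {
        return false; /* No NUMA support on this system. */
      }
      struct bitmask *node_mask = numa_allocate_nodemask();
      numa_bitmask_clearall(node_mask);
      numa_bitmask_setbit(node_mask, node);
      numa_run_on_node_mask_all(node_mask);
      /* Only adjust the allocation policy for nodes which have memory. */
      if (numa_node_size(node, NULL) > 0) {
        numa_set_interleave_mask(node_mask);
        numa_set_localalloc();
      }
      numa_free_nodemask(node_mask);
      return true;
    }

    int main()
    {
      std::printf("pinned to node 0: %s\n", run_on_node(0) ? "yes" : "no");
      return 0;
    }
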