diff options
-rw-r--r-- | intern/cycles/util/CMakeLists.txt | 14 | ||||
-rw-r--r-- | intern/cycles/util/util_system.cpp | 89 | ||||
-rw-r--r-- | intern/cycles/util/util_system.h | 28 | ||||
-rw-r--r-- | intern/cycles/util/util_task.cpp | 60 | ||||
-rw-r--r-- | intern/cycles/util/util_thread.cpp | 19 | ||||
-rw-r--r-- | intern/cycles/util/util_thread.h | 4 | ||||
-rw-r--r-- | intern/cycles/util/util_windows.cpp | 103 | ||||
-rw-r--r-- | intern/cycles/util/util_windows.h | 25 | ||||
-rw-r--r-- | intern/numaapi/include/numaapi.h | 2 | ||||
-rw-r--r-- | intern/numaapi/source/numaapi_linux.c | 18 | ||||
-rw-r--r-- | source/blender/blenkernel/intern/font.c | 4 |
11 files changed, 115 insertions, 251 deletions
diff --git a/intern/cycles/util/CMakeLists.txt b/intern/cycles/util/CMakeLists.txt index 92dfc9fa85d..42626d05cf9 100644 --- a/intern/cycles/util/CMakeLists.txt +++ b/intern/cycles/util/CMakeLists.txt @@ -25,13 +25,17 @@ set(SRC util_thread.cpp util_time.cpp util_transform.cpp - util_windows.cpp ) -if(WITH_CYCLES_STANDALONE AND WITH_CYCLES_STANDALONE_GUI) - list(APPEND SRC - util_view.cpp - ) +if(WITH_CYCLES_STANDALONE) + if (WITH_CYCLES_STANDALONE_GUI) + list(APPEND SRC + util_view.cpp + ) + endif() + list(APPEND INC_SYS ../../third_party/numaapi/include) +else() + list(APPEND INC_SYS ../../numaapi/include) endif() set(SRC_HEADERS diff --git a/intern/cycles/util/util_system.cpp b/intern/cycles/util/util_system.cpp index 34f428f111c..cc2d7017fd8 100644 --- a/intern/cycles/util/util_system.cpp +++ b/intern/cycles/util/util_system.cpp @@ -20,6 +20,8 @@ #include "util/util_types.h" #include "util/util_string.h" +#include <numaapi.h> + #ifdef _WIN32 # if(!defined(FREE_WINDOWS)) # include <intrin.h> @@ -34,74 +36,81 @@ CCL_NAMESPACE_BEGIN -int system_cpu_group_count() +bool system_cpu_ensure_initialized() { -#ifdef _WIN32 - util_windows_init_numa_groups(); - return GetActiveProcessorGroupCount(); -#else - /* TODO(sergey): Need to adopt for other platforms. */ - return 1; -#endif + static bool is_initialized = false; + static bool result = false; + if (is_initialized) { + return result; + } + is_initialized = true; + const NUMAAPI_Result numa_result = numaAPI_Initialize(); + result = (numa_result == NUMAAPI_SUCCESS); + return result; } -int system_cpu_group_thread_count(int group) +/* Fallback solution, which doesn't use NUMA/CPU groups. */ +static int system_cpu_thread_count_fallback() { - /* TODO(sergey): Need make other platforms aware of groups. */ #ifdef _WIN32 - util_windows_init_numa_groups(); - return GetActiveProcessorCount(group); + SYSTEM_INFO info; + GetSystemInfo(&info); + return info.dwNumberOfProcessors; #elif defined(__APPLE__) - (void) group; int count; size_t len = sizeof(count); int mib[2] = { CTL_HW, HW_NCPU }; sysctl(mib, 2, &count, &len, NULL, 0); return count; #else - (void) group; return sysconf(_SC_NPROCESSORS_ONLN); #endif } int system_cpu_thread_count() { - static uint count = 0; - - if(count > 0) { - return count; + const int num_nodes = system_cpu_num_numa_nodes(); + int num_threads = 0; + for (int node = 0; node < num_nodes; ++node) { + if (!system_cpu_is_numa_node_available(node)) { + continue; + } + num_threads += system_cpu_num_numa_node_processors(node); } + return num_threads; +} - int max_group = system_cpu_group_count(); - VLOG(1) << "Detected " << max_group << " CPU groups."; - for(int group = 0; group < max_group; ++group) { - int num_threads = system_cpu_group_thread_count(group); - VLOG(1) << "Group " << group - << " has " << num_threads << " threads."; - count += num_threads; +int system_cpu_num_numa_nodes() +{ + if (!system_cpu_ensure_initialized()) { + /* Fallback to a single node with all the threads. */ + return 1; } + return numaAPI_GetNumNodes(); +} - if(count < 1) { - count = 1; +bool system_cpu_is_numa_node_available(int node) +{ + if (!system_cpu_ensure_initialized()) { + return true; } + return numaAPI_IsNodeAvailable(node); +} - return count; +int system_cpu_num_numa_node_processors(int node) +{ + if (!system_cpu_ensure_initialized()) { + return system_cpu_thread_count_fallback(); + } + return numaAPI_GetNumNodeProcessors(node); } -unsigned short system_cpu_process_groups(unsigned short max_groups, - unsigned short *groups) +bool system_cpu_run_thread_on_node(int node) { -#ifdef _WIN32 - unsigned short group_count = max_groups; - if(!GetProcessGroupAffinity(GetCurrentProcess(), &group_count, groups)) { - return 0; + if (!system_cpu_ensure_initialized()) { + return true; } - return group_count; -#else - (void) max_groups; - (void) groups; - return 0; -#endif + return numaAPI_RunThreadOnNode(node); } #if !defined(_WIN32) || defined(FREE_WINDOWS) diff --git a/intern/cycles/util/util_system.h b/intern/cycles/util/util_system.h index 241ac897157..15f69bcf153 100644 --- a/intern/cycles/util/util_system.h +++ b/intern/cycles/util/util_system.h @@ -21,18 +21,28 @@ CCL_NAMESPACE_BEGIN -/* Get number of available CPU groups. */ -int system_cpu_group_count(); +/* Make sure CPU groups / NUMA API is initialized. */ +bool system_cpu_ensure_initialized(); -/* Get number of threads/processors in the specified group. */ -int system_cpu_group_thread_count(int group); - -/* Get total number of threads in all groups. */ +/* Get total number of threads in all NUMA nodes / CPU groups. */ int system_cpu_thread_count(); -/* Get current process groups. */ -unsigned short system_cpu_process_groups(unsigned short max_groups, - unsigned short *grpups); +/* Get number of available nodes. + * + * This is in fact an index of last node plus one and it's not guaranteed + * that all nodes up to this one are available. */ +int system_cpu_num_numa_nodes(); + +/* Returns truth if the given node is available for compute. */ +bool system_cpu_is_numa_node_available(int node); + +/* Get number of available processors on a given node. */ +int system_cpu_num_numa_node_processors(int node); + +/* Runs the current thread and its children on a specific node. + * + * Returns truth if affinity has successfully changed. */ +bool system_cpu_run_thread_on_node(int node); string system_cpu_brand_string(); int system_cpu_bits(); diff --git a/intern/cycles/util/util_task.cpp b/intern/cycles/util/util_task.cpp index 2d21d6b5a18..50a2bb160ff 100644 --- a/intern/cycles/util/util_task.cpp +++ b/intern/cycles/util/util_task.cpp @@ -204,50 +204,26 @@ void TaskScheduler::init(int num_threads) /* launch threads that will be waiting for work */ threads.resize(num_threads); - const int num_groups = system_cpu_group_count(); - unsigned short num_process_groups = 0; - vector<unsigned short> process_groups; - int current_group_threads = 0; - if(num_groups > 1) { - process_groups.resize(num_groups); - num_process_groups = system_cpu_process_groups(num_groups, - &process_groups[0]); - if(num_process_groups == 1) { - current_group_threads = system_cpu_group_thread_count(process_groups[0]); - } - } + const int num_nodes = system_cpu_num_numa_nodes(); int thread_index = 0; - for(int group = 0; group < num_groups; ++group) { - /* NOTE: That's not really efficient from threading point of view, - * but it is simple to read and it doesn't make sense to use more - * user-specified threads than logical threads anyway. - */ - int num_group_threads = (group == num_groups - 1) - ? (threads.size() - thread_index) - : system_cpu_group_thread_count(group); - for(int group_thread = 0; - group_thread < num_group_threads && thread_index < threads.size(); - ++group_thread, ++thread_index) + for (int node = 0; + node < num_nodes && thread_index < threads.size(); + ++node) + { + if (!system_cpu_is_numa_node_available(node)) { + continue; + } + const int num_node_processors = + system_cpu_num_numa_node_processors(node); + for (int i = 0; + i < num_node_processors && thread_index < threads.size(); + ++i) { - /* NOTE: Thread group of -1 means we would not force thread affinity. */ - int thread_group; - if(num_groups == 1) { - /* Use default affinity if there's only one CPU group in the system. */ - thread_group = -1; - } - else if(use_auto_threads && - num_process_groups == 1 && - num_threads <= current_group_threads) - { - /* If we fit into curent CPU group we also don't force any affinity. */ - thread_group = -1; - } - else { - thread_group = group; - } - threads[thread_index] = new thread(function_bind(&TaskScheduler::thread_run, - thread_index + 1), - thread_group); + threads[thread_index] = new thread( + function_bind(&TaskScheduler::thread_run, + thread_index + 1), + node); + thread_index++; } } } diff --git a/intern/cycles/util/util_thread.cpp b/intern/cycles/util/util_thread.cpp index 37d8bdbd4b0..4d30e3f564f 100644 --- a/intern/cycles/util/util_thread.cpp +++ b/intern/cycles/util/util_thread.cpp @@ -21,10 +21,10 @@ CCL_NAMESPACE_BEGIN -thread::thread(function<void()> run_cb, int group) +thread::thread(function<void()> run_cb, int node) : run_cb_(run_cb), joined_(false), - group_(group) + node_(node) { thread_ = std::thread(&thread::run, this); } @@ -39,19 +39,8 @@ thread::~thread() void *thread::run(void *arg) { thread *self = (thread*)(arg); - if(self->group_ != -1) { -#ifdef _WIN32 - HANDLE thread_handle = GetCurrentThread(); - GROUP_AFFINITY group_affinity = { 0 }; - int num_threads = system_cpu_group_thread_count(self->group_); - group_affinity.Group = self->group_; - group_affinity.Mask = (num_threads == 64) - ? -1 - : (1ull << num_threads) - 1; - if(SetThreadGroupAffinity(thread_handle, &group_affinity, NULL) == 0) { - fprintf(stderr, "Error setting thread affinity.\n"); - } -#endif + if (self->node_ != -1) { + system_cpu_run_thread_on_node(self->node_); } self->run_cb_(); return NULL; diff --git a/intern/cycles/util/util_thread.h b/intern/cycles/util/util_thread.h index 6250bb95dcf..d54199a37fc 100644 --- a/intern/cycles/util/util_thread.h +++ b/intern/cycles/util/util_thread.h @@ -46,7 +46,7 @@ typedef std::condition_variable thread_condition_variable; class thread { public: - thread(function<void()> run_cb, int group = -1); + thread(function<void()> run_cb, int node = -1); ~thread(); static void *run(void *arg); @@ -56,7 +56,7 @@ protected: function<void()> run_cb_; std::thread thread_; bool joined_; - int group_; + int node_; }; /* Own wrapper around pthread's spin lock to make it's use easier. */ diff --git a/intern/cycles/util/util_windows.cpp b/intern/cycles/util/util_windows.cpp deleted file mode 100644 index 073db2a27db..00000000000 --- a/intern/cycles/util/util_windows.cpp +++ /dev/null @@ -1,103 +0,0 @@ -/* - * Copyright 2011-2016 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "util/util_windows.h" - -#ifdef _WIN32 - -CCL_NAMESPACE_BEGIN - -#ifdef _M_X64 -# include <VersionHelpers.h> -#endif - -#if _WIN32_WINNT < 0x0601 -tGetActiveProcessorGroupCount *GetActiveProcessorGroupCount; -tGetActiveProcessorCount *GetActiveProcessorCount; -tSetThreadGroupAffinity *SetThreadGroupAffinity; -tGetProcessGroupAffinity *GetProcessGroupAffinity; -#endif - -static WORD GetActiveProcessorGroupCount_stub() -{ - return 1; -} - -static DWORD GetActiveProcessorCount_stub(WORD /*GroupNumber*/) -{ - SYSTEM_INFO info; - GetSystemInfo(&info); - return info.dwNumberOfProcessors; -} - -static BOOL SetThreadGroupAffinity_stub( - HANDLE /*hThread*/, - const GROUP_AFFINITY * /*GroupAffinity*/, - PGROUP_AFFINITY /*PreviousGroupAffinity*/) -{ - return TRUE; -} - -static BOOL GetProcessGroupAffinity_stub(HANDLE hProcess, - PUSHORT GroupCount, - PUSHORT GroupArray) -{ - if(*GroupCount < 1) { - return FALSE; - } - *GroupCount = 1; - GroupArray[0] = 0; - return TRUE; -} - -static bool supports_numa() -{ -#ifndef _M_X64 - return false; -#else - return IsWindows7OrGreater(); -#endif -} - -void util_windows_init_numa_groups() -{ - static bool initialized = false; - if(initialized) { - return; - } - initialized = true; -#if _WIN32_WINNT < 0x0601 - if(!supports_numa()) { - /* Use stubs on platforms which doesn't have rean NUMA/Groups. */ - GetActiveProcessorGroupCount = GetActiveProcessorGroupCount_stub; - GetActiveProcessorCount = GetActiveProcessorCount_stub; - SetThreadGroupAffinity = SetThreadGroupAffinity_stub; - GetProcessGroupAffinity = GetProcessGroupAffinity_stub; - return; - } - HMODULE kernel = GetModuleHandleA("kernel32.dll"); -# define READ_SYMBOL(sym) sym = (t##sym*)GetProcAddress(kernel, #sym) - READ_SYMBOL(GetActiveProcessorGroupCount); - READ_SYMBOL(GetActiveProcessorCount); - READ_SYMBOL(SetThreadGroupAffinity); - READ_SYMBOL(GetProcessGroupAffinity); -# undef READ_SUMBOL -#endif -} - -CCL_NAMESPACE_END - -#endif /* _WIN32 */ diff --git a/intern/cycles/util/util_windows.h b/intern/cycles/util/util_windows.h index 9b9268fed7a..bd1bc85adff 100644 --- a/intern/cycles/util/util_windows.h +++ b/intern/cycles/util/util_windows.h @@ -31,29 +31,6 @@ #include <windows.h> -CCL_NAMESPACE_BEGIN - -#if _WIN32_WINNT < 0x0601 -typedef WORD tGetActiveProcessorGroupCount(); -typedef DWORD tGetActiveProcessorCount(WORD GroupNumber); -typedef BOOL tSetThreadGroupAffinity(HANDLE hThread, - const GROUP_AFFINITY *GroupAffinity, - PGROUP_AFFINITY PreviousGroupAffinity); -typedef BOOL tGetProcessGroupAffinity(HANDLE hProcess, - PUSHORT GroupCount, - PUSHORT GroupArray); - -extern tGetActiveProcessorGroupCount *GetActiveProcessorGroupCount; -extern tGetActiveProcessorCount *GetActiveProcessorCount; -extern tSetThreadGroupAffinity *SetThreadGroupAffinity; -extern tGetProcessGroupAffinity *GetProcessGroupAffinity; -#endif - -/* Make sure NUMA and processor groups API is initialized. */ -void util_windows_init_numa_groups(); - -CCL_NAMESPACE_END - -#endif /* WIN32 */ +#endif /* _WIN32 */ #endif /* __UTIL_WINDOWS_H__ */ diff --git a/intern/numaapi/include/numaapi.h b/intern/numaapi/include/numaapi.h index a4f32d88458..7b5b50fdf39 100644 --- a/intern/numaapi/include/numaapi.h +++ b/intern/numaapi/include/numaapi.h @@ -67,7 +67,7 @@ int numaAPI_GetNumNodes(void); // Returns truth if the given node is available for compute. bool numaAPI_IsNodeAvailable(int node); -// Getnumber of available processors on a given node. +// Get number of available processors on a given node. int numaAPI_GetNumNodeProcessors(int node); //////////////////////////////////////////////////////////////////////////////// diff --git a/intern/numaapi/source/numaapi_linux.c b/intern/numaapi/source/numaapi_linux.c index 559e97b67d3..62e9dcdfadf 100644 --- a/intern/numaapi/source/numaapi_linux.c +++ b/intern/numaapi/source/numaapi_linux.c @@ -34,6 +34,8 @@ # include <dlfcn.h> #endif +#include <stdio.h> + #ifdef WITH_DYNLOAD // Descriptor numa library. @@ -61,6 +63,7 @@ typedef struct bitmask* tnuma_allocate_nodemask(void); typedef void tnuma_free_cpumask(struct bitmask* bitmask); typedef void tnuma_free_nodemask(struct bitmask* bitmask); typedef int tnuma_run_on_node_mask(struct bitmask *nodemask); +typedef int tnuma_run_on_node_mask_all(struct bitmask *nodemask); typedef void tnuma_set_interleave_mask(struct bitmask *nodemask); typedef void tnuma_set_localalloc(void); @@ -83,6 +86,7 @@ static tnuma_allocate_nodemask* numa_allocate_nodemask; static tnuma_free_nodemask* numa_free_nodemask; static tnuma_free_cpumask* numa_free_cpumask; static tnuma_run_on_node_mask* numa_run_on_node_mask; +static tnuma_run_on_node_mask_all* numa_run_on_node_mask_all; static tnuma_set_interleave_mask* numa_set_interleave_mask; static tnuma_set_localalloc* numa_set_localalloc; @@ -157,6 +161,7 @@ static NUMAAPI_Result loadNumaSymbols(void) { NUMA_LIBRARY_FIND(numa_free_cpumask); NUMA_LIBRARY_FIND(numa_free_nodemask); NUMA_LIBRARY_FIND(numa_run_on_node_mask); + NUMA_LIBRARY_FIND(numa_run_on_node_mask_all); NUMA_LIBRARY_FIND(numa_set_interleave_mask); NUMA_LIBRARY_FIND(numa_set_localalloc); @@ -192,10 +197,7 @@ int numaAPI_GetNumNodes(void) { } bool numaAPI_IsNodeAvailable(int node) { - if (numa_node_size(node, NULL) > 0) { - return true; - } - return false; + return numaAPI_GetNumNodeProcessors(node) > 0; } int numaAPI_GetNumNodeProcessors(int node) { @@ -235,13 +237,15 @@ bool numaAPI_RunThreadOnNode(int node) { struct bitmask* node_mask = numa_allocate_nodemask(); numa_bitmask_clearall(node_mask); numa_bitmask_setbit(node_mask, node); - numa_run_on_node_mask(node_mask); + numa_run_on_node_mask_all(node_mask); // TODO(sergey): The following commands are based on x265 code, we might want // to make those optional, or require to call those explicitly. // // Current assumption is that this is similar to SetThreadGroupAffinity(). - numa_set_interleave_mask(node_mask); - numa_set_localalloc(); + if (numa_node_size(node, NULL) > 0) { + numa_set_interleave_mask(node_mask); + numa_set_localalloc(); + } #ifdef WITH_DYNLOAD if (numa_free_nodemask != NULL) { numa_free_nodemask(node_mask); diff --git a/source/blender/blenkernel/intern/font.c b/source/blender/blenkernel/intern/font.c index 1b6061d5b04..d796110f185 100644 --- a/source/blender/blenkernel/intern/font.c +++ b/source/blender/blenkernel/intern/font.c @@ -244,7 +244,6 @@ VFont *BKE_vfont_load(Main *bmain, const char *filepath) char filename[FILE_MAXFILE]; VFont *vfont = NULL; PackedFile *pf; - PackedFile *temp_pf = NULL; bool is_builtin; if (STREQ(filepath, FO_BUILTIN_NAME)) { @@ -256,7 +255,6 @@ VFont *BKE_vfont_load(Main *bmain, const char *filepath) else { BLI_split_file_part(filepath, filename, sizeof(filename)); pf = newPackedFile(NULL, filepath, BKE_main_blendfile_path(bmain)); - temp_pf = newPackedFile(NULL, filepath, BKE_main_blendfile_path(bmain)); is_builtin = false; } @@ -282,7 +280,7 @@ VFont *BKE_vfont_load(Main *bmain, const char *filepath) /* Do not add FO_BUILTIN_NAME to temporary listbase */ if (!STREQ(filename, FO_BUILTIN_NAME)) { - vfont->temp_pf = temp_pf; + vfont->temp_pf = newPackedFile(NULL, filepath, BKE_main_blendfile_path(bmain)); } } |