diff options
-rw-r--r-- | intern/cycles/device/opencl/device_opencl_impl.cpp | 10 | ||||
-rw-r--r-- | intern/cycles/device/opencl/opencl_util.cpp | 35 | ||||
-rw-r--r-- | intern/cycles/util/CMakeLists.txt | 1 | ||||
-rw-r--r-- | intern/cycles/util/util_semaphore.h | 61 |
4 files changed, 99 insertions, 8 deletions
diff --git a/intern/cycles/device/opencl/device_opencl_impl.cpp b/intern/cycles/device/opencl/device_opencl_impl.cpp index b7a2be79804..2766f85d17c 100644 --- a/intern/cycles/device/opencl/device_opencl_impl.cpp +++ b/intern/cycles/device/opencl/device_opencl_impl.cpp @@ -257,16 +257,16 @@ void OpenCLDevice::OpenCLSplitPrograms::load_kernels( /* Ordered with most complex kernels first, to reduce overall compile time. */ ADD_SPLIT_KERNEL_PROGRAM(subsurface_scatter); + ADD_SPLIT_KERNEL_PROGRAM(direct_lighting); + ADD_SPLIT_KERNEL_PROGRAM(indirect_background); if (requested_features.use_volume || is_preview) { ADD_SPLIT_KERNEL_PROGRAM(do_volume); } + ADD_SPLIT_KERNEL_PROGRAM(shader_eval); + ADD_SPLIT_KERNEL_PROGRAM(lamp_emission); + ADD_SPLIT_KERNEL_PROGRAM(holdout_emission_blurring_pathtermination_ao); ADD_SPLIT_KERNEL_PROGRAM(shadow_blocked_dl); ADD_SPLIT_KERNEL_PROGRAM(shadow_blocked_ao); - ADD_SPLIT_KERNEL_PROGRAM(holdout_emission_blurring_pathtermination_ao); - ADD_SPLIT_KERNEL_PROGRAM(lamp_emission); - ADD_SPLIT_KERNEL_PROGRAM(direct_lighting); - ADD_SPLIT_KERNEL_PROGRAM(indirect_background); - ADD_SPLIT_KERNEL_PROGRAM(shader_eval); /* Quick kernels bundled in a single program to reduce overhead of starting * Blender processes. 
*/ diff --git a/intern/cycles/device/opencl/opencl_util.cpp b/intern/cycles/device/opencl/opencl_util.cpp index 978c75d2e2c..b8b07cf2947 100644 --- a/intern/cycles/device/opencl/opencl_util.cpp +++ b/intern/cycles/device/opencl/opencl_util.cpp @@ -23,6 +23,7 @@ # include "util/util_logging.h" # include "util/util_md5.h" # include "util/util_path.h" +# include "util/util_semaphore.h" # include "util/util_system.h" # include "util/util_time.h" @@ -390,8 +391,27 @@ static void escape_python_string(string &str) string_replace(str, "'", "\'"); } +static int opencl_compile_process_limit() +{ + /* Limit number of concurrent processes compiling, with a heuristic based + * on total physical RAM and estimate of memory usage needed when compiling + * with all Cycles features enabled. + * + * This is somewhat arbitrary as we don't know the actual available RAM or + * how much the kernel compilation will need depending on the features, but + * better than not limiting at all. */ + static const int64_t GB = 1024LL * 1024LL * 1024LL; + static const int64_t process_memory = 2 * GB; + static const int64_t base_memory = 2 * GB; + static const int64_t system_memory = system_physical_ram(); + static const int64_t process_limit = (system_memory - base_memory) / process_memory; + + return max((int)process_limit, 1); +} + bool OpenCLDevice::OpenCLProgram::compile_separate(const string &clbin) { + /* Construct arguments. */ vector<string> args; args.push_back("--background"); args.push_back("--factory-startup"); @@ -419,14 +439,23 @@ bool OpenCLDevice::OpenCLProgram::compile_separate(const string &clbin) kernel_file_escaped.c_str(), clbin_escaped.c_str())); - double starttime = time_dt(); + /* Limit number of concurrent processes compiling. */ + static thread_counting_semaphore semaphore(opencl_compile_process_limit()); + semaphore.acquire(); + + /* Compile. 
*/ + const double starttime = time_dt(); add_log(string("Cycles: compiling OpenCL program ") + program_name + "...", false); add_log(string("Build flags: ") + kernel_build_options, true); - if (!system_call_self(args) || !path_exists(clbin)) { + const bool success = system_call_self(args); + const double elapsed = time_dt() - starttime; + + semaphore.release(); + + if (!success || !path_exists(clbin)) { return false; } - double elapsed = time_dt() - starttime; add_log( string_printf("Kernel compilation of %s finished in %.2lfs.", program_name.c_str(), elapsed), false); diff --git a/intern/cycles/util/CMakeLists.txt b/intern/cycles/util/CMakeLists.txt index ef100c12453..c1f71461dfd 100644 --- a/intern/cycles/util/CMakeLists.txt +++ b/intern/cycles/util/CMakeLists.txt @@ -102,6 +102,7 @@ set(SRC_HEADERS util_sky_model_data.h util_avxf.h util_avxb.h + util_semaphore.h util_sseb.h util_ssef.h util_ssei.h diff --git a/intern/cycles/util/util_semaphore.h b/intern/cycles/util/util_semaphore.h new file mode 100644 index 00000000000..d995b0732b8 --- /dev/null +++ b/intern/cycles/util/util_semaphore.h @@ -0,0 +1,61 @@ +/* + * Copyright 2011-2020 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __UTIL_SEMAPHORE_H__ +#define __UTIL_SEMAPHORE_H__ + +#include "util/util_thread.h" + +CCL_NAMESPACE_BEGIN + +/* Counting Semaphore + * + * To restrict concurrent access to a resource to a specified number + * of threads. 
Similar to std::counting_semaphore from C++20. */ + +class thread_counting_semaphore { + public: + explicit thread_counting_semaphore(const int count) : count(count) + { + } + + thread_counting_semaphore(const thread_counting_semaphore &) = delete; + + void acquire() + { + thread_scoped_lock lock(mutex); + while (count == 0) { + condition.wait(lock); + } + count--; + } + + void release() + { + thread_scoped_lock lock(mutex); + count++; + condition.notify_one(); + } + + protected: + thread_mutex mutex; + thread_condition_variable condition; + int count; +}; + +CCL_NAMESPACE_END + +#endif /* __UTIL_SEMAPHORE_H__ */ |