diff options
author | Sybren A. Stüvel <sybren@blender.org> | 2021-05-20 14:00:07 +0300 |
---|---|---|
committer | Sybren A. Stüvel <sybren@blender.org> | 2021-05-20 14:00:07 +0300 |
commit | 0745afeddb6041e5bc9f862a155e5f30d431a88c (patch) | |
tree | 5ff02a23ce6784381389fa21e30bf0f78abc6c1f /intern | |
parent | 3e3ecc329caa25285a4bb6cfbb69f4eb40797fe4 (diff) | |
parent | a1954e380730ae9d116245343d9296d587907673 (diff) |
Merge remote-tracking branch 'origin/blender-v2.93-release'
Diffstat (limited to 'intern')
-rw-r--r-- | intern/cycles/bvh/bvh_optix.cpp | 7 | ||||
-rw-r--r-- | intern/cycles/bvh/bvh_optix.h | 1 | ||||
-rw-r--r-- | intern/cycles/device/device.h | 4 | ||||
-rw-r--r-- | intern/cycles/device/device_memory.cpp | 46 | ||||
-rw-r--r-- | intern/cycles/device/device_memory.h | 5 | ||||
-rw-r--r-- | intern/cycles/device/device_multi.cpp | 4 | ||||
-rw-r--r-- | intern/cycles/device/device_optix.cpp | 25 | ||||
-rw-r--r-- | intern/cycles/device/opencl/device_opencl.h | 9 | ||||
-rw-r--r-- | intern/cycles/device/opencl/device_opencl_impl.cpp | 90 | ||||
-rw-r--r-- | intern/cycles/kernel/kernel_types.h | 38 | ||||
-rw-r--r-- | intern/cycles/render/scene.cpp | 3 | ||||
-rw-r--r-- | intern/cycles/render/session.cpp | 10 | ||||
-rw-r--r-- | intern/cycles/util/util_vector.h | 4 |
13 files changed, 117 insertions, 129 deletions
diff --git a/intern/cycles/bvh/bvh_optix.cpp b/intern/cycles/bvh/bvh_optix.cpp index d630e8965dc..cd266f72f89 100644 --- a/intern/cycles/bvh/bvh_optix.cpp +++ b/intern/cycles/bvh/bvh_optix.cpp @@ -17,6 +17,8 @@ #ifdef WITH_OPTIX +# include "device/device.h" + # include "bvh/bvh_optix.h" CCL_NAMESPACE_BEGIN @@ -26,6 +28,7 @@ BVHOptiX::BVHOptiX(const BVHParams &params_, const vector<Object *> &objects_, Device *device) : BVH(params_, geometry_, objects_), + device(device), traversable_handle(0), as_data(device, params_.top_level ? "optix tlas" : "optix blas", false), motion_transform_data(device, "optix motion transform", false) @@ -34,7 +37,9 @@ BVHOptiX::BVHOptiX(const BVHParams &params_, BVHOptiX::~BVHOptiX() { - // Acceleration structure memory is freed via the 'as_data' destructor + // Acceleration structure memory is delayed freed on device, since deleting the + // BVH may happen while still being used for rendering. + device->release_optix_bvh(this); } CCL_NAMESPACE_END diff --git a/intern/cycles/bvh/bvh_optix.h b/intern/cycles/bvh/bvh_optix.h index aa514beae0d..ba5d90471d1 100644 --- a/intern/cycles/bvh/bvh_optix.h +++ b/intern/cycles/bvh/bvh_optix.h @@ -28,6 +28,7 @@ CCL_NAMESPACE_BEGIN class BVHOptiX : public BVH { public: + Device *device; uint64_t traversable_handle; device_only_memory<char> as_data; device_only_memory<char> motion_transform_data; diff --git a/intern/cycles/device/device.h b/intern/cycles/device/device.h index b5468248e5a..bdf18d09b31 100644 --- a/intern/cycles/device/device.h +++ b/intern/cycles/device/device.h @@ -61,7 +61,6 @@ enum DeviceTypeMask { }; enum DeviceKernelStatus { - DEVICE_KERNEL_WAITING_FOR_FEATURE_KERNEL = 0, DEVICE_KERNEL_FEATURE_KERNEL_AVAILABLE, DEVICE_KERNEL_USING_FEATURE_KERNEL, DEVICE_KERNEL_FEATURE_KERNEL_INVALID, @@ -427,6 +426,9 @@ class Device { /* acceleration structure building */ virtual void build_bvh(BVH *bvh, Progress &progress, bool refit); + /* OptiX specific destructor. 
*/ + virtual void release_optix_bvh(BVH *){}; + #ifdef WITH_NETWORK /* networking */ void server_run(); diff --git a/intern/cycles/device/device_memory.cpp b/intern/cycles/device/device_memory.cpp index 9eee86b0814..80a05fc32fe 100644 --- a/intern/cycles/device/device_memory.cpp +++ b/intern/cycles/device/device_memory.cpp @@ -35,10 +35,54 @@ device_memory::device_memory(Device *device, const char *name, MemoryType type) device_pointer(0), host_pointer(0), shared_pointer(0), - shared_counter(0) + shared_counter(0), + original_device_ptr(0), + original_device_size(0), + original_device(0), + need_realloc_(false), + modified(false) { } +device_memory::device_memory(device_memory &&other) noexcept + : data_type(other.data_type), + data_elements(other.data_elements), + data_size(other.data_size), + device_size(other.device_size), + data_width(other.data_width), + data_height(other.data_height), + data_depth(other.data_depth), + type(other.type), + name(other.name), + device(other.device), + device_pointer(other.device_pointer), + host_pointer(other.host_pointer), + shared_pointer(other.shared_pointer), + shared_counter(other.shared_counter), + original_device_ptr(other.original_device_ptr), + original_device_size(other.original_device_size), + original_device(other.original_device), + need_realloc_(other.need_realloc_), + modified(other.modified) +{ + other.data_elements = 0; + other.data_size = 0; + other.device_size = 0; + other.data_width = 0; + other.data_height = 0; + other.data_depth = 0; + other.device = 0; + other.device_pointer = 0; + other.host_pointer = 0; + other.shared_pointer = 0; + other.shared_counter = 0; + other.original_device_ptr = 0; + other.original_device_size = 0; + other.original_device = 0; + other.need_realloc_ = false; + other.modified = false; +} + device_memory::~device_memory() { assert(shared_pointer == 0); diff --git a/intern/cycles/device/device_memory.h b/intern/cycles/device/device_memory.h index 97459b9ae6a..80f4d7b0468 100644 --- 
a/intern/cycles/device/device_memory.h +++ b/intern/cycles/device/device_memory.h @@ -238,6 +238,7 @@ class device_memory { /* Only create through subclasses. */ device_memory(Device *device, const char *name, MemoryType type); + device_memory(device_memory &&other) noexcept; /* No copying allowed. */ device_memory(const device_memory &) = delete; @@ -277,6 +278,10 @@ template<typename T> class device_only_memory : public device_memory { data_elements = max(device_type_traits<T>::num_elements, 1); } + device_only_memory(device_only_memory &&other) noexcept : device_memory(std::move(other)) + { + } + virtual ~device_only_memory() { free(); diff --git a/intern/cycles/device/device_multi.cpp b/intern/cycles/device/device_multi.cpp index 35faadcbec5..85ffa5fcd52 100644 --- a/intern/cycles/device/device_multi.cpp +++ b/intern/cycles/device/device_multi.cpp @@ -232,10 +232,6 @@ class MultiDevice : public Device { foreach (SubDevice &sub, devices) { DeviceKernelStatus subresult = sub.device->get_active_kernel_switch_state(); switch (subresult) { - case DEVICE_KERNEL_WAITING_FOR_FEATURE_KERNEL: - result = subresult; - break; - case DEVICE_KERNEL_FEATURE_KERNEL_INVALID: case DEVICE_KERNEL_FEATURE_KERNEL_AVAILABLE: return subresult; diff --git a/intern/cycles/device/device_optix.cpp b/intern/cycles/device/device_optix.cpp index cce11507fa1..01de0724cb2 100644 --- a/intern/cycles/device/device_optix.cpp +++ b/intern/cycles/device/device_optix.cpp @@ -193,6 +193,9 @@ class OptiXDevice : public CUDADevice { device_only_memory<unsigned char> denoiser_state; int denoiser_input_passes = 0; + vector<device_only_memory<char>> delayed_free_bvh_memory; + thread_mutex delayed_free_bvh_mutex; + public: OptiXDevice(DeviceInfo &info_, Stats &stats_, Profiler &profiler_, bool background_) : CUDADevice(info_, stats_, profiler_, background_), @@ -258,6 +261,8 @@ class OptiXDevice : public CUDADevice { // Make CUDA context current const CUDAContextScope scope(cuContext); + 
free_bvh_memory_delayed(); + sbt_data.free(); texture_info.free(); launch_params.free(); @@ -1297,6 +1302,8 @@ class OptiXDevice : public CUDADevice { return; } + free_bvh_memory_delayed(); + BVHOptiX *const bvh_optix = static_cast<BVHOptiX *>(bvh); progress.set_substatus("Building OptiX acceleration structure"); @@ -1767,6 +1774,24 @@ class OptiXDevice : public CUDADevice { } } + void release_optix_bvh(BVH *bvh) override + { + thread_scoped_lock lock(delayed_free_bvh_mutex); + /* Do delayed free of BVH memory, since geometry holding BVH might be deleted + * while GPU is still rendering. */ + BVHOptiX *const bvh_optix = static_cast<BVHOptiX *>(bvh); + + delayed_free_bvh_memory.emplace_back(std::move(bvh_optix->as_data)); + delayed_free_bvh_memory.emplace_back(std::move(bvh_optix->motion_transform_data)); + bvh_optix->traversable_handle = 0; + } + + void free_bvh_memory_delayed() + { + thread_scoped_lock lock(delayed_free_bvh_mutex); + delayed_free_bvh_memory.free_memory(); + } + void const_copy_to(const char *name, void *host, size_t size) override { // Set constant memory for CUDA module diff --git a/intern/cycles/device/opencl/device_opencl.h b/intern/cycles/device/opencl/device_opencl.h index 2d6c6d04214..a65e764b0d4 100644 --- a/intern/cycles/device/opencl/device_opencl.h +++ b/intern/cycles/device/opencl/device_opencl.h @@ -269,7 +269,6 @@ class OpenCLDevice : public Device { cl_device_id cdDevice; cl_int ciErr; int device_num; - bool use_preview_kernels; class OpenCLProgram { public: @@ -369,8 +368,7 @@ class OpenCLDevice : public Device { /* Load the kernels and put the created kernels in the given * `programs` parameter. 
*/ void load_kernels(vector<OpenCLProgram *> &programs, - const DeviceRequestedFeatures &requested_features, - bool is_preview = false); + const DeviceRequestedFeatures &requested_features); }; DeviceSplitKernel *split_kernel; @@ -382,7 +380,6 @@ class OpenCLDevice : public Device { OpenCLProgram denoising_program; OpenCLSplitPrograms kernel_programs; - OpenCLSplitPrograms preview_programs; typedef map<string, device_vector<uchar> *> ConstMemMap; typedef map<string, device_ptr> MemMap; @@ -412,7 +409,6 @@ class OpenCLDevice : public Device { string device_md5_hash(string kernel_custom_build_options = ""); bool load_kernels(const DeviceRequestedFeatures &requested_features); void load_required_kernels(const DeviceRequestedFeatures &requested_features); - void load_preview_kernels(); bool wait_for_availability(const DeviceRequestedFeatures &requested_features); DeviceKernelStatus get_active_kernel_switch_state(); @@ -422,8 +418,7 @@ class OpenCLDevice : public Device { /* Get the program file name to compile (*.cl) for the given kernel */ const string get_opencl_program_filename(const string &kernel_name); string get_build_options(const DeviceRequestedFeatures &requested_features, - const string &opencl_program_name, - bool preview_kernel = false); + const string &opencl_program_name); /* Enable the default features to reduce recompilation events */ void enable_default_features(DeviceRequestedFeatures &features); diff --git a/intern/cycles/device/opencl/device_opencl_impl.cpp b/intern/cycles/device/opencl/device_opencl_impl.cpp index b1d6284171a..715213175c9 100644 --- a/intern/cycles/device/opencl/device_opencl_impl.cpp +++ b/intern/cycles/device/opencl/device_opencl_impl.cpp @@ -107,8 +107,7 @@ void OpenCLDevice::enable_default_features(DeviceRequestedFeatures &features) } string OpenCLDevice::get_build_options(const DeviceRequestedFeatures &requested_features, - const string &opencl_program_name, - bool preview_kernel) + const string &opencl_program_name) { /* 
first check for non-split kernel programs */ if (opencl_program_name == "base" || opencl_program_name == "denoising") { @@ -185,13 +184,7 @@ string OpenCLDevice::get_build_options(const DeviceRequestedFeatures &requested_ enable_default_features(nofeatures); /* Add program specific optimized compile directives */ - if (preview_kernel) { - DeviceRequestedFeatures preview_features; - preview_features.use_hair = true; - build_options += "-D__KERNEL_AO_PREVIEW__ "; - build_options += preview_features.get_build_options(); - } - else if (opencl_program_name == "split_do_volume" && !requested_features.use_volume) { + if (opencl_program_name == "split_do_volume" && !requested_features.use_volume) { build_options += nofeatures.get_build_options(); } else { @@ -238,9 +231,7 @@ OpenCLDevice::OpenCLSplitPrograms::~OpenCLSplitPrograms() } void OpenCLDevice::OpenCLSplitPrograms::load_kernels( - vector<OpenCLProgram *> &programs, - const DeviceRequestedFeatures &requested_features, - bool is_preview) + vector<OpenCLProgram *> &programs, const DeviceRequestedFeatures &requested_features) { if (!requested_features.use_baking) { # define ADD_SPLIT_KERNEL_BUNDLE_PROGRAM(kernel_name) \ @@ -251,7 +242,7 @@ void OpenCLDevice::OpenCLSplitPrograms::load_kernels( device, \ program_name_##kernel_name, \ "kernel_" #kernel_name ".cl", \ - device->get_build_options(requested_features, program_name_##kernel_name, is_preview)); \ + device->get_build_options(requested_features, program_name_##kernel_name)); \ program_##kernel_name.add_kernel(ustring("path_trace_" #kernel_name)); \ programs.push_back(&program_##kernel_name); @@ -259,7 +250,7 @@ void OpenCLDevice::OpenCLSplitPrograms::load_kernels( ADD_SPLIT_KERNEL_PROGRAM(subsurface_scatter); ADD_SPLIT_KERNEL_PROGRAM(direct_lighting); ADD_SPLIT_KERNEL_PROGRAM(indirect_background); - if (requested_features.use_volume || is_preview) { + if (requested_features.use_volume) { ADD_SPLIT_KERNEL_PROGRAM(do_volume); } ADD_SPLIT_KERNEL_PROGRAM(shader_eval); 
@@ -274,7 +265,7 @@ void OpenCLDevice::OpenCLSplitPrograms::load_kernels( device, "split_bundle", "kernel_split_bundle.cl", - device->get_build_options(requested_features, "split_bundle", is_preview)); + device->get_build_options(requested_features, "split_bundle")); ADD_SPLIT_KERNEL_BUNDLE_PROGRAM(data_init); ADD_SPLIT_KERNEL_BUNDLE_PROGRAM(state_buffer_size); @@ -403,7 +394,7 @@ class OpenCLSplitKernel : public DeviceSplitKernel { device, program_name, device->get_opencl_program_filename(kernel_name), - device->get_build_options(requested_features, program_name, device->use_preview_kernels)); + device->get_build_options(requested_features, program_name)); kernel->program.add_kernel(ustring("path_trace_" + kernel_name)); kernel->program.load(); @@ -617,7 +608,6 @@ OpenCLDevice::OpenCLDevice(DeviceInfo &info, Stats &stats, Profiler &profiler, b : Device(info, stats, profiler, background), load_kernel_num_compiling(0), kernel_programs(this), - preview_programs(this), memory_manager(this), texture_info(this, "__texture_info", MEM_GLOBAL) { @@ -627,7 +617,6 @@ OpenCLDevice::OpenCLDevice(DeviceInfo &info, Stats &stats, Profiler &profiler, b cqCommandQueue = NULL; device_initialized = false; textures_need_update = true; - use_preview_kernels = !background; vector<OpenCLPlatformDevice> usable_devices; OpenCLInfo::get_usable_devices(&usable_devices); @@ -683,9 +672,6 @@ OpenCLDevice::OpenCLDevice(DeviceInfo &info, Stats &stats, Profiler &profiler, b device_initialized = true; split_kernel = new OpenCLSplitKernel(this); - if (use_preview_kernels) { - load_preview_kernels(); - } } OpenCLDevice::~OpenCLDevice() @@ -776,7 +762,7 @@ bool OpenCLDevice::load_kernels(const DeviceRequestedFeatures &requested_feature load_required_kernels(requested_features); vector<OpenCLProgram *> programs; - kernel_programs.load_kernels(programs, requested_features, false); + kernel_programs.load_kernels(programs, requested_features); if (!requested_features.use_baking && 
requested_features.use_denoising) { denoising_program = OpenCLProgram( @@ -854,19 +840,6 @@ void OpenCLDevice::load_required_kernels(const DeviceRequestedFeatures &requeste } } -void OpenCLDevice::load_preview_kernels() -{ - DeviceRequestedFeatures no_features; - vector<OpenCLProgram *> programs; - preview_programs.load_kernels(programs, no_features, true); - - foreach (OpenCLProgram *program, programs) { - if (!program->load()) { - load_required_kernel_task_pool.push(function_bind(&OpenCLProgram::compile, program)); - } - } -} - bool OpenCLDevice::wait_for_availability(const DeviceRequestedFeatures &requested_features) { if (requested_features.use_baking) { @@ -874,59 +847,18 @@ bool OpenCLDevice::wait_for_availability(const DeviceRequestedFeatures &requeste return true; } - if (background) { - load_kernel_task_pool.wait_work(); - use_preview_kernels = false; - } - else { - /* We use a device setting to determine to load preview kernels or not - * Better to check on device level than per kernel as mixing preview and - * non-preview kernels does not work due to different data types */ - if (use_preview_kernels) { - use_preview_kernels = load_kernel_num_compiling.load() > 0; - } - } + load_kernel_task_pool.wait_work(); return split_kernel->load_kernels(requested_features); } OpenCLDevice::OpenCLSplitPrograms *OpenCLDevice::get_split_programs() { - return use_preview_kernels ? 
&preview_programs : &kernel_programs; + return &kernel_programs; } DeviceKernelStatus OpenCLDevice::get_active_kernel_switch_state() { - /* Do not switch kernels for background renderings - * We do foreground rendering but use the preview kernels - * Check for the optimized kernels - * - * This works also the other way around, where we are using - * optimized kernels but new ones are being compiled due - * to other features that are needed */ - if (background) { - /* The if-statements below would find the same result, - * But as the `finished` method uses a mutex we added - * this as an early exit */ - return DEVICE_KERNEL_USING_FEATURE_KERNEL; - } - - bool other_kernels_finished = load_kernel_num_compiling.load() == 0; - if (use_preview_kernels) { - if (other_kernels_finished) { - return DEVICE_KERNEL_FEATURE_KERNEL_AVAILABLE; - } - else { - return DEVICE_KERNEL_WAITING_FOR_FEATURE_KERNEL; - } - } - else { - if (other_kernels_finished) { - return DEVICE_KERNEL_USING_FEATURE_KERNEL; - } - else { - return DEVICE_KERNEL_FEATURE_KERNEL_INVALID; - } - } + return DEVICE_KERNEL_USING_FEATURE_KERNEL; } void OpenCLDevice::mem_alloc(device_memory &mem) diff --git a/intern/cycles/kernel/kernel_types.h b/intern/cycles/kernel/kernel_types.h index 18c4d2f86ad..74fa2826cd4 100644 --- a/intern/cycles/kernel/kernel_types.h +++ b/intern/cycles/kernel/kernel_types.h @@ -99,27 +99,23 @@ CCL_NAMESPACE_BEGIN #define __AO__ #define __PASSES__ #define __HAIR__ - -/* Without these we get an AO render, used by OpenCL preview kernel. 
*/ -#ifndef __KERNEL_AO_PREVIEW__ -# define __SVM__ -# define __EMISSION__ -# define __HOLDOUT__ -# define __MULTI_CLOSURE__ -# define __TRANSPARENT_SHADOWS__ -# define __BACKGROUND_MIS__ -# define __LAMP_MIS__ -# define __CAMERA_MOTION__ -# define __OBJECT_MOTION__ -# define __BAKING__ -# define __PRINCIPLED__ -# define __SUBSURFACE__ -# define __VOLUME__ -# define __VOLUME_SCATTER__ -# define __CMJ__ -# define __SHADOW_RECORD_ALL__ -# define __BRANCHED_PATH__ -#endif +#define __SVM__ +#define __EMISSION__ +#define __HOLDOUT__ +#define __MULTI_CLOSURE__ +#define __TRANSPARENT_SHADOWS__ +#define __BACKGROUND_MIS__ +#define __LAMP_MIS__ +#define __CAMERA_MOTION__ +#define __OBJECT_MOTION__ +#define __BAKING__ +#define __PRINCIPLED__ +#define __SUBSURFACE__ +#define __VOLUME__ +#define __VOLUME_SCATTER__ +#define __CMJ__ +#define __SHADOW_RECORD_ALL__ +#define __BRANCHED_PATH__ /* Device specific features */ #ifdef __KERNEL_CPU__ diff --git a/intern/cycles/render/scene.cpp b/intern/cycles/render/scene.cpp index f753bb43c42..c4e7d2c79d6 100644 --- a/intern/cycles/render/scene.cpp +++ b/intern/cycles/render/scene.cpp @@ -542,9 +542,6 @@ bool Scene::update(Progress &progress, bool &kernel_switch_needed) DeviceKernelStatus kernel_switch_status = device->get_active_kernel_switch_state(); kernel_switch_needed = kernel_switch_status == DEVICE_KERNEL_FEATURE_KERNEL_AVAILABLE || kernel_switch_status == DEVICE_KERNEL_FEATURE_KERNEL_INVALID; - if (kernel_switch_status == DEVICE_KERNEL_WAITING_FOR_FEATURE_KERNEL) { - progress.set_kernel_status("Compiling render kernels"); - } if (new_kernels_needed || kernel_switch_needed) { progress.set_kernel_status("Compiling render kernels"); device->wait_for_availability(loaded_kernel_features); diff --git a/intern/cycles/render/session.cpp b/intern/cycles/render/session.cpp index 3c601e18126..7830ca2293a 100644 --- a/intern/cycles/render/session.cpp +++ b/intern/cycles/render/session.cpp @@ -243,11 +243,6 @@ void Session::run_gpu() } } - 
/* Don't go in pause mode when image was rendered with preview kernels - * When feature kernels become available the session will be reset. */ - else if (no_tiles && kernel_state == DEVICE_KERNEL_WAITING_FOR_FEATURE_KERNEL) { - time_sleep(0.1); - } else if (no_tiles && kernel_state == DEVICE_KERNEL_FEATURE_KERNEL_AVAILABLE) { reset_gpu(tile_manager.params, params.samples); } @@ -762,11 +757,6 @@ void Session::run_cpu() } } - /* Don't go in pause mode when preview kernels are used - * When feature kernels become available the session will be reset. */ - else if (no_tiles && kernel_state == DEVICE_KERNEL_WAITING_FOR_FEATURE_KERNEL) { - time_sleep(0.1); - } else if (no_tiles && kernel_state == DEVICE_KERNEL_FEATURE_KERNEL_AVAILABLE) { reset_cpu(tile_manager.params, params.samples); } diff --git a/intern/cycles/util/util_vector.h b/intern/cycles/util/util_vector.h index 04fb33368d9..87cd4de8438 100644 --- a/intern/cycles/util/util_vector.h +++ b/intern/cycles/util/util_vector.h @@ -43,8 +43,8 @@ class vector : public std::vector<value_type, allocator_type> { /* Try as hard as possible to use zero memory. */ void free_memory() { - BaseClass::resize(0); - BaseClass::shrink_to_fit(); + vector<value_type, allocator_type> empty; + BaseClass::swap(empty); } /* Some external API might demand working with std::vector. */ |