Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
path: root/intern
diff options
context:
space:
mode:
authorPatrick Mours <pmours@nvidia.com>2020-06-09 20:46:16 +0300
committerPatrick Mours <pmours@nvidia.com>2020-06-10 15:12:13 +0300
commitf367f1e5a55e1c657f9d2088f6537fb2e73492f0 (patch)
treea0cd5f9e7923cfc3ddb427c3a4677d2cca9c74d7 /intern
parentd12f6a90b19084fa0ab1689a01b11a7b6cfa1cca (diff)
Cycles: Improve OptiX viewport denoising performance with CUDA rendering
With this patch Cycles recognizes when a logical OptiX and CUDA device represent the same physical GPU and attempts to eliminate unnecessary tile copies for viewport rendering if that is the case for all active devices. In addition, denoising is now no longer performed on the first available OptiX device only, but instead it will try to match CUDA and OptiX rendering/denoising devices exactly to maximize utilization. Reviewed By: brecht Differential Revision: https://developer.blender.org/D7975
Diffstat (limited to 'intern')
-rw-r--r--intern/cycles/blender/blender_device.cpp23
-rw-r--r--intern/cycles/device/device_multi.cpp98
-rw-r--r--intern/cycles/device/device_task.cpp3
-rw-r--r--intern/cycles/render/session.cpp13
4 files changed, 95 insertions, 42 deletions
diff --git a/intern/cycles/blender/blender_device.cpp b/intern/cycles/blender/blender_device.cpp
index 3a923459782..ac52948806c 100644
--- a/intern/cycles/blender/blender_device.cpp
+++ b/intern/cycles/blender/blender_device.cpp
@@ -141,10 +141,25 @@ DeviceInfo blender_device_info(BL::Preferences &b_preferences, BL::Scene &b_scen
device.multi_devices.push_back(device);
}
- /* Simply use the first available OptiX device. */
- const DeviceInfo optix_device = optix_devices.front();
- device.id += optix_device.id; /* Uniquely identify this special multi device. */
- device.denoising_devices.push_back(optix_device);
+ /* Try to use the same physical devices for denoising. */
+ for (const DeviceInfo &cuda_device : device.multi_devices) {
+ if (cuda_device.type == DEVICE_CUDA) {
+ for (const DeviceInfo &optix_device : optix_devices) {
+ if (cuda_device.num == optix_device.num) {
+ device.id += optix_device.id;
+ device.denoising_devices.push_back(optix_device);
+ break;
+ }
+ }
+ }
+ }
+
+ if (device.denoising_devices.empty()) {
+ /* Simply use the first available OptiX device. */
+ const DeviceInfo optix_device = optix_devices.front();
+ device.id += optix_device.id; /* Uniquely identify this special multi device. */
+ device.denoising_devices.push_back(optix_device);
+ }
}
}
diff --git a/intern/cycles/device/device_multi.cpp b/intern/cycles/device/device_multi.cpp
index 77ede3bf62a..ef1687ddd3a 100644
--- a/intern/cycles/device/device_multi.cpp
+++ b/intern/cycles/device/device_multi.cpp
@@ -43,6 +43,7 @@ class MultiDevice : public Device {
list<SubDevice> devices, denoising_devices;
device_ptr unique_key;
vector<vector<SubDevice *>> peer_islands;
+ bool matching_rendering_and_denoising_devices;
MultiDevice(DeviceInfo &info, Stats &stats, Profiler &profiler, bool background_)
: Device(info, stats, profiler, background_), unique_key(1)
@@ -66,8 +67,8 @@ class MultiDevice : public Device {
}
foreach (DeviceInfo &subinfo, info.denoising_devices) {
- denoising_devices.emplace_back();
- SubDevice *sub = &denoising_devices.back();
+ denoising_devices.emplace_front();
+ SubDevice *sub = &denoising_devices.front();
sub->device = Device::create(subinfo, sub->stats, profiler, background);
}
@@ -96,6 +97,27 @@ class MultiDevice : public Device {
}
}
+ /* Try to re-use memory when denoising and render devices use the same physical devices
+ * (e.g. OptiX denoising and CUDA rendering device pointing to the same GPU).
+ * Ordering has to match as well, so that 'DeviceTask::split' behaves consistent. */
+ matching_rendering_and_denoising_devices = denoising_devices.empty() ||
+ (devices.size() == denoising_devices.size());
+ if (matching_rendering_and_denoising_devices) {
+ for (list<SubDevice>::iterator device_it = devices.begin(),
+ denoising_device_it = denoising_devices.begin();
+ device_it != devices.end() && denoising_device_it != denoising_devices.end();
+ ++device_it, ++denoising_device_it) {
+ const DeviceInfo &info = device_it->device->info;
+ const DeviceInfo &denoising_info = denoising_device_it->device->info;
+ if ((info.type != DEVICE_CUDA && info.type != DEVICE_OPTIX) ||
+ (denoising_info.type != DEVICE_CUDA && denoising_info.type != DEVICE_OPTIX) ||
+ info.num != denoising_info.num) {
+ matching_rendering_and_denoising_devices = false;
+ break;
+ }
+ }
+ }
+
#ifdef WITH_NETWORK
/* try to add network devices */
ServerDiscovery discovery(true);
@@ -232,7 +254,7 @@ class MultiDevice : public Device {
SubDevice *find_matching_mem_device(device_ptr key, SubDevice &sub)
{
- assert(sub.peer_island_index >= 0 && key != 0);
+ assert(key != 0 && (sub.peer_island_index >= 0 || sub.ptr_map.find(key) != sub.ptr_map.end()));
/* Get the memory owner of this key (first try current device, then peer devices) */
SubDevice *owner_sub = &sub;
@@ -377,6 +399,9 @@ class MultiDevice : public Device {
* Similarily the tile buffers also need to be allocated separately on all devices so any
* overlap rendered for denoising does not interfer with each other */
if (strcmp(mem.name, "RenderBuffers") == 0) {
+ vector<device_ptr> device_pointers;
+ device_pointers.reserve(devices.size());
+
foreach (SubDevice &sub, devices) {
mem.device = sub.device;
mem.device_pointer = (existing_key) ? sub.ptr_map[existing_key] : 0;
@@ -384,14 +409,22 @@ class MultiDevice : public Device {
sub.device->mem_zero(mem);
sub.ptr_map[key] = mem.device_pointer;
+
+ device_pointers.push_back(mem.device_pointer);
}
foreach (SubDevice &sub, denoising_devices) {
- mem.device = sub.device;
- mem.device_pointer = (existing_key) ? sub.ptr_map[existing_key] : 0;
- mem.device_size = existing_size;
+ if (matching_rendering_and_denoising_devices) {
+ sub.ptr_map[key] = device_pointers.front();
+ device_pointers.erase(device_pointers.begin());
+ }
+ else {
+ mem.device = sub.device;
+ mem.device_pointer = (existing_key) ? sub.ptr_map[existing_key] : 0;
+ mem.device_size = existing_size;
- sub.device->mem_zero(mem);
- sub.ptr_map[key] = mem.device_pointer;
+ sub.device->mem_zero(mem);
+ sub.ptr_map[key] = mem.device_pointer;
+ }
}
}
else {
@@ -427,12 +460,17 @@ class MultiDevice : public Device {
sub.ptr_map.erase(sub.ptr_map.find(key));
}
foreach (SubDevice &sub, denoising_devices) {
- mem.device = sub.device;
- mem.device_pointer = sub.ptr_map[key];
- mem.device_size = existing_size;
+ if (matching_rendering_and_denoising_devices) {
+ sub.ptr_map.erase(key);
+ }
+ else {
+ mem.device = sub.device;
+ mem.device_pointer = sub.ptr_map[key];
+ mem.device_size = existing_size;
- sub.device->mem_free(mem);
- sub.ptr_map.erase(sub.ptr_map.find(key));
+ sub.device->mem_free(mem);
+ sub.ptr_map.erase(sub.ptr_map.find(key));
+ }
}
}
else {
@@ -553,7 +591,7 @@ class MultiDevice : public Device {
device_vector<float> &mem = tiles[i].buffers->buffer;
tiles[i].buffer = mem.device_pointer;
- if (mem.device == this && denoising_devices.empty()) {
+ if (mem.device == this && matching_rendering_and_denoising_devices) {
/* Skip unnecessary copies in viewport mode (buffer covers the
* whole image), but still need to fix up the tile device pointer. */
map_tile(sub_device, tiles[i]);
@@ -597,7 +635,7 @@ class MultiDevice : public Device {
{
device_vector<float> &mem = tiles[9].buffers->buffer;
- if (mem.device == this && denoising_devices.empty()) {
+ if (mem.device == this && matching_rendering_and_denoising_devices) {
return;
}
@@ -670,23 +708,23 @@ class MultiDevice : public Device {
DeviceTask subtask = tasks.front();
tasks.pop_front();
- if (task.type == DeviceTask::DENOISE_BUFFER && !denoising_devices.empty()) {
- subtask.buffer = sub.ptr_map[task.buffer];
- }
- else {
- if (task.buffer)
- subtask.buffer = find_matching_mem(task.buffer, sub);
- if (task.rgba_byte)
- subtask.rgba_byte = sub.ptr_map[task.rgba_byte];
- if (task.rgba_half)
- subtask.rgba_half = sub.ptr_map[task.rgba_half];
- if (task.shader_input)
- subtask.shader_input = find_matching_mem(task.shader_input, sub);
- if (task.shader_output)
- subtask.shader_output = find_matching_mem(task.shader_output, sub);
- }
+ if (task.buffer)
+ subtask.buffer = find_matching_mem(task.buffer, sub);
+ if (task.rgba_byte)
+ subtask.rgba_byte = sub.ptr_map[task.rgba_byte];
+ if (task.rgba_half)
+ subtask.rgba_half = sub.ptr_map[task.rgba_half];
+ if (task.shader_input)
+ subtask.shader_input = find_matching_mem(task.shader_input, sub);
+ if (task.shader_output)
+ subtask.shader_output = find_matching_mem(task.shader_output, sub);
sub.device->task_add(subtask);
+
+ if (task.buffers && task.buffers->buffer.device == this) {
+ /* Synchronize access to RenderBuffers, since 'map_neighbor_tiles' is not thread-safe. */
+ sub.device->task_wait();
+ }
}
}
}
diff --git a/intern/cycles/device/device_task.cpp b/intern/cycles/device/device_task.cpp
index d2447eae867..7485e1b41de 100644
--- a/intern/cycles/device/device_task.cpp
+++ b/intern/cycles/device/device_task.cpp
@@ -44,7 +44,8 @@ DeviceTask::DeviceTask(Type type_)
shader_eval_type(0),
shader_filter(0),
shader_x(0),
- shader_w(0)
+ shader_w(0),
+ buffers(nullptr)
{
last_update_time = time_dt();
}
diff --git a/intern/cycles/render/session.cpp b/intern/cycles/render/session.cpp
index 361a1465aac..7c50140ecfe 100644
--- a/intern/cycles/render/session.cpp
+++ b/intern/cycles/render/session.cpp
@@ -436,6 +436,12 @@ bool Session::acquire_tile(RenderTile &rtile, Device *tile_device, uint tile_typ
/* Reset copy state, since buffer contents change after the tile was acquired */
buffers->map_neighbor_copied = false;
+ /* This hack ensures that the copy in 'MultiDevice::map_neighbor_tiles' accounts
+ * for the buffer resolution divider. */
+ buffers->buffer.data_width = (buffers->params.width * buffers->params.get_passes_size()) /
+ tile_manager.state.resolution_divider;
+ buffers->buffer.data_height = buffers->params.height / tile_manager.state.resolution_divider;
+
return true;
}
@@ -1125,13 +1131,6 @@ bool Session::render_need_denoise(bool &delayed)
return false;
}
- /* Cannot denoise with resolution divider and separate denoising devices.
- * It breaks the copy in 'MultiDevice::map_neighbor_tiles' (which operates on
- * the full buffer dimensions and not the scaled ones). */
- if (!params.device.denoising_devices.empty() && tile_manager.state.resolution_divider > 1) {
- return false;
- }
-
/* Avoid excessive denoising in viewport after reaching a certain amount of samples. */
delayed = (tile_manager.state.sample >= 20 &&
(time_dt() - last_display_time) < params.progressive_update_timeout);