diff options
Diffstat (limited to 'intern')
-rw-r--r-- | intern/cycles/blender/blender_device.cpp | 23 | ||||
-rw-r--r-- | intern/cycles/device/device_multi.cpp | 98 | ||||
-rw-r--r-- | intern/cycles/device/device_task.cpp | 3 | ||||
-rw-r--r-- | intern/cycles/render/session.cpp | 13 |
4 files changed, 95 insertions, 42 deletions
diff --git a/intern/cycles/blender/blender_device.cpp b/intern/cycles/blender/blender_device.cpp index 3a923459782..ac52948806c 100644 --- a/intern/cycles/blender/blender_device.cpp +++ b/intern/cycles/blender/blender_device.cpp @@ -141,10 +141,25 @@ DeviceInfo blender_device_info(BL::Preferences &b_preferences, BL::Scene &b_scen device.multi_devices.push_back(device); } - /* Simply use the first available OptiX device. */ - const DeviceInfo optix_device = optix_devices.front(); - device.id += optix_device.id; /* Uniquely identify this special multi device. */ - device.denoising_devices.push_back(optix_device); + /* Try to use the same physical devices for denoising. */ + for (const DeviceInfo &cuda_device : device.multi_devices) { + if (cuda_device.type == DEVICE_CUDA) { + for (const DeviceInfo &optix_device : optix_devices) { + if (cuda_device.num == optix_device.num) { + device.id += optix_device.id; + device.denoising_devices.push_back(optix_device); + break; + } + } + } + } + + if (device.denoising_devices.empty()) { + /* Simply use the first available OptiX device. */ + const DeviceInfo optix_device = optix_devices.front(); + device.id += optix_device.id; /* Uniquely identify this special multi device. */ + device.denoising_devices.push_back(optix_device); + } } } diff --git a/intern/cycles/device/device_multi.cpp b/intern/cycles/device/device_multi.cpp index 77ede3bf62a..ef1687ddd3a 100644 --- a/intern/cycles/device/device_multi.cpp +++ b/intern/cycles/device/device_multi.cpp @@ -43,6 +43,7 @@ class MultiDevice : public Device { list<SubDevice> devices, denoising_devices; device_ptr unique_key; vector<vector<SubDevice *>> peer_islands; + bool matching_rendering_and_denoising_devices; MultiDevice(DeviceInfo &info, Stats &stats, Profiler &profiler, bool background_) : Device(info, stats, profiler, background_), unique_key(1) @@ -66,8 +67,8 @@ class MultiDevice : public Device { } foreach (DeviceInfo &subinfo, info.denoising_devices) { - denoising_devices.emplace_back(); - SubDevice *sub = &denoising_devices.back(); + denoising_devices.emplace_front(); + SubDevice *sub = &denoising_devices.front(); sub->device = Device::create(subinfo, sub->stats, profiler, background); } @@ -96,6 +97,27 @@ class MultiDevice : public Device { } } + /* Try to re-use memory when denoising and render devices use the same physical devices + * (e.g. OptiX denoising and CUDA rendering device pointing to the same GPU). + * Ordering has to match as well, so that 'DeviceTask::split' behaves consistent. */ + matching_rendering_and_denoising_devices = denoising_devices.empty() || + (devices.size() == denoising_devices.size()); + if (matching_rendering_and_denoising_devices) { + for (list<SubDevice>::iterator device_it = devices.begin(), + denoising_device_it = denoising_devices.begin(); + device_it != devices.end() && denoising_device_it != denoising_devices.end(); + ++device_it, ++denoising_device_it) { + const DeviceInfo &info = device_it->device->info; + const DeviceInfo &denoising_info = denoising_device_it->device->info; + if ((info.type != DEVICE_CUDA && info.type != DEVICE_OPTIX) || + (denoising_info.type != DEVICE_CUDA && denoising_info.type != DEVICE_OPTIX) || + info.num != denoising_info.num) { + matching_rendering_and_denoising_devices = false; + break; + } + } + } + #ifdef WITH_NETWORK /* try to add network devices */ ServerDiscovery discovery(true); @@ -232,7 +254,7 @@ class MultiDevice : public Device { SubDevice *find_matching_mem_device(device_ptr key, SubDevice &sub) { - assert(sub.peer_island_index >= 0 && key != 0); + assert(key != 0 && (sub.peer_island_index >= 0 || sub.ptr_map.find(key) != sub.ptr_map.end())); /* Get the memory owner of this key (first try current device, then peer devices) */ SubDevice *owner_sub = ⊂ @@ -377,6 +399,9 @@ class MultiDevice : public Device { * Similarily the tile buffers also need to be allocated separately on all devices so any * overlap rendered for denoising does not interfer with each other */ if (strcmp(mem.name, "RenderBuffers") == 0) { + vector<device_ptr> device_pointers; + device_pointers.reserve(devices.size()); + foreach (SubDevice &sub, devices) { mem.device = sub.device; mem.device_pointer = (existing_key) ? sub.ptr_map[existing_key] : 0; @@ -384,14 +409,22 @@ class MultiDevice : public Device { sub.device->mem_zero(mem); sub.ptr_map[key] = mem.device_pointer; + + device_pointers.push_back(mem.device_pointer); } foreach (SubDevice &sub, denoising_devices) { - mem.device = sub.device; - mem.device_pointer = (existing_key) ? sub.ptr_map[existing_key] : 0; - mem.device_size = existing_size; + if (matching_rendering_and_denoising_devices) { + sub.ptr_map[key] = device_pointers.front(); + device_pointers.erase(device_pointers.begin()); + } + else { + mem.device = sub.device; + mem.device_pointer = (existing_key) ? sub.ptr_map[existing_key] : 0; + mem.device_size = existing_size; - sub.device->mem_zero(mem); - sub.ptr_map[key] = mem.device_pointer; + sub.device->mem_zero(mem); + sub.ptr_map[key] = mem.device_pointer; + } } } else { @@ -427,12 +460,17 @@ class MultiDevice : public Device { sub.ptr_map.erase(sub.ptr_map.find(key)); } foreach (SubDevice &sub, denoising_devices) { - mem.device = sub.device; - mem.device_pointer = sub.ptr_map[key]; - mem.device_size = existing_size; + if (matching_rendering_and_denoising_devices) { + sub.ptr_map.erase(key); + } + else { + mem.device = sub.device; + mem.device_pointer = sub.ptr_map[key]; + mem.device_size = existing_size; - sub.device->mem_free(mem); - sub.ptr_map.erase(sub.ptr_map.find(key)); + sub.device->mem_free(mem); + sub.ptr_map.erase(sub.ptr_map.find(key)); + } } } else { @@ -553,7 +591,7 @@ class MultiDevice : public Device { device_vector<float> &mem = tiles[i].buffers->buffer; tiles[i].buffer = mem.device_pointer; - if (mem.device == this && denoising_devices.empty()) { + if (mem.device == this && matching_rendering_and_denoising_devices) { /* Skip unnecessary copies in viewport mode (buffer covers the * whole image), but still need to fix up the tile device pointer. */ map_tile(sub_device, tiles[i]); @@ -597,7 +635,7 @@ class MultiDevice : public Device { { device_vector<float> &mem = tiles[9].buffers->buffer; - if (mem.device == this && denoising_devices.empty()) { + if (mem.device == this && matching_rendering_and_denoising_devices) { return; } @@ -670,23 +708,23 @@ class MultiDevice : public Device { DeviceTask subtask = tasks.front(); tasks.pop_front(); - if (task.type == DeviceTask::DENOISE_BUFFER && !denoising_devices.empty()) { - subtask.buffer = sub.ptr_map[task.buffer]; - } - else { - if (task.buffer) - subtask.buffer = find_matching_mem(task.buffer, sub); - if (task.rgba_byte) - subtask.rgba_byte = sub.ptr_map[task.rgba_byte]; - if (task.rgba_half) - subtask.rgba_half = sub.ptr_map[task.rgba_half]; - if (task.shader_input) - subtask.shader_input = find_matching_mem(task.shader_input, sub); - if (task.shader_output) - subtask.shader_output = find_matching_mem(task.shader_output, sub); - } + if (task.buffer) + subtask.buffer = find_matching_mem(task.buffer, sub); + if (task.rgba_byte) + subtask.rgba_byte = sub.ptr_map[task.rgba_byte]; + if (task.rgba_half) + subtask.rgba_half = sub.ptr_map[task.rgba_half]; + if (task.shader_input) + subtask.shader_input = find_matching_mem(task.shader_input, sub); + if (task.shader_output) + subtask.shader_output = find_matching_mem(task.shader_output, sub); sub.device->task_add(subtask); + + if (task.buffers && task.buffers->buffer.device == this) { + /* Synchronize access to RenderBuffers, since 'map_neighbor_tiles' is not thread-safe. */ + sub.device->task_wait(); + } } } } diff --git a/intern/cycles/device/device_task.cpp b/intern/cycles/device/device_task.cpp index d2447eae867..7485e1b41de 100644 --- a/intern/cycles/device/device_task.cpp +++ b/intern/cycles/device/device_task.cpp @@ -44,7 +44,8 @@ DeviceTask::DeviceTask(Type type_) shader_eval_type(0), shader_filter(0), shader_x(0), - shader_w(0) + shader_w(0), + buffers(nullptr) { last_update_time = time_dt(); } diff --git a/intern/cycles/render/session.cpp b/intern/cycles/render/session.cpp index 361a1465aac..7c50140ecfe 100644 --- a/intern/cycles/render/session.cpp +++ b/intern/cycles/render/session.cpp @@ -436,6 +436,12 @@ bool Session::acquire_tile(RenderTile &rtile, Device *tile_device, uint tile_typ /* Reset copy state, since buffer contents change after the tile was acquired */ buffers->map_neighbor_copied = false; + /* This hack ensures that the copy in 'MultiDevice::map_neighbor_tiles' accounts + * for the buffer resolution divider. */ + buffers->buffer.data_width = (buffers->params.width * buffers->params.get_passes_size()) / + tile_manager.state.resolution_divider; + buffers->buffer.data_height = buffers->params.height / tile_manager.state.resolution_divider; + return true; } @@ -1125,13 +1131,6 @@ bool Session::render_need_denoise(bool &delayed) return false; } - /* Cannot denoise with resolution divider and separate denoising devices. - * It breaks the copy in 'MultiDevice::map_neighbor_tiles' (which operates on - * the full buffer dimensions and not the scaled ones). */ - if (!params.device.denoising_devices.empty() && tile_manager.state.resolution_divider > 1) { - return false; - } - /* Avoid excessive denoising in viewport after reaching a certain amount of samples. */ delayed = (tile_manager.state.sample >= 20 && (time_dt() - last_display_time) < params.progressive_update_timeout); |