git.blender.org/blender.git
Diffstat (limited to 'intern/cycles/integrator')
 -rw-r--r--  intern/cycles/integrator/denoiser.cpp            |  7
 -rw-r--r--  intern/cycles/integrator/pass_accessor.cpp       |  6
 -rw-r--r--  intern/cycles/integrator/path_trace.cpp          | 77
 -rw-r--r--  intern/cycles/integrator/path_trace.h            |  4
 -rw-r--r--  intern/cycles/integrator/path_trace_tile.cpp     |  2
 -rw-r--r--  intern/cycles/integrator/path_trace_tile.h       |  2
 -rw-r--r--  intern/cycles/integrator/path_trace_work_gpu.cpp | 68
 -rw-r--r--  intern/cycles/integrator/path_trace_work_gpu.h   |  3
 8 files changed, 119 insertions(+), 50 deletions(-)
diff --git a/intern/cycles/integrator/denoiser.cpp b/intern/cycles/integrator/denoiser.cpp
index 94991d63e4c..831bd3a4407 100644
--- a/intern/cycles/integrator/denoiser.cpp
+++ b/intern/cycles/integrator/denoiser.cpp
@@ -101,10 +101,17 @@ static Device *find_best_device(Device *device, DenoiserType type)
if ((sub_device->info.denoisers & type) == 0) {
return;
}
+
if (!best_device) {
best_device = sub_device;
}
else {
+ /* Prefer a device that can use graphics interop for faster display update. */
+ if (sub_device->should_use_graphics_interop() &&
+ !best_device->should_use_graphics_interop()) {
+ best_device = sub_device;
+ }
+
/* TODO(sergey): Choose fastest device from available ones. Taking into account performance
* of the device and data transfer cost. */
}
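
Note on the denoiser.cpp hunk: when the render device is a multi-device, find_best_device() now breaks ties in favor of a sub-device that can use graphics interop, so the denoised result can be pushed to the display without an extra host round-trip. Below is a minimal standalone sketch of that preference order; FakeDevice and its fields are hypothetical stand-ins for Cycles' Device/DeviceInfo, not the real API.

#include <cstdint>
#include <vector>

struct FakeDevice {
  uint32_t denoisers = 0;        /* Bitmask of supported denoiser types. */
  bool graphics_interop = false; /* Can share buffers with the display API. */
  bool should_use_graphics_interop() const { return graphics_interop; }
};

static FakeDevice *pick_denoiser_device(std::vector<FakeDevice> &sub_devices, uint32_t type)
{
  FakeDevice *best_device = nullptr;
  for (FakeDevice &sub_device : sub_devices) {
    if ((sub_device.denoisers & type) == 0) {
      continue; /* Device cannot run the requested denoiser at all. */
    }
    if (!best_device) {
      best_device = &sub_device;
    }
    else if (sub_device.should_use_graphics_interop() &&
             !best_device->should_use_graphics_interop()) {
      /* Prefer a device that can use graphics interop for faster display updates. */
      best_device = &sub_device;
    }
  }
  return best_device;
}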
diff --git a/intern/cycles/integrator/pass_accessor.cpp b/intern/cycles/integrator/pass_accessor.cpp
index 05318b7545b..ab056e953c2 100644
--- a/intern/cycles/integrator/pass_accessor.cpp
+++ b/intern/cycles/integrator/pass_accessor.cpp
@@ -191,6 +191,12 @@ bool PassAccessor::get_render_tile_pixels(const RenderBuffers *render_buffers,
* had the computation done. */
if (pass_info.num_components == 3) {
get_pass_float3(render_buffers, buffer_params, destination);
+
+ /* Use alpha for color passes. */
+ if (type == PASS_DIFFUSE_COLOR || type == PASS_GLOSSY_COLOR ||
+ type == PASS_TRANSMISSION_COLOR) {
+ num_written_components = destination.num_components;
+ }
}
else if (pass_info.num_components == 4) {
if (destination.num_components == 3) {
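
Note on the pass_accessor.cpp hunk: the three color passes now report that they wrote every destination component. The surrounding function appears to use num_written_components to decide which destination channels still need a default value, so reporting four written components presumably keeps a later padding step from overwriting the alpha these passes now carry. A hedged sketch of that pattern, using a hypothetical pad_unwritten_components() helper rather than the real Cycles code:

#include <cstddef>
#include <vector>

/* Fill only the channels the pass itself did not write. A pass that provides its own
 * alpha (e.g. diffuse/glossy/transmission color) reports 4 written components and is
 * left untouched; RGB-only passes get a default alpha of 1. */
void pad_unwritten_components(std::vector<float> &rgba_pixels, int num_written_components)
{
  if (num_written_components >= 4) {
    return;
  }
  for (std::size_t px = 0; px + 3 < rgba_pixels.size(); px += 4) {
    rgba_pixels[px + 3] = 1.0f;
  }
}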
diff --git a/intern/cycles/integrator/path_trace.cpp b/intern/cycles/integrator/path_trace.cpp
index 9ad1c465725..3ec7b601d9f 100644
--- a/intern/cycles/integrator/path_trace.cpp
+++ b/intern/cycles/integrator/path_trace.cpp
@@ -26,6 +26,7 @@ PathTrace::PathTrace(Device *device,
RenderScheduler &render_scheduler,
TileManager &tile_manager)
: device_(device),
+ film_(film),
device_scene_(device_scene),
render_scheduler_(render_scheduler),
tile_manager_(tile_manager)
@@ -60,7 +61,17 @@ PathTrace::~PathTrace()
void PathTrace::load_kernels()
{
if (denoiser_) {
+ /* Activate graphics interop while denoiser device is created, so that it can choose a device
+ * that supports interop for faster display updates. */
+ if (display_ && path_trace_works_.size() > 1) {
+ display_->graphics_interop_activate();
+ }
+
denoiser_->load_kernels(progress_);
+
+ if (display_ && path_trace_works_.size() > 1) {
+ display_->graphics_interop_deactivate();
+ }
}
}
@@ -373,7 +384,7 @@ void PathTrace::path_trace(RenderWork &render_work)
work_balance_infos_[i].time_spent += work_time;
work_balance_infos_[i].occupancy = statistics.occupancy;
- VLOG_WORK << "Rendered " << num_samples << " samples in " << work_time << " seconds ("
+ VLOG_INFO << "Rendered " << num_samples << " samples in " << work_time << " seconds ("
<< work_time / num_samples
<< " seconds per sample), occupancy: " << statistics.occupancy;
});
@@ -506,28 +517,30 @@ void PathTrace::denoise(const RenderWork &render_work)
const double start_time = time_dt();
RenderBuffers *buffer_to_denoise = nullptr;
-
- unique_ptr<RenderBuffers> multi_device_buffers;
bool allow_inplace_modification = false;
- if (path_trace_works_.size() == 1) {
- buffer_to_denoise = path_trace_works_.front()->get_render_buffers();
+ Device *denoiser_device = denoiser_->get_denoiser_device();
+ if (path_trace_works_.size() > 1 && denoiser_device && !big_tile_denoise_work_) {
+ big_tile_denoise_work_ = PathTraceWork::create(denoiser_device, film_, device_scene_, nullptr);
}
- else {
- Device *denoiser_device = denoiser_->get_denoiser_device();
- if (!denoiser_device) {
- return;
- }
- multi_device_buffers = make_unique<RenderBuffers>(denoiser_device);
- multi_device_buffers->reset(render_state_.effective_big_tile_params);
+ if (big_tile_denoise_work_) {
+ big_tile_denoise_work_->set_effective_buffer_params(render_state_.effective_big_tile_params,
+ render_state_.effective_big_tile_params,
+ render_state_.effective_big_tile_params);
- buffer_to_denoise = multi_device_buffers.get();
+ buffer_to_denoise = big_tile_denoise_work_->get_render_buffers();
+ buffer_to_denoise->reset(render_state_.effective_big_tile_params);
- copy_to_render_buffers(multi_device_buffers.get());
+ copy_to_render_buffers(buffer_to_denoise);
allow_inplace_modification = true;
}
+ else {
+ DCHECK_EQ(path_trace_works_.size(), 1);
+
+ buffer_to_denoise = path_trace_works_.front()->get_render_buffers();
+ }
if (denoiser_->denoise_buffer(render_state_.effective_big_tile_params,
buffer_to_denoise,
@@ -536,14 +549,6 @@ void PathTrace::denoise(const RenderWork &render_work)
render_state_.has_denoised_result = true;
}
- if (multi_device_buffers) {
- multi_device_buffers->copy_from_device();
- parallel_for_each(
- path_trace_works_, [&multi_device_buffers](unique_ptr<PathTraceWork> &path_trace_work) {
- path_trace_work->copy_from_denoised_render_buffers(multi_device_buffers.get());
- });
- }
-
render_scheduler_.report_denoise_time(render_work, time_dt() - start_time);
}
@@ -635,8 +640,13 @@ void PathTrace::update_display(const RenderWork &render_work)
/* TODO(sergey): When using multi-device rendering map the GPUDisplay once and copy data from
* all works in parallel. */
const int num_samples = get_num_samples_in_buffer();
- for (auto &&path_trace_work : path_trace_works_) {
- path_trace_work->copy_to_display(display_.get(), pass_mode, num_samples);
+ if (big_tile_denoise_work_ && render_state_.has_denoised_result) {
+ big_tile_denoise_work_->copy_to_display(display_.get(), pass_mode, num_samples);
+ }
+ else {
+ for (auto &&path_trace_work : path_trace_works_) {
+ path_trace_work->copy_to_display(display_.get(), pass_mode, num_samples);
+ }
}
display_->update_end();
@@ -721,11 +731,10 @@ void PathTrace::write_tile_buffer(const RenderWork &render_work)
VLOG_WORK << "Write tile result via buffer write callback.";
tile_buffer_write();
}
-
/* Write tile to disk, so that the render work's render buffer can be re-used for the next tile.
*/
- if (has_multiple_tiles) {
- VLOG_WORK << "Write tile result into .";
+ else {
+ VLOG_WORK << "Write tile result to disk.";
tile_buffer_write_to_disk();
}
}
@@ -901,6 +910,10 @@ bool PathTrace::copy_render_tile_from_device()
return true;
}
+ if (big_tile_denoise_work_ && render_state_.has_denoised_result) {
+ return big_tile_denoise_work_->copy_render_buffers_from_device();
+ }
+
bool success = true;
parallel_for_each(path_trace_works_, [&](unique_ptr<PathTraceWork> &path_trace_work) {
@@ -1002,6 +1015,10 @@ bool PathTrace::get_render_tile_pixels(const PassAccessor &pass_accessor,
return pass_accessor.get_render_tile_pixels(full_frame_state_.render_buffers, destination);
}
+ if (big_tile_denoise_work_ && render_state_.has_denoised_result) {
+ return big_tile_denoise_work_->get_render_tile_pixels(pass_accessor, destination);
+ }
+
bool success = true;
parallel_for_each(path_trace_works_, [&](unique_ptr<PathTraceWork> &path_trace_work) {
@@ -1082,6 +1099,10 @@ void PathTrace::destroy_gpu_resources()
for (auto &&path_trace_work : path_trace_works_) {
path_trace_work->destroy_gpu_resources(display_.get());
}
+
+ if (big_tile_denoise_work_) {
+ big_tile_denoise_work_->destroy_gpu_resources(display_.get());
+ }
}
}
@@ -1103,6 +1124,8 @@ static const char *device_type_for_description(const DeviceType type)
return "OptiX";
case DEVICE_HIP:
return "HIP";
+ case DEVICE_ONEAPI:
+ return "oneAPI";
case DEVICE_DUMMY:
return "Dummy";
case DEVICE_MULTI:
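
Note on the path_trace.cpp hunks: instead of allocating temporary multi-device RenderBuffers, denoising into them and copying the denoised result back into every work's buffers, a dedicated PathTraceWork (big_tile_denoise_work_) is now created lazily on the denoiser device. It holds the denoised big tile and is read directly by display updates, tile copies and pixel access while a denoised result exists. A condensed sketch of that control flow; the types and method names below are illustrative stand-ins, not the Cycles API.

#include <memory>
#include <vector>

struct Work {
  virtual ~Work() = default;
  virtual void copy_to_display() {}
  virtual bool copy_buffers_from_device() { return true; }
};

struct PathTraceSketch {
  std::vector<std::unique_ptr<Work>> render_works; /* One per rendering device. */
  std::unique_ptr<Work> big_tile_denoise_work;     /* Lives on the denoiser device. */
  bool has_denoised_result = false;

  void denoise()
  {
    /* Created lazily when more than one rendering device is involved, then reused
     * for every later denoise of the big tile. */
    if (render_works.size() > 1 && !big_tile_denoise_work) {
      big_tile_denoise_work = std::make_unique<Work>();
    }
    /* ... gather the big tile into its buffers and run the denoiser here ... */
    has_denoised_result = true;
  }

  void update_display()
  {
    if (big_tile_denoise_work && has_denoised_result) {
      /* Denoised result already lives on the denoiser device: one copy, interop-friendly. */
      big_tile_denoise_work->copy_to_display();
    }
    else {
      for (auto &work : render_works) {
        work->copy_to_display();
      }
    }
  }
};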
diff --git a/intern/cycles/integrator/path_trace.h b/intern/cycles/integrator/path_trace.h
index a470a6e1402..59382b51d23 100644
--- a/intern/cycles/integrator/path_trace.h
+++ b/intern/cycles/integrator/path_trace.h
@@ -236,6 +236,7 @@ class PathTrace {
/* CPU device for creating temporary render buffers on the CPU side. */
unique_ptr<Device> cpu_device_;
+ Film *film_;
DeviceScene *device_scene_;
RenderScheduler &render_scheduler_;
@@ -261,6 +262,9 @@ class PathTrace {
/* Denoiser which takes care of denoising the big tile. */
unique_ptr<Denoiser> denoiser_;
+ /* Denoiser device descriptor which holds the denoised big tile for multi-device workloads. */
+ unique_ptr<PathTraceWork> big_tile_denoise_work_;
+
/* State which is common for all the steps of the render work.
* Is brought up to date in the `render()` call and is accessed from all the steps involved into
* rendering the work. */
diff --git a/intern/cycles/integrator/path_trace_tile.cpp b/intern/cycles/integrator/path_trace_tile.cpp
index 2f1f4e810a3..dfe88695013 100644
--- a/intern/cycles/integrator/path_trace_tile.cpp
+++ b/intern/cycles/integrator/path_trace_tile.cpp
@@ -33,7 +33,7 @@ bool PathTraceTile::get_pass_pixels(const string_view pass_name,
if (!copied_from_device_) {
/* Copy from device on demand. */
path_trace_.copy_render_tile_from_device();
- const_cast<PathTraceTile *>(this)->copied_from_device_ = true;
+ copied_from_device_ = true;
}
const BufferParams &buffer_params = path_trace_.get_render_tile_params();
diff --git a/intern/cycles/integrator/path_trace_tile.h b/intern/cycles/integrator/path_trace_tile.h
index 99ae08d04d1..223fa96e113 100644
--- a/intern/cycles/integrator/path_trace_tile.h
+++ b/intern/cycles/integrator/path_trace_tile.h
@@ -24,7 +24,7 @@ class PathTraceTile : public OutputDriver::Tile {
private:
PathTrace &path_trace_;
- bool copied_from_device_;
+ mutable bool copied_from_device_;
};
CCL_NAMESPACE_END
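
Note on the path_trace_tile.{cpp,h} pair: get_pass_pixels() is a const method that copies the render tile from the device on first access, and it previously recorded that fact through a const_cast. Marking copied_from_device_ as mutable expresses the same lazy, logically-const caching without the cast. A small standalone illustration of the pattern (not the Cycles class):

#include <vector>

class LazyTile {
 public:
  const std::vector<float> &pixels() const
  {
    if (!copied_from_device_) {
      /* Expensive device -> host copy, done once on demand. */
      pixels_.assign(1024, 0.0f);
      copied_from_device_ = true; /* Allowed in a const method because the member is mutable. */
    }
    return pixels_;
  }

 private:
  mutable std::vector<float> pixels_;
  mutable bool copied_from_device_ = false;
};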
diff --git a/intern/cycles/integrator/path_trace_work_gpu.cpp b/intern/cycles/integrator/path_trace_work_gpu.cpp
index 0acaeace4b0..ee250a6916b 100644
--- a/intern/cycles/integrator/path_trace_work_gpu.cpp
+++ b/intern/cycles/integrator/path_trace_work_gpu.cpp
@@ -181,27 +181,49 @@ void PathTraceWorkGPU::alloc_integrator_queue()
void PathTraceWorkGPU::alloc_integrator_sorting()
{
- /* Allocate arrays for shader sorting. */
- const int max_shaders = device_scene_->data.max_shaders;
- if (integrator_shader_sort_counter_.size() < max_shaders) {
- integrator_shader_sort_counter_.alloc(max_shaders);
- integrator_shader_sort_counter_.zero_to_device();
+ /* Compute sort partitions, to balance between memory locality and coherence.
+ * Sort partitioning becomes less effective when more shaders are in the wavefront. In lieu of a
+ * more sophisticated heuristic we simply disable sort partitioning if the shader count is high.
+ */
+ num_sort_partitions_ = 1;
+ if (device_scene_->data.max_shaders < 300) {
+ const int num_elements = queue_->num_sort_partition_elements();
+ if (num_elements) {
+ num_sort_partitions_ = max(max_num_paths_ / num_elements, 1);
+ }
+ }
- integrator_shader_raytrace_sort_counter_.alloc(max_shaders);
- integrator_shader_raytrace_sort_counter_.zero_to_device();
+ integrator_state_gpu_.sort_partition_divisor = (int)divide_up(max_num_paths_,
+ num_sort_partitions_);
- integrator_shader_mnee_sort_counter_.alloc(max_shaders);
- integrator_shader_mnee_sort_counter_.zero_to_device();
+ /* Allocate arrays for shader sorting. */
+ const int sort_buckets = device_scene_->data.max_shaders * num_sort_partitions_;
+ if (integrator_shader_sort_counter_.size() < sort_buckets) {
+ integrator_shader_sort_counter_.alloc(sort_buckets);
+ integrator_shader_sort_counter_.zero_to_device();
+ integrator_state_gpu_.sort_key_counter[DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE] =
+ (int *)integrator_shader_sort_counter_.device_pointer;
- integrator_shader_sort_prefix_sum_.alloc(max_shaders);
+ integrator_shader_sort_prefix_sum_.alloc(sort_buckets);
integrator_shader_sort_prefix_sum_.zero_to_device();
+ }
- integrator_state_gpu_.sort_key_counter[DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE] =
- (int *)integrator_shader_sort_counter_.device_pointer;
- integrator_state_gpu_.sort_key_counter[DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE] =
- (int *)integrator_shader_raytrace_sort_counter_.device_pointer;
- integrator_state_gpu_.sort_key_counter[DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE] =
- (int *)integrator_shader_mnee_sort_counter_.device_pointer;
+ if (device_scene_->data.kernel_features & KERNEL_FEATURE_NODE_RAYTRACE) {
+ if (integrator_shader_raytrace_sort_counter_.size() < sort_buckets) {
+ integrator_shader_raytrace_sort_counter_.alloc(sort_buckets);
+ integrator_shader_raytrace_sort_counter_.zero_to_device();
+ integrator_state_gpu_.sort_key_counter[DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_RAYTRACE] =
+ (int *)integrator_shader_raytrace_sort_counter_.device_pointer;
+ }
+ }
+
+ if (device_scene_->data.kernel_features & KERNEL_FEATURE_MNEE) {
+ if (integrator_shader_mnee_sort_counter_.size() < sort_buckets) {
+ integrator_shader_mnee_sort_counter_.alloc(sort_buckets);
+ integrator_shader_mnee_sort_counter_.zero_to_device();
+ integrator_state_gpu_.sort_key_counter[DEVICE_KERNEL_INTEGRATOR_SHADE_SURFACE_MNEE] =
+ (int *)integrator_shader_mnee_sort_counter_.device_pointer;
+ }
}
}
@@ -239,7 +261,7 @@ void PathTraceWorkGPU::init_execution()
/* Copy to device side struct in constant memory. */
device_->const_copy_to(
- "__integrator_state", &integrator_state_gpu_, sizeof(integrator_state_gpu_));
+ "integrator_state", &integrator_state_gpu_, sizeof(integrator_state_gpu_));
}
void PathTraceWorkGPU::render_samples(RenderStatistics &statistics,
@@ -333,8 +355,12 @@ void PathTraceWorkGPU::enqueue_reset()
queue_->enqueue(DEVICE_KERNEL_INTEGRATOR_RESET, max_num_paths_, args);
queue_->zero_to_device(integrator_queue_counter_);
queue_->zero_to_device(integrator_shader_sort_counter_);
- queue_->zero_to_device(integrator_shader_raytrace_sort_counter_);
- queue_->zero_to_device(integrator_shader_mnee_sort_counter_);
+ if (device_scene_->data.kernel_features & KERNEL_FEATURE_NODE_RAYTRACE) {
+ queue_->zero_to_device(integrator_shader_raytrace_sort_counter_);
+ }
+ if (device_scene_->data.kernel_features & KERNEL_FEATURE_MNEE) {
+ queue_->zero_to_device(integrator_shader_mnee_sort_counter_);
+ }
/* Tiles enqueue need to know number of active paths, which is based on this counter. Zero the
* counter on the host side because `zero_to_device()` is not doing it. */
@@ -486,9 +512,9 @@ void PathTraceWorkGPU::compute_sorted_queued_paths(DeviceKernel kernel,
/* Compute prefix sum of number of active paths with each shader. */
{
const int work_size = 1;
- int max_shaders = device_scene_->data.max_shaders;
+ int sort_buckets = device_scene_->data.max_shaders * num_sort_partitions_;
- DeviceKernelArguments args(&d_counter, &d_prefix_sum, &max_shaders);
+ DeviceKernelArguments args(&d_counter, &d_prefix_sum, &sort_buckets);
queue_->enqueue(DEVICE_KERNEL_PREFIX_SUM, work_size, args);
}
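
Note on the path_trace_work_gpu.cpp hunks: material sorting is now done per partition of the path state array, so the counter and prefix-sum arrays grow from max_shaders entries to max_shaders * num_sort_partitions_ buckets, and the raytrace/MNEE counters are only allocated (and zeroed on reset) when the corresponding kernel features are enabled. The worked example below runs the partition arithmetic with made-up numbers (1M paths, 65536 elements per partition, 128 shaders); divide_up() mirrors the usual ceil-division helper and is not the Cycles utility itself.

#include <algorithm>
#include <cstdio>

static int divide_up(int x, int y)
{
  return (x + y - 1) / y;
}

int main()
{
  const int max_num_paths = 1 << 20;             /* Concurrent integrator states. */
  const int num_sort_partition_elements = 65536; /* Queue-reported partition size. */
  const int max_shaders = 128;                   /* Below the 300-shader cutoff. */

  const int num_sort_partitions = std::max(max_num_paths / num_sort_partition_elements, 1);
  const int sort_buckets = max_shaders * num_sort_partitions;
  const int sort_partition_divisor = divide_up(max_num_paths, num_sort_partitions);

  /* Prints: partitions=16 buckets=2048 divisor=65536 -- paths 0..65535 sort within
   * partition 0, paths 65536..131071 within partition 1, and so on. */
  std::printf("partitions=%d buckets=%d divisor=%d\n",
              num_sort_partitions, sort_buckets, sort_partition_divisor);
  return 0;
}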
diff --git a/intern/cycles/integrator/path_trace_work_gpu.h b/intern/cycles/integrator/path_trace_work_gpu.h
index 4c10a221a30..a805258d1b5 100644
--- a/intern/cycles/integrator/path_trace_work_gpu.h
+++ b/intern/cycles/integrator/path_trace_work_gpu.h
@@ -156,6 +156,9 @@ class PathTraceWorkGPU : public PathTraceWork {
bool interop_use_checked_ = false;
bool interop_use_ = false;
+ /* Number of partitions to sort state indices into prior to material sort. */
+ int num_sort_partitions_;
+
/* Maximum number of concurrent integrator states. */
int max_num_paths_;