git.blender.org/blender.git
author    Brecht Van Lommel <brecht@blender.org>  2021-09-20 18:59:20 +0300
committer Brecht Van Lommel <brecht@blender.org>  2021-09-21 15:55:54 +0300
commit    08031197250aeecbaca3803254e6f25b8c7b7b37 (patch)
tree      6fe7ab045f0dc0a423d6557c4073f34309ef4740 /intern/cycles/integrator/path_trace.cpp
parent    fa6b1007bad065440950cd67deb16a04f368856f (diff)
Cycles: merge of cycles-x branch, a major update to the renderer
This includes much improved GPU rendering performance, viewport interactivity, new shadow catcher, revamped sampling settings, subsurface scattering anisotropy, new GPU volume sampling, improved PMJ sampling pattern, and more.

Some features have also been removed or changed, breaking backwards compatibility, including the removal of the OpenCL backend, for which alternatives are under development.

Release notes and code docs:
https://wiki.blender.org/wiki/Reference/Release_Notes/3.0/Cycles
https://wiki.blender.org/wiki/Source/Render/Cycles

Credits:
* Sergey Sharybin
* Brecht Van Lommel
* Patrick Mours (OptiX backend)
* Christophe Hery (subsurface scattering anisotropy)
* William Leeson (PMJ sampling pattern)
* Alaska (various fixes and tweaks)
* Thomas Dinges (various fixes)

For the full commit history, see the cycles-x branch. This squashes together all the changes, since intermediate changes would often fail building or tests.

Ref T87839, T87837, T87836
Fixes T90734, T89353, T80267, T77185, T69800
Diffstat (limited to 'intern/cycles/integrator/path_trace.cpp')
-rw-r--r--  intern/cycles/integrator/path_trace.cpp  1147
1 file changed, 1147 insertions, 0 deletions
diff --git a/intern/cycles/integrator/path_trace.cpp b/intern/cycles/integrator/path_trace.cpp
new file mode 100644
index 00000000000..6c02316ac2b
--- /dev/null
+++ b/intern/cycles/integrator/path_trace.cpp
@@ -0,0 +1,1147 @@
+/*
+ * Copyright 2011-2021 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "integrator/path_trace.h"
+
+#include "device/cpu/device.h"
+#include "device/device.h"
+#include "integrator/pass_accessor.h"
+#include "integrator/render_scheduler.h"
+#include "render/gpu_display.h"
+#include "render/pass.h"
+#include "render/scene.h"
+#include "render/tile.h"
+#include "util/util_algorithm.h"
+#include "util/util_logging.h"
+#include "util/util_progress.h"
+#include "util/util_tbb.h"
+#include "util/util_time.h"
+
+CCL_NAMESPACE_BEGIN
+
+PathTrace::PathTrace(Device *device,
+ Film *film,
+ DeviceScene *device_scene,
+ RenderScheduler &render_scheduler,
+ TileManager &tile_manager)
+ : device_(device),
+ device_scene_(device_scene),
+ render_scheduler_(render_scheduler),
+ tile_manager_(tile_manager)
+{
+ DCHECK_NE(device_, nullptr);
+
+ {
+ vector<DeviceInfo> cpu_devices;
+ device_cpu_info(cpu_devices);
+
+ cpu_device_.reset(device_cpu_create(cpu_devices[0], device->stats, device->profiler));
+ }
+
+ /* Create path tracing work in advance, so that it can be reused by incremental sampling as much
+ * as possible. */
+ device_->foreach_device([&](Device *path_trace_device) {
+ path_trace_works_.emplace_back(PathTraceWork::create(
+ path_trace_device, film, device_scene, &render_cancel_.is_requested));
+ });
+
+ work_balance_infos_.resize(path_trace_works_.size());
+ work_balance_do_initial(work_balance_infos_);
+
+ render_scheduler.set_need_schedule_rebalance(path_trace_works_.size() > 1);
+}
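+
+/* NOTE: With a multi-device render, `foreach_device()` above visits every sub-device, so there is
+ * one `PathTraceWork` per device. As an illustrative example, a hypothetical GPU+CPU configuration
+ * yields two works and two `WorkBalanceInfo` entries, and a rebalance step is then scheduled
+ * because `path_trace_works_.size() > 1`. */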
+
+PathTrace::~PathTrace()
+{
+  /* Destroy any GPU resources which were used for graphics interop.
+   * Access to the GPUDisplay is needed, as it is the only source of the drawing context which is
+   * used for interop. */
+ if (gpu_display_) {
+ for (auto &&path_trace_work : path_trace_works_) {
+ path_trace_work->destroy_gpu_resources(gpu_display_.get());
+ }
+ }
+}
+
+void PathTrace::load_kernels()
+{
+ if (denoiser_) {
+ denoiser_->load_kernels(progress_);
+ }
+}
+
+void PathTrace::alloc_work_memory()
+{
+ for (auto &&path_trace_work : path_trace_works_) {
+ path_trace_work->alloc_work_memory();
+ }
+}
+
+bool PathTrace::ready_to_reset()
+{
+  /* The logic here is optimized for the best feedback in the viewport, which implies having a GPU
+   * display. If there is no such display, the logic here will break. */
+ DCHECK(gpu_display_);
+
+  /* The logic here tries to provide behavior which feels the most interactive to artists. The
+   * general idea is to be able to reset as quickly as possible, while still providing an
+   * interactive feel.
+   *
+   * If the render result was ever drawn after the previous reset, consider that a reset is now
+   * possible. This way camera navigation gives the quickest feedback of rendered pixels,
+   * regardless of whether the CPU or GPU drawing pipeline is used.
+   *
+   * A reset which happens after a redraw is considered "slow" enough to not clog anything. This
+   * is a bit arbitrary, but seems to work very well with viewport navigation in Blender. */
+
+ if (did_draw_after_reset_) {
+ return true;
+ }
+
+ return false;
+}
+
+void PathTrace::reset(const BufferParams &full_params, const BufferParams &big_tile_params)
+{
+ if (big_tile_params_.modified(big_tile_params)) {
+ big_tile_params_ = big_tile_params;
+ render_state_.need_reset_params = true;
+ }
+
+ full_params_ = full_params;
+
+  /* NOTE: GPU display checks for buffer modification and avoids unnecessary re-allocation.
+   * It is still required to be informed about the reset whenever it happens, so that the redraw
+   * state tracking is properly updated. */
+ if (gpu_display_) {
+ gpu_display_->reset(full_params);
+ }
+
+ render_state_.has_denoised_result = false;
+ render_state_.tile_written = false;
+
+ did_draw_after_reset_ = false;
+}
+
+void PathTrace::device_free()
+{
+ /* Free render buffers used by the path trace work to reduce memory peak. */
+ BufferParams empty_params;
+ empty_params.pass_stride = 0;
+ empty_params.update_offset_stride();
+ for (auto &&path_trace_work : path_trace_works_) {
+ path_trace_work->get_render_buffers()->reset(empty_params);
+ }
+ render_state_.need_reset_params = true;
+}
+
+void PathTrace::set_progress(Progress *progress)
+{
+ progress_ = progress;
+}
+
+void PathTrace::render(const RenderWork &render_work)
+{
+  /* Indicate that rendering has started and that cancellation can now be requested. */
+ {
+ thread_scoped_lock lock(render_cancel_.mutex);
+ if (render_cancel_.is_requested) {
+ return;
+ }
+ render_cancel_.is_rendering = true;
+ }
+
+ render_pipeline(render_work);
+
+  /* Indicate that rendering has finished, so that the thread which requested `cancel()` can
+   * carry on. */
+ {
+ thread_scoped_lock lock(render_cancel_.mutex);
+ render_cancel_.is_rendering = false;
+ render_cancel_.condition.notify_one();
+ }
+}
+
+void PathTrace::render_pipeline(RenderWork render_work)
+{
+  /* NOTE: Only check for the "instant" cancel here. The user-requested cancel via progress is
+   * checked in Session, and in the event of a cancel the work is to be finished here. */
+
+ render_scheduler_.set_need_schedule_cryptomatte(device_scene_->data.film.cryptomatte_passes !=
+ 0);
+
+ render_init_kernel_execution();
+
+ render_scheduler_.report_work_begin(render_work);
+
+ init_render_buffers(render_work);
+
+ rebalance(render_work);
+
+ path_trace(render_work);
+ if (render_cancel_.is_requested) {
+ return;
+ }
+
+ adaptive_sample(render_work);
+ if (render_cancel_.is_requested) {
+ return;
+ }
+
+ cryptomatte_postprocess(render_work);
+ if (render_cancel_.is_requested) {
+ return;
+ }
+
+ denoise(render_work);
+ if (render_cancel_.is_requested) {
+ return;
+ }
+
+ write_tile_buffer(render_work);
+ update_display(render_work);
+
+ progress_update_if_needed(render_work);
+
+ finalize_full_buffer_on_disk(render_work);
+}
+
+void PathTrace::render_init_kernel_execution()
+{
+ for (auto &&path_trace_work : path_trace_works_) {
+ path_trace_work->init_execution();
+ }
+}
+
+/* TODO(sergey): Look into `std::function` rather than using a template. There should not be a
+ * measurable performance impact at runtime, but it will make compilation faster and the binary
+ * somewhat smaller. */
+template<typename Callback>
+static void foreach_sliced_buffer_params(const vector<unique_ptr<PathTraceWork>> &path_trace_works,
+ const vector<WorkBalanceInfo> &work_balance_infos,
+ const BufferParams &buffer_params,
+ const Callback &callback)
+{
+ const int num_works = path_trace_works.size();
+ const int height = buffer_params.height;
+
+ int current_y = 0;
+ for (int i = 0; i < num_works; ++i) {
+ const double weight = work_balance_infos[i].weight;
+ const int slice_height = max(lround(height * weight), 1);
+
+ /* Disallow negative values to deal with situations when there are more compute devices than
+ * scanlines. */
+ const int remaining_height = max(0, height - current_y);
+
+ BufferParams slide_params = buffer_params;
+ slide_params.full_y = buffer_params.full_y + current_y;
+ if (i < num_works - 1) {
+ slide_params.height = min(slice_height, remaining_height);
+ }
+ else {
+ slide_params.height = remaining_height;
+ }
+
+ slide_params.update_offset_stride();
+
+ callback(path_trace_works[i].get(), slide_params);
+
+ current_y += slide_params.height;
+ }
+}
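+
+/* Illustrative example of the slicing above: with two works weighted 0.75 and 0.25 and a big tile
+ * of height 1080, the works receive contiguous scanline ranges:
+ *
+ *   slice 0: full_y offset 0,   height = lround(1080 * 0.75) = 810
+ *   slice 1: full_y offset 810, height = 1080 - 810 = 270 (the remaining height)
+ *
+ * The last work always takes the remaining height, so the slices exactly cover the buffer. */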
+
+void PathTrace::update_allocated_work_buffer_params()
+{
+ foreach_sliced_buffer_params(path_trace_works_,
+ work_balance_infos_,
+ big_tile_params_,
+ [](PathTraceWork *path_trace_work, const BufferParams &params) {
+ RenderBuffers *buffers = path_trace_work->get_render_buffers();
+ buffers->reset(params);
+ });
+}
+
+static BufferParams scale_buffer_params(const BufferParams &params, int resolution_divider)
+{
+ BufferParams scaled_params = params;
+
+ scaled_params.width = max(1, params.width / resolution_divider);
+ scaled_params.height = max(1, params.height / resolution_divider);
+ scaled_params.full_x = params.full_x / resolution_divider;
+ scaled_params.full_y = params.full_y / resolution_divider;
+ scaled_params.full_width = params.full_width / resolution_divider;
+ scaled_params.full_height = params.full_height / resolution_divider;
+
+ scaled_params.update_offset_stride();
+
+ return scaled_params;
+}
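+
+/* Illustrative example: a resolution divider of 4 applied to a 1920x1080 buffer gives 480x270
+ * effective parameters:
+ *
+ *   width  = max(1, 1920 / 4) = 480
+ *   height = max(1, 1080 / 4) = 270
+ *
+ * The max(1, ...) clamp keeps the buffer valid when the divider exceeds the dimensions. */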
+
+void PathTrace::update_effective_work_buffer_params(const RenderWork &render_work)
+{
+ const int resolution_divider = render_work.resolution_divider;
+
+ const BufferParams scaled_full_params = scale_buffer_params(full_params_, resolution_divider);
+ const BufferParams scaled_big_tile_params = scale_buffer_params(big_tile_params_,
+ resolution_divider);
+
+ foreach_sliced_buffer_params(path_trace_works_,
+ work_balance_infos_,
+ scaled_big_tile_params,
+                               [&](PathTraceWork *path_trace_work, const BufferParams &params) {
+ path_trace_work->set_effective_buffer_params(
+ scaled_full_params, scaled_big_tile_params, params);
+ });
+
+ render_state_.effective_big_tile_params = scaled_big_tile_params;
+}
+
+void PathTrace::update_work_buffer_params_if_needed(const RenderWork &render_work)
+{
+ if (render_state_.need_reset_params) {
+ update_allocated_work_buffer_params();
+ }
+
+ if (render_state_.need_reset_params ||
+ render_state_.resolution_divider != render_work.resolution_divider) {
+ update_effective_work_buffer_params(render_work);
+ }
+
+ render_state_.resolution_divider = render_work.resolution_divider;
+ render_state_.need_reset_params = false;
+}
+
+void PathTrace::init_render_buffers(const RenderWork &render_work)
+{
+ update_work_buffer_params_if_needed(render_work);
+
+ /* Handle initialization scheduled by the render scheduler. */
+ if (render_work.init_render_buffers) {
+ tbb::parallel_for_each(path_trace_works_, [&](unique_ptr<PathTraceWork> &path_trace_work) {
+ path_trace_work->zero_render_buffers();
+ });
+
+ tile_buffer_read();
+ }
+}
+
+void PathTrace::path_trace(RenderWork &render_work)
+{
+ if (!render_work.path_trace.num_samples) {
+ return;
+ }
+
+ VLOG(3) << "Will path trace " << render_work.path_trace.num_samples
+ << " samples at the resolution divider " << render_work.resolution_divider;
+
+ const double start_time = time_dt();
+
+ const int num_works = path_trace_works_.size();
+
+ tbb::parallel_for(0, num_works, [&](int i) {
+ const double work_start_time = time_dt();
+ const int num_samples = render_work.path_trace.num_samples;
+
+ PathTraceWork *path_trace_work = path_trace_works_[i].get();
+
+ PathTraceWork::RenderStatistics statistics;
+ path_trace_work->render_samples(statistics, render_work.path_trace.start_sample, num_samples);
+
+ const double work_time = time_dt() - work_start_time;
+ work_balance_infos_[i].time_spent += work_time;
+ work_balance_infos_[i].occupancy = statistics.occupancy;
+
+ VLOG(3) << "Rendered " << num_samples << " samples in " << work_time << " seconds ("
+ << work_time / num_samples
+ << " seconds per sample), occupancy: " << statistics.occupancy;
+ });
+
+ float occupancy_accum = 0.0f;
+ for (const WorkBalanceInfo &balance_info : work_balance_infos_) {
+ occupancy_accum += balance_info.occupancy;
+ }
+ const float occupancy = occupancy_accum / num_works;
+ render_scheduler_.report_path_trace_occupancy(render_work, occupancy);
+
+ render_scheduler_.report_path_trace_time(
+ render_work, time_dt() - start_time, is_cancel_requested());
+}
+
+void PathTrace::adaptive_sample(RenderWork &render_work)
+{
+ if (!render_work.adaptive_sampling.filter) {
+ return;
+ }
+
+ bool did_reschedule_on_idle = false;
+
+ while (true) {
+ VLOG(3) << "Will filter adaptive stopping buffer, threshold "
+ << render_work.adaptive_sampling.threshold;
+ if (render_work.adaptive_sampling.reset) {
+ VLOG(3) << "Will re-calculate convergency flag for currently converged pixels.";
+ }
+
+ const double start_time = time_dt();
+
+ uint num_active_pixels = 0;
+ tbb::parallel_for_each(path_trace_works_, [&](unique_ptr<PathTraceWork> &path_trace_work) {
+ const uint num_active_pixels_in_work =
+ path_trace_work->adaptive_sampling_converge_filter_count_active(
+ render_work.adaptive_sampling.threshold, render_work.adaptive_sampling.reset);
+ if (num_active_pixels_in_work) {
+ atomic_add_and_fetch_u(&num_active_pixels, num_active_pixels_in_work);
+ }
+ });
+
+ render_scheduler_.report_adaptive_filter_time(
+ render_work, time_dt() - start_time, is_cancel_requested());
+
+ if (num_active_pixels == 0) {
+ VLOG(3) << "All pixels converged.";
+ if (!render_scheduler_.render_work_reschedule_on_converge(render_work)) {
+ break;
+ }
+ VLOG(3) << "Continuing with lower threshold.";
+ }
+ else if (did_reschedule_on_idle) {
+ break;
+ }
+ else if (num_active_pixels < 128 * 128) {
+      /* NOTE: The hardcoded value of 128^2 is an empirical value chosen to keep the GPU busy, so
+       * that there is no performance loss from the progressive noise floor feature.
+       *
+       * A better heuristic is possible here: for example, use the maximum of 128^2 and a
+       * percentage of the final resolution. */
+ if (!render_scheduler_.render_work_reschedule_on_idle(render_work)) {
+ VLOG(3) << "Rescheduling is not possible: final threshold is reached.";
+ break;
+ }
+ VLOG(3) << "Rescheduling lower threshold.";
+ did_reschedule_on_idle = true;
+ }
+ else {
+ break;
+ }
+ }
+}
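+
+/* NOTE: A summary of how the filtering loop above terminates:
+ *  - all pixels converged and the scheduler can not re-schedule with a lower threshold;
+ *  - a re-schedule on idle has already been performed once;
+ *  - fewer than 128 * 128 = 16384 pixels are active but the final threshold is reached;
+ *  - enough pixels are still active, in which case path tracing more samples is the better use
+ *    of the devices. */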
+
+void PathTrace::set_denoiser_params(const DenoiseParams &params)
+{
+ render_scheduler_.set_denoiser_params(params);
+
+ if (!params.use) {
+ denoiser_.reset();
+ return;
+ }
+
+ if (denoiser_) {
+ const DenoiseParams old_denoiser_params = denoiser_->get_params();
+ if (old_denoiser_params.type == params.type) {
+ denoiser_->set_params(params);
+ return;
+ }
+ }
+
+ denoiser_ = Denoiser::create(device_, params);
+ denoiser_->is_cancelled_cb = [this]() { return is_cancel_requested(); };
+}
+
+void PathTrace::set_adaptive_sampling(const AdaptiveSampling &adaptive_sampling)
+{
+ render_scheduler_.set_adaptive_sampling(adaptive_sampling);
+}
+
+void PathTrace::cryptomatte_postprocess(const RenderWork &render_work)
+{
+ if (!render_work.cryptomatte.postprocess) {
+ return;
+ }
+ VLOG(3) << "Perform cryptomatte work.";
+
+ tbb::parallel_for_each(path_trace_works_, [&](unique_ptr<PathTraceWork> &path_trace_work) {
+ path_trace_work->cryptomatte_postproces();
+ });
+}
+
+void PathTrace::denoise(const RenderWork &render_work)
+{
+ if (!render_work.tile.denoise) {
+ return;
+ }
+
+ if (!denoiser_) {
+ /* Denoiser was not configured, so nothing to do here. */
+ return;
+ }
+
+ VLOG(3) << "Perform denoising work.";
+
+ const double start_time = time_dt();
+
+ RenderBuffers *buffer_to_denoise = nullptr;
+
+ unique_ptr<RenderBuffers> multi_device_buffers;
+ bool allow_inplace_modification = false;
+
+ if (path_trace_works_.size() == 1) {
+ buffer_to_denoise = path_trace_works_.front()->get_render_buffers();
+ }
+ else {
+ Device *denoiser_device = denoiser_->get_denoiser_device();
+ if (!denoiser_device) {
+ return;
+ }
+
+ multi_device_buffers = make_unique<RenderBuffers>(denoiser_device);
+ multi_device_buffers->reset(render_state_.effective_big_tile_params);
+
+ buffer_to_denoise = multi_device_buffers.get();
+
+ copy_to_render_buffers(multi_device_buffers.get());
+
+ allow_inplace_modification = true;
+ }
+
+ if (denoiser_->denoise_buffer(render_state_.effective_big_tile_params,
+ buffer_to_denoise,
+ get_num_samples_in_buffer(),
+ allow_inplace_modification)) {
+ render_state_.has_denoised_result = true;
+ }
+
+ if (multi_device_buffers) {
+ multi_device_buffers->copy_from_device();
+ tbb::parallel_for_each(
+ path_trace_works_, [&multi_device_buffers](unique_ptr<PathTraceWork> &path_trace_work) {
+ path_trace_work->copy_from_denoised_render_buffers(multi_device_buffers.get());
+ });
+ }
+
+ render_scheduler_.report_denoise_time(render_work, time_dt() - start_time);
+}
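+
+/* Sketch of the multi-device flow above, using the calls as they appear in this function:
+ *
+ *   copy_to_render_buffers(multi_device_buffers.get());  // gather per-work slices into one buffer
+ *   denoiser_->denoise_buffer(...);                       // denoise on the denoiser device
+ *   multi_device_buffers->copy_from_device();             // bring the denoised result back
+ *   copy_from_denoised_render_buffers(...);               // scatter passes back to every work
+ */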
+
+void PathTrace::set_gpu_display(unique_ptr<GPUDisplay> gpu_display)
+{
+ gpu_display_ = move(gpu_display);
+}
+
+void PathTrace::clear_gpu_display()
+{
+ if (gpu_display_) {
+ gpu_display_->clear();
+ }
+}
+
+void PathTrace::draw()
+{
+ if (!gpu_display_) {
+ return;
+ }
+
+ did_draw_after_reset_ |= gpu_display_->draw();
+}
+
+void PathTrace::update_display(const RenderWork &render_work)
+{
+ if (!render_work.display.update) {
+ return;
+ }
+
+ if (!gpu_display_ && !tile_buffer_update_cb) {
+ VLOG(3) << "Ignore display update.";
+ return;
+ }
+
+ if (full_params_.width == 0 || full_params_.height == 0) {
+ VLOG(3) << "Skipping GPUDisplay update due to 0 size of the render buffer.";
+ return;
+ }
+
+ const double start_time = time_dt();
+
+ if (tile_buffer_update_cb) {
+ VLOG(3) << "Invoke buffer update callback.";
+
+ tile_buffer_update_cb();
+ }
+
+ if (gpu_display_) {
+ VLOG(3) << "Perform copy to GPUDisplay work.";
+
+ const int resolution_divider = render_work.resolution_divider;
+ const int texture_width = max(1, full_params_.width / resolution_divider);
+ const int texture_height = max(1, full_params_.height / resolution_divider);
+ if (!gpu_display_->update_begin(texture_width, texture_height)) {
+ LOG(ERROR) << "Error beginning GPUDisplay update.";
+ return;
+ }
+
+ const PassMode pass_mode = render_work.display.use_denoised_result &&
+ render_state_.has_denoised_result ?
+ PassMode::DENOISED :
+ PassMode::NOISY;
+
+ /* TODO(sergey): When using multi-device rendering map the GPUDisplay once and copy data from
+ * all works in parallel. */
+ const int num_samples = get_num_samples_in_buffer();
+ for (auto &&path_trace_work : path_trace_works_) {
+ path_trace_work->copy_to_gpu_display(gpu_display_.get(), pass_mode, num_samples);
+ }
+
+ gpu_display_->update_end();
+ }
+
+ render_scheduler_.report_display_update_time(render_work, time_dt() - start_time);
+}
+
+void PathTrace::rebalance(const RenderWork &render_work)
+{
+ static const int kLogLevel = 3;
+
+ if (!render_work.rebalance) {
+ return;
+ }
+
+ const int num_works = path_trace_works_.size();
+
+ if (num_works == 1) {
+ VLOG(kLogLevel) << "Ignoring rebalance work due to single device render.";
+ return;
+ }
+
+ const double start_time = time_dt();
+
+ if (VLOG_IS_ON(kLogLevel)) {
+ VLOG(kLogLevel) << "Perform rebalance work.";
+ VLOG(kLogLevel) << "Per-device path tracing time (seconds):";
+ for (int i = 0; i < num_works; ++i) {
+ VLOG(kLogLevel) << path_trace_works_[i]->get_device()->info.description << ": "
+ << work_balance_infos_[i].time_spent;
+ }
+ }
+
+ const bool did_rebalance = work_balance_do_rebalance(work_balance_infos_);
+
+ if (VLOG_IS_ON(kLogLevel)) {
+ VLOG(kLogLevel) << "Calculated per-device weights for works:";
+ for (int i = 0; i < num_works; ++i) {
+ VLOG(kLogLevel) << path_trace_works_[i]->get_device()->info.description << ": "
+ << work_balance_infos_[i].weight;
+ }
+ }
+
+ if (!did_rebalance) {
+ VLOG(kLogLevel) << "Balance in path trace works did not change.";
+ render_scheduler_.report_rebalance_time(render_work, time_dt() - start_time, false);
+ return;
+ }
+
+ RenderBuffers big_tile_cpu_buffers(cpu_device_.get());
+ big_tile_cpu_buffers.reset(render_state_.effective_big_tile_params);
+
+ copy_to_render_buffers(&big_tile_cpu_buffers);
+
+ render_state_.need_reset_params = true;
+ update_work_buffer_params_if_needed(render_work);
+
+ copy_from_render_buffers(&big_tile_cpu_buffers);
+
+ render_scheduler_.report_rebalance_time(render_work, time_dt() - start_time, true);
+}
+
+void PathTrace::write_tile_buffer(const RenderWork &render_work)
+{
+ if (!render_work.tile.write) {
+ return;
+ }
+
+ VLOG(3) << "Write tile result.";
+
+ render_state_.tile_written = true;
+
+ const bool has_multiple_tiles = tile_manager_.has_multiple_tiles();
+
+  /* Write the render tile result, but only if not using tiled rendering.
+   *
+   * Tiles are written to a file during rendering, and handed to the software at the end of
+   * rendering (either when all tiles are finished, or when rendering was requested to be
+   * cancelled).
+   *
+   * The important thing is: the tile should be written to the software via the callback only
+   * once. */
+ if (!has_multiple_tiles) {
+ VLOG(3) << "Write tile result via buffer write callback.";
+ tile_buffer_write();
+ }
+
+  /* Write tile to disk, so that the render work's render buffer can be re-used for the next
+   * tile. */
+ if (has_multiple_tiles) {
+ VLOG(3) << "Write tile result into .";
+ tile_buffer_write_to_disk();
+ }
+}
+
+void PathTrace::finalize_full_buffer_on_disk(const RenderWork &render_work)
+{
+ if (!render_work.full.write) {
+ return;
+ }
+
+ VLOG(3) << "Handle full-frame render buffer work.";
+
+ if (!tile_manager_.has_written_tiles()) {
+ VLOG(3) << "No tiles on disk.";
+ return;
+ }
+
+  /* Make sure writing to the file is fully finished.
+   * This will include writing any possibly missing tiles, ensuring the validity of the file. */
+ tile_manager_.finish_write_tiles();
+
+ /* NOTE: The rest of full-frame post-processing (such as full-frame denoising) will be done after
+ * all scenes and layers are rendered by the Session (which happens after freeing Session memory,
+ * so that we never hold scene and full-frame buffer in memory at the same time). */
+}
+
+void PathTrace::cancel()
+{
+ thread_scoped_lock lock(render_cancel_.mutex);
+
+ render_cancel_.is_requested = true;
+
+ while (render_cancel_.is_rendering) {
+ render_cancel_.condition.wait(lock);
+ }
+
+ render_cancel_.is_requested = false;
+}
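+
+/* NOTE: `cancel()` pairs with the bookkeeping in `render()`: that function refuses to start once
+ * `is_requested` is set, flags `is_rendering` under `render_cancel_.mutex`, and notifies
+ * `render_cancel_.condition` when it finishes, which is what the wait loop above relies on. The
+ * request flag is then reset so that a subsequent `render()` call is not refused. */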
+
+int PathTrace::get_num_samples_in_buffer()
+{
+ return render_scheduler_.get_num_rendered_samples();
+}
+
+bool PathTrace::is_cancel_requested()
+{
+ if (render_cancel_.is_requested) {
+ return true;
+ }
+
+ if (progress_ != nullptr) {
+ if (progress_->get_cancel()) {
+ return true;
+ }
+ }
+
+ return false;
+}
+
+void PathTrace::tile_buffer_write()
+{
+ if (!tile_buffer_write_cb) {
+ return;
+ }
+
+ tile_buffer_write_cb();
+}
+
+void PathTrace::tile_buffer_read()
+{
+ if (!tile_buffer_read_cb) {
+ return;
+ }
+
+ if (tile_buffer_read_cb()) {
+ tbb::parallel_for_each(path_trace_works_, [](unique_ptr<PathTraceWork> &path_trace_work) {
+ path_trace_work->copy_render_buffers_to_device();
+ });
+ }
+}
+
+void PathTrace::tile_buffer_write_to_disk()
+{
+ /* Sample count pass is required to support per-tile partial results stored in the file. */
+ DCHECK_NE(big_tile_params_.get_pass_offset(PASS_SAMPLE_COUNT), PASS_UNUSED);
+
+ const int num_rendered_samples = render_scheduler_.get_num_rendered_samples();
+
+ if (num_rendered_samples == 0) {
+ /* The tile has zero samples, no need to write it. */
+ return;
+ }
+
+ /* Get access to the CPU-side render buffers of the current big tile. */
+ RenderBuffers *buffers;
+ RenderBuffers big_tile_cpu_buffers(cpu_device_.get());
+
+ if (path_trace_works_.size() == 1) {
+ path_trace_works_[0]->copy_render_buffers_from_device();
+ buffers = path_trace_works_[0]->get_render_buffers();
+ }
+ else {
+ big_tile_cpu_buffers.reset(render_state_.effective_big_tile_params);
+ copy_to_render_buffers(&big_tile_cpu_buffers);
+
+ buffers = &big_tile_cpu_buffers;
+ }
+
+ if (!tile_manager_.write_tile(*buffers)) {
+ LOG(ERROR) << "Error writing tile to file.";
+ }
+}
+
+void PathTrace::progress_update_if_needed(const RenderWork &render_work)
+{
+ if (progress_ != nullptr) {
+ const int2 tile_size = get_render_tile_size();
+ const int num_samples_added = tile_size.x * tile_size.y * render_work.path_trace.num_samples;
+ const int current_sample = render_work.path_trace.start_sample +
+ render_work.path_trace.num_samples;
+ progress_->add_samples(num_samples_added, current_sample);
+ }
+
+ if (progress_update_cb) {
+ progress_update_cb();
+ }
+}
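+
+/* Illustrative arithmetic for the progress accounting above: a 1920x1080 render tile to which 10
+ * new samples were added reports 1920 * 1080 * 10 = 20,736,000 samples. */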
+
+void PathTrace::progress_set_status(const string &status, const string &substatus)
+{
+ if (progress_ != nullptr) {
+ progress_->set_status(status, substatus);
+ }
+}
+
+void PathTrace::copy_to_render_buffers(RenderBuffers *render_buffers)
+{
+ tbb::parallel_for_each(path_trace_works_,
+ [&render_buffers](unique_ptr<PathTraceWork> &path_trace_work) {
+ path_trace_work->copy_to_render_buffers(render_buffers);
+ });
+ render_buffers->copy_to_device();
+}
+
+void PathTrace::copy_from_render_buffers(RenderBuffers *render_buffers)
+{
+ render_buffers->copy_from_device();
+ tbb::parallel_for_each(path_trace_works_,
+ [&render_buffers](unique_ptr<PathTraceWork> &path_trace_work) {
+ path_trace_work->copy_from_render_buffers(render_buffers);
+ });
+}
+
+bool PathTrace::copy_render_tile_from_device()
+{
+ if (full_frame_state_.render_buffers) {
+ /* Full-frame buffer is always allocated on CPU. */
+ return true;
+ }
+
+ bool success = true;
+
+ tbb::parallel_for_each(path_trace_works_, [&](unique_ptr<PathTraceWork> &path_trace_work) {
+ if (!success) {
+ return;
+ }
+ if (!path_trace_work->copy_render_buffers_from_device()) {
+ success = false;
+ }
+ });
+
+ return success;
+}
+
+static string get_layer_view_name(const RenderBuffers &buffers)
+{
+ string result;
+
+ if (buffers.params.layer.size()) {
+ result += string(buffers.params.layer);
+ }
+
+ if (buffers.params.view.size()) {
+ if (!result.empty()) {
+ result += ", ";
+ }
+ result += string(buffers.params.view);
+ }
+
+ return result;
+}
+
+void PathTrace::process_full_buffer_from_disk(string_view filename)
+{
+ VLOG(3) << "Processing full frame buffer file " << filename;
+
+ progress_set_status("Reading full buffer from disk");
+
+ RenderBuffers full_frame_buffers(cpu_device_.get());
+
+ DenoiseParams denoise_params;
+ if (!tile_manager_.read_full_buffer_from_disk(filename, &full_frame_buffers, &denoise_params)) {
+ LOG(ERROR) << "Error reading tiles from file.";
+ return;
+ }
+
+ const string layer_view_name = get_layer_view_name(full_frame_buffers);
+
+ render_state_.has_denoised_result = false;
+
+ if (denoise_params.use) {
+ progress_set_status(layer_view_name, "Denoising");
+
+ /* Re-use the denoiser as much as possible, avoiding possible device re-initialization.
+ *
+ * It will not conflict with the regular rendering as:
+ * - Rendering is supposed to be finished here.
+   * - The next rendering will go via Session's `run_update_for_next_iteration`, which will
+   *   ensure the proper denoiser is used. */
+ set_denoiser_params(denoise_params);
+
+    /* The number of samples doesn't matter much, since the sample count pass will be used. */
+ denoiser_->denoise_buffer(full_frame_buffers.params, &full_frame_buffers, 0, false);
+
+ render_state_.has_denoised_result = true;
+ }
+
+ full_frame_state_.render_buffers = &full_frame_buffers;
+
+ progress_set_status(layer_view_name, "Finishing");
+
+  /* Write the full result pretending that there is a single tile.
+   * Requires some state change, but allows us to use the same communication API with the
+   * software. */
+ tile_buffer_write();
+
+ full_frame_state_.render_buffers = nullptr;
+}
+
+int PathTrace::get_num_render_tile_samples() const
+{
+ if (full_frame_state_.render_buffers) {
+    /* If the full-frame buffer is read from disk the number of samples is not used, as there is
+     * a sample count pass for that in the buffer. Just avoid accessing the badly defined state
+     * of the path state. */
+ return 0;
+ }
+
+ return render_scheduler_.get_num_rendered_samples();
+}
+
+bool PathTrace::get_render_tile_pixels(const PassAccessor &pass_accessor,
+ const PassAccessor::Destination &destination)
+{
+ if (full_frame_state_.render_buffers) {
+ return pass_accessor.get_render_tile_pixels(full_frame_state_.render_buffers, destination);
+ }
+
+ bool success = true;
+
+ tbb::parallel_for_each(path_trace_works_, [&](unique_ptr<PathTraceWork> &path_trace_work) {
+ if (!success) {
+ return;
+ }
+ if (!path_trace_work->get_render_tile_pixels(pass_accessor, destination)) {
+ success = false;
+ }
+ });
+
+ return success;
+}
+
+bool PathTrace::set_render_tile_pixels(PassAccessor &pass_accessor,
+ const PassAccessor::Source &source)
+{
+ bool success = true;
+
+ tbb::parallel_for_each(path_trace_works_, [&](unique_ptr<PathTraceWork> &path_trace_work) {
+ if (!success) {
+ return;
+ }
+ if (!path_trace_work->set_render_tile_pixels(pass_accessor, source)) {
+ success = false;
+ }
+ });
+
+ return success;
+}
+
+int2 PathTrace::get_render_tile_size() const
+{
+ if (full_frame_state_.render_buffers) {
+ return make_int2(full_frame_state_.render_buffers->params.width,
+ full_frame_state_.render_buffers->params.height);
+ }
+
+ const Tile &tile = tile_manager_.get_current_tile();
+ return make_int2(tile.width, tile.height);
+}
+
+int2 PathTrace::get_render_tile_offset() const
+{
+ if (full_frame_state_.render_buffers) {
+ return make_int2(0, 0);
+ }
+
+ const Tile &tile = tile_manager_.get_current_tile();
+ return make_int2(tile.x, tile.y);
+}
+
+const BufferParams &PathTrace::get_render_tile_params() const
+{
+ if (full_frame_state_.render_buffers) {
+ return full_frame_state_.render_buffers->params;
+ }
+
+ return big_tile_params_;
+}
+
+bool PathTrace::has_denoised_result() const
+{
+ return render_state_.has_denoised_result;
+}
+
+/* --------------------------------------------------------------------
+ * Report generation.
+ */
+
+static const char *device_type_for_description(const DeviceType type)
+{
+ switch (type) {
+ case DEVICE_NONE:
+ return "None";
+
+ case DEVICE_CPU:
+ return "CPU";
+ case DEVICE_CUDA:
+ return "CUDA";
+ case DEVICE_OPTIX:
+ return "OptiX";
+ case DEVICE_DUMMY:
+ return "Dummy";
+ case DEVICE_MULTI:
+ return "Multi";
+ }
+
+ return "UNKNOWN";
+}
+
+/* Construct description of the device which will appear in the full report. */
+/* TODO(sergey): Consider making it a more reusable utility. */
+static string full_device_info_description(const DeviceInfo &device_info)
+{
+ string full_description = device_info.description;
+
+ full_description += " (" + string(device_type_for_description(device_info.type)) + ")";
+
+ if (device_info.display_device) {
+ full_description += " (display)";
+ }
+
+ if (device_info.type == DEVICE_CPU) {
+ full_description += " (" + to_string(device_info.cpu_threads) + " threads)";
+ }
+
+ full_description += " [" + device_info.id + "]";
+
+ return full_description;
+}
+
+/* Construct a string which will contain information about the devices, possibly multiple of them.
+ *
+ * In the simple case the result looks like:
+ *
+ * Message: Full Device Description
+ *
+ * If there are multiple devices then the result looks like:
+ *
+ * Message: Full First Device Description
+ * Full Second Device Description
+ *
+ * Note that the newlines are placed in a way so that the result can be easily concatenated to the
+ * full report. */
+static string device_info_list_report(const string &message, const DeviceInfo &device_info)
+{
+ string result = "\n" + message + ": ";
+ const string pad(message.length() + 2, ' ');
+
+ if (device_info.multi_devices.empty()) {
+ result += full_device_info_description(device_info) + "\n";
+ return result;
+ }
+
+ bool is_first = true;
+ for (const DeviceInfo &sub_device_info : device_info.multi_devices) {
+ if (!is_first) {
+ result += pad;
+ }
+
+ result += full_device_info_description(sub_device_info) + "\n";
+
+ is_first = false;
+ }
+
+ return result;
+}
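+
+/* Illustrative output of the report above, with hypothetical device descriptions:
+ *
+ *   Path tracing on: GeForce RTX 2080 (OptiX) [OPTIX_0]
+ *                    Intel Core i9 (CPU) (16 threads) [CPU]
+ *
+ * The `pad` string aligns continuation lines under the first device description. */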
+
+static string path_trace_devices_report(const vector<unique_ptr<PathTraceWork>> &path_trace_works)
+{
+ DeviceInfo device_info;
+ device_info.type = DEVICE_MULTI;
+
+ for (auto &&path_trace_work : path_trace_works) {
+ device_info.multi_devices.push_back(path_trace_work->get_device()->info);
+ }
+
+ return device_info_list_report("Path tracing on", device_info);
+}
+
+static string denoiser_device_report(const Denoiser *denoiser)
+{
+ if (!denoiser) {
+ return "";
+ }
+
+ if (!denoiser->get_params().use) {
+ return "";
+ }
+
+ const Device *denoiser_device = denoiser->get_denoiser_device();
+ if (!denoiser_device) {
+ return "";
+ }
+
+ return device_info_list_report("Denoising on", denoiser_device->info);
+}
+
+string PathTrace::full_report() const
+{
+ string result = "\nFull path tracing report\n";
+
+ result += path_trace_devices_report(path_trace_works_);
+ result += denoiser_device_report(denoiser_.get());
+
+ /* Report from the render scheduler, which includes:
+ * - Render mode (interactive, offline, headless)
+ * - Adaptive sampling and denoiser parameters
+ * - Breakdown of timing. */
+ result += render_scheduler_.full_report();
+
+ return result;
+}
+
+CCL_NAMESPACE_END