git.blender.org/blender.git

Diffstat (limited to 'intern/cycles/integrator/path_trace.cpp')
-rw-r--r--  intern/cycles/integrator/path_trace.cpp  1144
1 file changed, 1144 insertions(+), 0 deletions(-)
diff --git a/intern/cycles/integrator/path_trace.cpp b/intern/cycles/integrator/path_trace.cpp
new file mode 100644
index 00000000000..b62a06aea43
--- /dev/null
+++ b/intern/cycles/integrator/path_trace.cpp
@@ -0,0 +1,1144 @@
+/*
+ * Copyright 2011-2021 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "integrator/path_trace.h"
+
+#include "device/cpu/device.h"
+#include "device/device.h"
+#include "integrator/pass_accessor.h"
+#include "integrator/render_scheduler.h"
+#include "render/gpu_display.h"
+#include "render/pass.h"
+#include "render/scene.h"
+#include "render/tile.h"
+#include "util/util_algorithm.h"
+#include "util/util_logging.h"
+#include "util/util_progress.h"
+#include "util/util_tbb.h"
+#include "util/util_time.h"
+
+CCL_NAMESPACE_BEGIN
+
+PathTrace::PathTrace(Device *device,
+ Film *film,
+ DeviceScene *device_scene,
+ RenderScheduler &render_scheduler,
+ TileManager &tile_manager)
+ : device_(device),
+ device_scene_(device_scene),
+ render_scheduler_(render_scheduler),
+ tile_manager_(tile_manager)
+{
+ DCHECK_NE(device_, nullptr);
+
+ {
+ vector<DeviceInfo> cpu_devices;
+ device_cpu_info(cpu_devices);
+
+ cpu_device_.reset(device_cpu_create(cpu_devices[0], device->stats, device->profiler));
+ }
+
+ /* Create path tracing work in advance, so that it can be reused by incremental sampling as much
+ * as possible. */
+ device_->foreach_device([&](Device *path_trace_device) {
+ path_trace_works_.emplace_back(PathTraceWork::create(
+ path_trace_device, film, device_scene, &render_cancel_.is_requested));
+ });
+
+ work_balance_infos_.resize(path_trace_works_.size());
+ work_balance_do_initial(work_balance_infos_);
+
+ render_scheduler.set_need_schedule_rebalance(path_trace_works_.size() > 1);
+}
+
+PathTrace::~PathTrace()
+{
+ /* Destroy any GPU resource which was used for graphics interop.
+ * This needs access to the GPUDisplay, as it is the only source of the drawing context which is
+ * used for interop. */
+ if (gpu_display_) {
+ for (auto &&path_trace_work : path_trace_works_) {
+ path_trace_work->destroy_gpu_resources(gpu_display_.get());
+ }
+ }
+}
+
+void PathTrace::load_kernels()
+{
+ if (denoiser_) {
+ denoiser_->load_kernels(progress_);
+ }
+}
+
+void PathTrace::alloc_work_memory()
+{
+ for (auto &&path_trace_work : path_trace_works_) {
+ path_trace_work->alloc_work_memory();
+ }
+}
+
+bool PathTrace::ready_to_reset()
+{
+ /* The logic here is optimized for the best feedback in the viewport, which implies having a GPU
+ * display. If there is no such display, the logic here will break. */
+ DCHECK(gpu_display_);
+
+ /* The logic here tries to provide behavior which feels the most interactive to artists. The
+ * general idea is to be able to reset as quickly as possible, while still providing
+ * interactive feedback.
+ *
+ * If the render result was ever drawn after previous reset, consider that reset is now possible.
+ * This way camera navigation gives the quickest feedback of rendered pixels, regardless of
+ * whether CPU or GPU drawing pipeline is used.
+ *
+ * Consider a reset happening after a redraw "slow" enough to not clog anything. This is a bit
+ * arbitrary, but seems to work very well with viewport navigation in Blender. */
+
+ if (did_draw_after_reset_) {
+ return true;
+ }
+
+ return false;
+}
+
+void PathTrace::reset(const BufferParams &full_params, const BufferParams &big_tile_params)
+{
+ if (big_tile_params_.modified(big_tile_params)) {
+ big_tile_params_ = big_tile_params;
+ render_state_.need_reset_params = true;
+ }
+
+ full_params_ = full_params;
+
+ /* NOTE: GPU display checks for buffer modification and avoids unnecessary re-allocation.
+ * It is still required to inform it about the reset whenever one happens, so that the redraw
+ * state tracking is properly updated. */
+ if (gpu_display_) {
+ gpu_display_->reset(full_params);
+ }
+
+ render_state_.has_denoised_result = false;
+ render_state_.tile_written = false;
+
+ did_draw_after_reset_ = false;
+}
+
+void PathTrace::device_free()
+{
+ /* Free render buffers used by the path trace work to reduce memory peak. */
+ BufferParams empty_params;
+ empty_params.pass_stride = 0;
+ empty_params.update_offset_stride();
+ for (auto &&path_trace_work : path_trace_works_) {
+ path_trace_work->get_render_buffers()->reset(empty_params);
+ }
+ render_state_.need_reset_params = true;
+}
+
+void PathTrace::set_progress(Progress *progress)
+{
+ progress_ = progress;
+}
+
+void PathTrace::render(const RenderWork &render_work)
+{
+ /* Indicate that rendering has started and that it can be requested to cancel. */
+ {
+ thread_scoped_lock lock(render_cancel_.mutex);
+ if (render_cancel_.is_requested) {
+ return;
+ }
+ render_cancel_.is_rendering = true;
+ }
+
+ render_pipeline(render_work);
+
+ /* Indicate that rendering has finished, so that the thread which requested `cancel()` can
+ * carry on. */
+ {
+ thread_scoped_lock lock(render_cancel_.mutex);
+ render_cancel_.is_rendering = false;
+ render_cancel_.condition.notify_one();
+ }
+}
+
+void PathTrace::render_pipeline(RenderWork render_work)
+{
+ /* NOTE: Only check for "instant" cancel here. The user-requested cancel via progress is
+ * checked in the Session, and in that case the current work is still finished here. */
+
+ render_scheduler_.set_need_schedule_cryptomatte(device_scene_->data.film.cryptomatte_passes !=
+ 0);
+
+ render_init_kernel_execution();
+
+ render_scheduler_.report_work_begin(render_work);
+
+ init_render_buffers(render_work);
+
+ rebalance(render_work);
+
+ path_trace(render_work);
+ if (render_cancel_.is_requested) {
+ return;
+ }
+
+ adaptive_sample(render_work);
+ if (render_cancel_.is_requested) {
+ return;
+ }
+
+ cryptomatte_postprocess(render_work);
+ if (render_cancel_.is_requested) {
+ return;
+ }
+
+ denoise(render_work);
+ if (render_cancel_.is_requested) {
+ return;
+ }
+
+ write_tile_buffer(render_work);
+ update_display(render_work);
+
+ progress_update_if_needed(render_work);
+
+ finalize_full_buffer_on_disk(render_work);
+}
+
+void PathTrace::render_init_kernel_execution()
+{
+ for (auto &&path_trace_work : path_trace_works_) {
+ path_trace_work->init_execution();
+ }
+}
+
+/* TODO(sergey): Look into `std::function` rather than using a template (see the sketch after
+ * this function). Should not be a measurable performance impact at runtime, but will make
+ * compilation faster and the binary somewhat smaller. */
+template<typename Callback>
+static void foreach_sliced_buffer_params(const vector<unique_ptr<PathTraceWork>> &path_trace_works,
+ const vector<WorkBalanceInfo> &work_balance_infos,
+ const BufferParams &buffer_params,
+ const Callback &callback)
+{
+ const int num_works = path_trace_works.size();
+ const int height = buffer_params.height;
+
+ int current_y = 0;
+ for (int i = 0; i < num_works; ++i) {
+ const double weight = work_balance_infos[i].weight;
+ const int slice_height = max(lround(height * weight), 1);
+
+ /* Disallow negative values to deal with situations when there are more compute devices than
+ * scanlines. */
+ const int remaining_height = max(0, height - current_y);
+
+ BufferParams slice_params = buffer_params;
+ slice_params.full_y = buffer_params.full_y + current_y;
+ if (i < num_works - 1) {
+ slice_params.height = min(slice_height, remaining_height);
+ }
+ else {
+ slice_params.height = remaining_height;
+ }
+
+ slice_params.update_offset_stride();
+
+ callback(path_trace_works[i].get(), slice_params);
+
+ current_y += slice_params.height;
+ }
+}
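+
+/* For reference: a `std::function`-based variant of the declaration above, as the TODO suggests.
+ * This is a sketch under the assumption that util_function.h (or <functional>) is available; the
+ * alias name is illustrative, not existing code:
+ *
+ *   using SlicedBufferParamsCallback =
+ *       std::function<void(PathTraceWork *, const BufferParams &)>;
+ *
+ *   static void foreach_sliced_buffer_params(
+ *       const vector<unique_ptr<PathTraceWork>> &path_trace_works,
+ *       const vector<WorkBalanceInfo> &work_balance_infos,
+ *       const BufferParams &buffer_params,
+ *       const SlicedBufferParamsCallback &callback);
+ *
+ * The body would stay unchanged; the lambdas at the call sites would be type-erased instead of
+ * instantiating the template. */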
+
+void PathTrace::update_allocated_work_buffer_params()
+{
+ foreach_sliced_buffer_params(path_trace_works_,
+ work_balance_infos_,
+ big_tile_params_,
+ [](PathTraceWork *path_trace_work, const BufferParams &params) {
+ RenderBuffers *buffers = path_trace_work->get_render_buffers();
+ buffers->reset(params);
+ });
+}
+
+static BufferParams scale_buffer_params(const BufferParams &params, int resolution_divider)
+{
+ BufferParams scaled_params = params;
+
+ scaled_params.width = max(1, params.width / resolution_divider);
+ scaled_params.height = max(1, params.height / resolution_divider);
+ scaled_params.full_x = params.full_x / resolution_divider;
+ scaled_params.full_y = params.full_y / resolution_divider;
+ scaled_params.full_width = params.full_width / resolution_divider;
+ scaled_params.full_height = params.full_height / resolution_divider;
+
+ scaled_params.update_offset_stride();
+
+ return scaled_params;
+}
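+
+/* Worked example for the scaling above (illustrative numbers): with resolution_divider = 4, a
+ * 1920x1080 buffer becomes 480x270, and full_x/full_y shift accordingly. The max(1, ...) clamps
+ * keep a tiny buffer from collapsing to zero pixels in either dimension. */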
+
+void PathTrace::update_effective_work_buffer_params(const RenderWork &render_work)
+{
+ const int resolution_divider = render_work.resolution_divider;
+
+ const BufferParams scaled_full_params = scale_buffer_params(full_params_, resolution_divider);
+ const BufferParams scaled_big_tile_params = scale_buffer_params(big_tile_params_,
+ resolution_divider);
+
+ foreach_sliced_buffer_params(path_trace_works_,
+ work_balance_infos_,
+ scaled_big_tile_params,
+ [&](PathTraceWork *path_trace_work, const BufferParams &params) {
+ path_trace_work->set_effective_buffer_params(
+ scaled_full_params, scaled_big_tile_params, params);
+ });
+
+ render_state_.effective_big_tile_params = scaled_big_tile_params;
+}
+
+void PathTrace::update_work_buffer_params_if_needed(const RenderWork &render_work)
+{
+ if (render_state_.need_reset_params) {
+ update_allocated_work_buffer_params();
+ }
+
+ if (render_state_.need_reset_params ||
+ render_state_.resolution_divider != render_work.resolution_divider) {
+ update_effective_work_buffer_params(render_work);
+ }
+
+ render_state_.resolution_divider = render_work.resolution_divider;
+ render_state_.need_reset_params = false;
+}
+
+void PathTrace::init_render_buffers(const RenderWork &render_work)
+{
+ update_work_buffer_params_if_needed(render_work);
+
+ /* Handle initialization scheduled by the render scheduler. */
+ if (render_work.init_render_buffers) {
+ tbb::parallel_for_each(path_trace_works_, [&](unique_ptr<PathTraceWork> &path_trace_work) {
+ path_trace_work->zero_render_buffers();
+ });
+
+ tile_buffer_read();
+ }
+}
+
+void PathTrace::path_trace(RenderWork &render_work)
+{
+ if (!render_work.path_trace.num_samples) {
+ return;
+ }
+
+ VLOG(3) << "Will path trace " << render_work.path_trace.num_samples
+ << " samples at the resolution divider " << render_work.resolution_divider;
+
+ const double start_time = time_dt();
+
+ const int num_works = path_trace_works_.size();
+
+ tbb::parallel_for(0, num_works, [&](int i) {
+ const double work_start_time = time_dt();
+ const int num_samples = render_work.path_trace.num_samples;
+
+ PathTraceWork *path_trace_work = path_trace_works_[i].get();
+
+ PathTraceWork::RenderStatistics statistics;
+ path_trace_work->render_samples(statistics, render_work.path_trace.start_sample, num_samples);
+
+ const double work_time = time_dt() - work_start_time;
+ work_balance_infos_[i].time_spent += work_time;
+ work_balance_infos_[i].occupancy = statistics.occupancy;
+
+ VLOG(3) << "Rendered " << num_samples << " samples in " << work_time << " seconds ("
+ << work_time / num_samples
+ << " seconds per sample), occupancy: " << statistics.occupancy;
+ });
+
+ float occupancy_accum = 0.0f;
+ for (const WorkBalanceInfo &balance_info : work_balance_infos_) {
+ occupancy_accum += balance_info.occupancy;
+ }
+ const float occupancy = occupancy_accum / num_works;
+ render_scheduler_.report_path_trace_occupancy(render_work, occupancy);
+
+ render_scheduler_.report_path_trace_time(
+ render_work, time_dt() - start_time, is_cancel_requested());
+}
+
+void PathTrace::adaptive_sample(RenderWork &render_work)
+{
+ if (!render_work.adaptive_sampling.filter) {
+ return;
+ }
+
+ bool did_reschedule_on_idle = false;
+
+ while (true) {
+ VLOG(3) << "Will filter adaptive stopping buffer, threshold "
+ << render_work.adaptive_sampling.threshold;
+ if (render_work.adaptive_sampling.reset) {
+ VLOG(3) << "Will re-calculate convergency flag for currently converged pixels.";
+ }
+
+ const double start_time = time_dt();
+
+ uint num_active_pixels = 0;
+ tbb::parallel_for_each(path_trace_works_, [&](unique_ptr<PathTraceWork> &path_trace_work) {
+ const uint num_active_pixels_in_work =
+ path_trace_work->adaptive_sampling_converge_filter_count_active(
+ render_work.adaptive_sampling.threshold, render_work.adaptive_sampling.reset);
+ if (num_active_pixels_in_work) {
+ atomic_add_and_fetch_u(&num_active_pixels, num_active_pixels_in_work);
+ }
+ });
+
+ render_scheduler_.report_adaptive_filter_time(
+ render_work, time_dt() - start_time, is_cancel_requested());
+
+ if (num_active_pixels == 0) {
+ VLOG(3) << "All pixels converged.";
+ if (!render_scheduler_.render_work_reschedule_on_converge(render_work)) {
+ break;
+ }
+ VLOG(3) << "Continuing with lower threshold.";
+ }
+ else if (did_reschedule_on_idle) {
+ break;
+ }
+ else if (num_active_pixels < 128 * 128) {
+ /* NOTE: The hardcoded value of 128^2 is an empirical value to keep the GPU busy, so that
+ * there is no performance loss from the progressive noise floor feature.
+ *
+ * A better heuristic is possible here: for example, use the maximum of 128^2 and a
+ * percentage of the final resolution (see the sketch after this function). */
+ if (!render_scheduler_.render_work_reschedule_on_idle(render_work)) {
+ VLOG(3) << "Rescheduling is not possible: final threshold is reached.";
+ break;
+ }
+ VLOG(3) << "Rescheduling lower threshold.";
+ did_reschedule_on_idle = true;
+ }
+ else {
+ break;
+ }
+ }
+}
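+
+/* A sketch of the better heuristic mentioned in the NOTE above. Not wired in; the helper name
+ * and the 1% figure are assumptions, not part of the current scheduler:
+ *
+ *   static uint active_pixels_idle_threshold(const BufferParams &params)
+ *   {
+ *     const uint min_active_pixels = 128 * 128;
+ *     // Assumption: consider the device idle when fewer than 1% of the
+ *     // final-resolution pixels are still active.
+ *     const uint resolution_based = (params.full_width * params.full_height) / 100;
+ *     return max(min_active_pixels, resolution_based);
+ *   }
+ *
+ * The `num_active_pixels < 128 * 128` check would then compare against this value instead. */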
+
+void PathTrace::set_denoiser_params(const DenoiseParams &params)
+{
+ render_scheduler_.set_denoiser_params(params);
+
+ if (!params.use) {
+ denoiser_.reset();
+ return;
+ }
+
+ if (denoiser_) {
+ const DenoiseParams old_denoiser_params = denoiser_->get_params();
+ if (old_denoiser_params.type == params.type) {
+ denoiser_->set_params(params);
+ return;
+ }
+ }
+
+ denoiser_ = Denoiser::create(device_, params);
+ denoiser_->is_cancelled_cb = [this]() { return is_cancel_requested(); };
+}
+
+void PathTrace::set_adaptive_sampling(const AdaptiveSampling &adaptive_sampling)
+{
+ render_scheduler_.set_adaptive_sampling(adaptive_sampling);
+}
+
+void PathTrace::cryptomatte_postprocess(const RenderWork &render_work)
+{
+ if (!render_work.cryptomatte.postprocess) {
+ return;
+ }
+ VLOG(3) << "Perform cryptomatte work.";
+
+ tbb::parallel_for_each(path_trace_works_, [&](unique_ptr<PathTraceWork> &path_trace_work) {
+ path_trace_work->cryptomatte_postproces();
+ });
+}
+
+void PathTrace::denoise(const RenderWork &render_work)
+{
+ if (!render_work.tile.denoise) {
+ return;
+ }
+
+ if (!denoiser_) {
+ /* Denoiser was not configured, so nothing to do here. */
+ return;
+ }
+
+ VLOG(3) << "Perform denoising work.";
+
+ const double start_time = time_dt();
+
+ RenderBuffers *buffer_to_denoise = nullptr;
+
+ unique_ptr<RenderBuffers> multi_device_buffers;
+ bool allow_inplace_modification = false;
+
+ if (path_trace_works_.size() == 1) {
+ buffer_to_denoise = path_trace_works_.front()->get_render_buffers();
+ }
+ else {
+ Device *denoiser_device = denoiser_->get_denoiser_device();
+ if (!denoiser_device) {
+ return;
+ }
+
+ multi_device_buffers = make_unique<RenderBuffers>(denoiser_device);
+ multi_device_buffers->reset(render_state_.effective_big_tile_params);
+
+ buffer_to_denoise = multi_device_buffers.get();
+
+ copy_to_render_buffers(multi_device_buffers.get());
+
+ allow_inplace_modification = true;
+ }
+
+ if (denoiser_->denoise_buffer(render_state_.effective_big_tile_params,
+ buffer_to_denoise,
+ get_num_samples_in_buffer(),
+ allow_inplace_modification)) {
+ render_state_.has_denoised_result = true;
+ }
+
+ if (multi_device_buffers) {
+ multi_device_buffers->copy_from_device();
+ tbb::parallel_for_each(
+ path_trace_works_, [&multi_device_buffers](unique_ptr<PathTraceWork> &path_trace_work) {
+ path_trace_work->copy_from_denoised_render_buffers(multi_device_buffers.get());
+ });
+ }
+
+ render_scheduler_.report_denoise_time(render_work, time_dt() - start_time);
+}
+
+void PathTrace::set_gpu_display(unique_ptr<GPUDisplay> gpu_display)
+{
+ gpu_display_ = move(gpu_display);
+}
+
+void PathTrace::clear_gpu_display()
+{
+ if (gpu_display_) {
+ gpu_display_->clear();
+ }
+}
+
+void PathTrace::draw()
+{
+ if (!gpu_display_) {
+ return;
+ }
+
+ did_draw_after_reset_ |= gpu_display_->draw();
+}
+
+void PathTrace::update_display(const RenderWork &render_work)
+{
+ if (!render_work.display.update) {
+ return;
+ }
+
+ if (!gpu_display_ && !tile_buffer_update_cb) {
+ VLOG(3) << "Ignore display update.";
+ return;
+ }
+
+ if (full_params_.width == 0 || full_params_.height == 0) {
+ VLOG(3) << "Skipping GPUDisplay update due to 0 size of the render buffer.";
+ return;
+ }
+
+ const double start_time = time_dt();
+
+ if (tile_buffer_update_cb) {
+ VLOG(3) << "Invoke buffer update callback.";
+
+ tile_buffer_update_cb();
+ }
+
+ if (gpu_display_) {
+ VLOG(3) << "Perform copy to GPUDisplay work.";
+
+ const int resolution_divider = render_work.resolution_divider;
+ const int texture_width = max(1, full_params_.width / resolution_divider);
+ const int texture_height = max(1, full_params_.height / resolution_divider);
+ if (!gpu_display_->update_begin(texture_width, texture_height)) {
+ LOG(ERROR) << "Error beginning GPUDisplay update.";
+ return;
+ }
+
+ const PassMode pass_mode = render_work.display.use_denoised_result &&
+ render_state_.has_denoised_result ?
+ PassMode::DENOISED :
+ PassMode::NOISY;
+
+ /* TODO(sergey): When using multi-device rendering map the GPUDisplay once and copy data from
+ * all works in parallel. */
+ const int num_samples = get_num_samples_in_buffer();
+ for (auto &&path_trace_work : path_trace_works_) {
+ path_trace_work->copy_to_gpu_display(gpu_display_.get(), pass_mode, num_samples);
+ }
+
+ gpu_display_->update_end();
+ }
+
+ render_scheduler_.report_display_update_time(render_work, time_dt() - start_time);
+}
+
+void PathTrace::rebalance(const RenderWork &render_work)
+{
+ static const int kLogLevel = 3;
+
+ if (!render_work.rebalance) {
+ return;
+ }
+
+ const int num_works = path_trace_works_.size();
+
+ if (num_works == 1) {
+ VLOG(kLogLevel) << "Ignoring rebalance work due to single device render.";
+ return;
+ }
+
+ const double start_time = time_dt();
+
+ if (VLOG_IS_ON(kLogLevel)) {
+ VLOG(kLogLevel) << "Perform rebalance work.";
+ VLOG(kLogLevel) << "Per-device path tracing time (seconds):";
+ for (int i = 0; i < num_works; ++i) {
+ VLOG(kLogLevel) << path_trace_works_[i]->get_device()->info.description << ": "
+ << work_balance_infos_[i].time_spent;
+ }
+ }
+
+ const bool did_rebalance = work_balance_do_rebalance(work_balance_infos_);
+
+ if (VLOG_IS_ON(kLogLevel)) {
+ VLOG(kLogLevel) << "Calculated per-device weights for works:";
+ for (int i = 0; i < num_works; ++i) {
+ VLOG(kLogLevel) << path_trace_works_[i]->get_device()->info.description << ": "
+ << work_balance_infos_[i].weight;
+ }
+ }
+
+ if (!did_rebalance) {
+ VLOG(kLogLevel) << "Balance in path trace works did not change.";
+ render_scheduler_.report_rebalance_time(render_work, time_dt() - start_time, false);
+ return;
+ }
+
+ RenderBuffers big_tile_cpu_buffers(cpu_device_.get());
+ big_tile_cpu_buffers.reset(render_state_.effective_big_tile_params);
+
+ copy_to_render_buffers(&big_tile_cpu_buffers);
+
+ render_state_.need_reset_params = true;
+ update_work_buffer_params_if_needed(render_work);
+
+ copy_from_render_buffers(&big_tile_cpu_buffers);
+
+ render_scheduler_.report_rebalance_time(render_work, time_dt() - start_time, true);
+}
+
+void PathTrace::write_tile_buffer(const RenderWork &render_work)
+{
+ if (!render_work.tile.write) {
+ return;
+ }
+
+ VLOG(3) << "Write tile result.";
+
+ render_state_.tile_written = true;
+
+ const bool has_multiple_tiles = tile_manager_.has_multiple_tiles();
+
+ /* Write render tile result, but only if not using tiled rendering.
+ *
+ * Tiles are written to a file during rendering, and written to the software at the end
+ * of rendering (either when all tiles are finished, or when rendering was requested to be
+ * canceled).
+ *
+ * The important thing is: the tile should be written to the software via the callback only
+ * once. */
+ if (!has_multiple_tiles) {
+ VLOG(3) << "Write tile result via buffer write callback.";
+ tile_buffer_write();
+ }
+
+ /* Write tile to disk, so that the render work's render buffer can be re-used for the next tile.
+ */
+ if (has_multiple_tiles) {
+ VLOG(3) << "Write tile result into .";
+ tile_buffer_write_to_disk();
+ }
+}
+
+void PathTrace::finalize_full_buffer_on_disk(const RenderWork &render_work)
+{
+ if (!render_work.full.write) {
+ return;
+ }
+
+ VLOG(3) << "Handle full-frame render buffer work.";
+
+ if (!tile_manager_.has_written_tiles()) {
+ VLOG(3) << "No tiles on disk.";
+ return;
+ }
+
+ /* Make sure writing to the file is fully finished.
+ * This will include writing all possible missing tiles, ensuring the validity of the file. */
+ tile_manager_.finish_write_tiles();
+
+ /* NOTE: The rest of full-frame post-processing (such as full-frame denoising) will be done after
+ * all scenes and layers are rendered by the Session (which happens after freeing Session memory,
+ * so that we never hold scene and full-frame buffer in memory at the same time). */
+}
+
+void PathTrace::cancel()
+{
+ thread_scoped_lock lock(render_cancel_.mutex);
+
+ render_cancel_.is_requested = true;
+
+ while (render_cancel_.is_rendering) {
+ render_cancel_.condition.wait(lock);
+ }
+
+ render_cancel_.is_requested = false;
+}
+
+int PathTrace::get_num_samples_in_buffer()
+{
+ return render_scheduler_.get_num_rendered_samples();
+}
+
+bool PathTrace::is_cancel_requested()
+{
+ if (render_cancel_.is_requested) {
+ return true;
+ }
+
+ if (progress_ != nullptr) {
+ if (progress_->get_cancel()) {
+ return true;
+ }
+ }
+
+ return false;
+}
+
+void PathTrace::tile_buffer_write()
+{
+ if (!tile_buffer_write_cb) {
+ return;
+ }
+
+ tile_buffer_write_cb();
+}
+
+void PathTrace::tile_buffer_read()
+{
+ if (!tile_buffer_read_cb) {
+ return;
+ }
+
+ if (tile_buffer_read_cb()) {
+ tbb::parallel_for_each(path_trace_works_, [](unique_ptr<PathTraceWork> &path_trace_work) {
+ path_trace_work->copy_render_buffers_to_device();
+ });
+ }
+}
+
+void PathTrace::tile_buffer_write_to_disk()
+{
+ /* Sample count pass is required to support per-tile partial results stored in the file. */
+ DCHECK_NE(big_tile_params_.get_pass_offset(PASS_SAMPLE_COUNT), PASS_UNUSED);
+
+ const int num_rendered_samples = render_scheduler_.get_num_rendered_samples();
+
+ if (num_rendered_samples == 0) {
+ /* The tile has zero samples, no need to write it. */
+ return;
+ }
+
+ /* Get access to the CPU-side render buffers of the current big tile. */
+ RenderBuffers *buffers;
+ RenderBuffers big_tile_cpu_buffers(cpu_device_.get());
+
+ if (path_trace_works_.size() == 1) {
+ path_trace_works_[0]->copy_render_buffers_from_device();
+ buffers = path_trace_works_[0]->get_render_buffers();
+ }
+ else {
+ big_tile_cpu_buffers.reset(render_state_.effective_big_tile_params);
+ copy_to_render_buffers(&big_tile_cpu_buffers);
+
+ buffers = &big_tile_cpu_buffers;
+ }
+
+ if (!tile_manager_.write_tile(*buffers)) {
+ LOG(ERROR) << "Error writing tile to file.";
+ }
+}
+
+void PathTrace::progress_update_if_needed(const RenderWork &render_work)
+{
+ if (progress_ != nullptr) {
+ const int2 tile_size = get_render_tile_size();
+ const int num_samples_added = tile_size.x * tile_size.y * render_work.path_trace.num_samples;
+ const int current_sample = render_work.path_trace.start_sample +
+ render_work.path_trace.num_samples;
+ progress_->add_samples(num_samples_added, current_sample);
+ }
+
+ if (progress_update_cb) {
+ progress_update_cb();
+ }
+}
+
+void PathTrace::progress_set_status(const string &status, const string &substatus)
+{
+ if (progress_ != nullptr) {
+ progress_->set_status(status, substatus);
+ }
+}
+
+void PathTrace::copy_to_render_buffers(RenderBuffers *render_buffers)
+{
+ tbb::parallel_for_each(path_trace_works_,
+ [&render_buffers](unique_ptr<PathTraceWork> &path_trace_work) {
+ path_trace_work->copy_to_render_buffers(render_buffers);
+ });
+ render_buffers->copy_to_device();
+}
+
+void PathTrace::copy_from_render_buffers(RenderBuffers *render_buffers)
+{
+ render_buffers->copy_from_device();
+ tbb::parallel_for_each(path_trace_works_,
+ [&render_buffers](unique_ptr<PathTraceWork> &path_trace_work) {
+ path_trace_work->copy_from_render_buffers(render_buffers);
+ });
+}
+
+bool PathTrace::copy_render_tile_from_device()
+{
+ if (full_frame_state_.render_buffers) {
+ /* Full-frame buffer is always allocated on CPU. */
+ return true;
+ }
+
+ bool success = true;
+
+ tbb::parallel_for_each(path_trace_works_, [&](unique_ptr<PathTraceWork> &path_trace_work) {
+ if (!success) {
+ return;
+ }
+ if (!path_trace_work->copy_render_buffers_from_device()) {
+ success = false;
+ }
+ });
+
+ return success;
+}
+
+static string get_layer_view_name(const RenderBuffers &buffers)
+{
+ string result;
+
+ if (buffers.params.layer.size()) {
+ result += string(buffers.params.layer);
+ }
+
+ if (buffers.params.view.size()) {
+ if (!result.empty()) {
+ result += ", ";
+ }
+ result += string(buffers.params.view);
+ }
+
+ return result;
+}
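+
+/* Example (illustrative values): a layer named "View Layer" with view "left" yields
+ * "View Layer, left"; when no multi-view is configured the result is just the layer name. */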
+
+void PathTrace::process_full_buffer_from_disk(string_view filename)
+{
+ VLOG(3) << "Processing full frame buffer file " << filename;
+
+ progress_set_status("Reading full buffer from disk");
+
+ RenderBuffers full_frame_buffers(cpu_device_.get());
+
+ DenoiseParams denoise_params;
+ if (!tile_manager_.read_full_buffer_from_disk(filename, &full_frame_buffers, &denoise_params)) {
+ LOG(ERROR) << "Error reading tiles from file.";
+ return;
+ }
+
+ const string layer_view_name = get_layer_view_name(full_frame_buffers);
+
+ render_state_.has_denoised_result = false;
+
+ if (denoise_params.use) {
+ progress_set_status(layer_view_name, "Denoising");
+
+ /* Re-use the denoiser as much as possible, avoiding possible device re-initialization.
+ *
+ * It will not conflict with the regular rendering as:
+ * - Rendering is supposed to be finished here.
+ * - The next rendering will go via Session's `run_update_for_next_iteration` which will
+ * ensure proper denoiser is used. */
+ set_denoiser_params(denoise_params);
+
+ /* The number of samples doesn't matter too much, since the sample count pass will be used. */
+ denoiser_->denoise_buffer(full_frame_buffers.params, &full_frame_buffers, 0, false);
+
+ render_state_.has_denoised_result = true;
+ }
+
+ full_frame_state_.render_buffers = &full_frame_buffers;
+
+ progress_set_status(layer_view_name, "Finishing");
+
+ /* Write the full result pretending that there is a single tile.
+ * Requires some state change, but allows using the same communication API with the software. */
+ tile_buffer_write();
+
+ full_frame_state_.render_buffers = nullptr;
+}
+
+int PathTrace::get_num_render_tile_samples() const
+{
+ if (full_frame_state_.render_buffers) {
+ return full_frame_state_.render_buffers->params.samples;
+ }
+
+ return render_scheduler_.get_num_rendered_samples();
+}
+
+bool PathTrace::get_render_tile_pixels(const PassAccessor &pass_accessor,
+ const PassAccessor::Destination &destination)
+{
+ if (full_frame_state_.render_buffers) {
+ return pass_accessor.get_render_tile_pixels(full_frame_state_.render_buffers, destination);
+ }
+
+ bool success = true;
+
+ tbb::parallel_for_each(path_trace_works_, [&](unique_ptr<PathTraceWork> &path_trace_work) {
+ if (!success) {
+ return;
+ }
+ if (!path_trace_work->get_render_tile_pixels(pass_accessor, destination)) {
+ success = false;
+ }
+ });
+
+ return success;
+}
+
+bool PathTrace::set_render_tile_pixels(PassAccessor &pass_accessor,
+ const PassAccessor::Source &source)
+{
+ bool success = true;
+
+ tbb::parallel_for_each(path_trace_works_, [&](unique_ptr<PathTraceWork> &path_trace_work) {
+ if (!success) {
+ return;
+ }
+ if (!path_trace_work->set_render_tile_pixels(pass_accessor, source)) {
+ success = false;
+ }
+ });
+
+ return success;
+}
+
+int2 PathTrace::get_render_tile_size() const
+{
+ if (full_frame_state_.render_buffers) {
+ return make_int2(full_frame_state_.render_buffers->params.width,
+ full_frame_state_.render_buffers->params.height);
+ }
+
+ const Tile &tile = tile_manager_.get_current_tile();
+ return make_int2(tile.width, tile.height);
+}
+
+int2 PathTrace::get_render_tile_offset() const
+{
+ if (full_frame_state_.render_buffers) {
+ return make_int2(0, 0);
+ }
+
+ const Tile &tile = tile_manager_.get_current_tile();
+ return make_int2(tile.x, tile.y);
+}
+
+const BufferParams &PathTrace::get_render_tile_params() const
+{
+ if (full_frame_state_.render_buffers) {
+ return full_frame_state_.render_buffers->params;
+ }
+
+ return big_tile_params_;
+}
+
+bool PathTrace::has_denoised_result() const
+{
+ return render_state_.has_denoised_result;
+}
+
+/* --------------------------------------------------------------------
+ * Report generation.
+ */
+
+static const char *device_type_for_description(const DeviceType type)
+{
+ switch (type) {
+ case DEVICE_NONE:
+ return "None";
+
+ case DEVICE_CPU:
+ return "CPU";
+ case DEVICE_CUDA:
+ return "CUDA";
+ case DEVICE_OPTIX:
+ return "OptiX";
+ case DEVICE_DUMMY:
+ return "Dummy";
+ case DEVICE_MULTI:
+ return "Multi";
+ }
+
+ return "UNKNOWN";
+}
+
+/* Construct description of the device which will appear in the full report. */
+/* TODO(sergey): Consider making this a more reusable utility. */
+static string full_device_info_description(const DeviceInfo &device_info)
+{
+ string full_description = device_info.description;
+
+ full_description += " (" + string(device_type_for_description(device_info.type)) + ")";
+
+ if (device_info.display_device) {
+ full_description += " (display)";
+ }
+
+ if (device_info.type == DEVICE_CPU) {
+ full_description += " (" + to_string(device_info.cpu_threads) + " threads)";
+ }
+
+ full_description += " [" + device_info.id + "]";
+
+ return full_description;
+}
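+
+/* Example of a resulting description (illustrative values, not actual device data):
+ *
+ *   Intel Core i9 (CPU) (16 threads) [CPU]
+ *   GeForce RTX 3090 (OptiX) (display) [OPTIX_0000]
+ */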
+
+/* Construct a string which will contain information about the devices, possibly multiple of them.
+ *
+ * In the simple case the result looks like:
+ *
+ * Message: Full Device Description
+ *
+ * If there are multiple devices then the result looks like:
+ *
+ * Message: Full First Device Description
+ * Full Second Device Description
+ *
+ * Note that the newlines are placed so that the result can be easily concatenated into the
+ * full report. */
+static string device_info_list_report(const string &message, const DeviceInfo &device_info)
+{
+ string result = "\n" + message + ": ";
+ const string pad(message.length() + 2, ' ');
+
+ if (device_info.multi_devices.empty()) {
+ result += full_device_info_description(device_info) + "\n";
+ return result;
+ }
+
+ bool is_first = true;
+ for (const DeviceInfo &sub_device_info : device_info.multi_devices) {
+ if (!is_first) {
+ result += pad;
+ }
+
+ result += full_device_info_description(sub_device_info) + "\n";
+
+ is_first = false;
+ }
+
+ return result;
+}
+
+static string path_trace_devices_report(const vector<unique_ptr<PathTraceWork>> &path_trace_works)
+{
+ DeviceInfo device_info;
+ device_info.type = DEVICE_MULTI;
+
+ for (auto &&path_trace_work : path_trace_works) {
+ device_info.multi_devices.push_back(path_trace_work->get_device()->info);
+ }
+
+ return device_info_list_report("Path tracing on", device_info);
+}
+
+static string denoiser_device_report(const Denoiser *denoiser)
+{
+ if (!denoiser) {
+ return "";
+ }
+
+ if (!denoiser->get_params().use) {
+ return "";
+ }
+
+ const Device *denoiser_device = denoiser->get_denoiser_device();
+ if (!denoiser_device) {
+ return "";
+ }
+
+ return device_info_list_report("Denoising on", denoiser_device->info);
+}
+
+string PathTrace::full_report() const
+{
+ string result = "\nFull path tracing report\n";
+
+ result += path_trace_devices_report(path_trace_works_);
+ result += denoiser_device_report(denoiser_.get());
+
+ /* Report from the render scheduler, which includes:
+ * - Render mode (interactive, offline, headless)
+ * - Adaptive sampling and denoiser parameters
+ * - Breakdown of timing. */
+ result += render_scheduler_.full_report();
+
+ return result;
+}
+
+CCL_NAMESPACE_END