From d7d40745fa09061a3117bd3669c5a46bbf611eae Mon Sep 17 00:00:00 2001 From: Brecht Van Lommel Date: Sun, 24 Oct 2021 14:19:19 +0200 Subject: Cycles: changes to source code folders structure * Split render/ into scene/ and session/. The scene/ folder now contains the scene and its nodes. The session/ folder contains the render session and associated data structures like drivers and render buffers. * Move top level kernel headers into new folders kernel/camera/, kernel/film/, kernel/light/, kernel/sample/, kernel/util/ * Move integrator related kernel headers into kernel/integrator/ * Move OSL shaders from kernel/shaders/ to kernel/osl/shaders/ For patches and branches, git merge and rebase should be able to detect the renames and move over code to the right file. --- intern/cycles/session/CMakeLists.txt | 48 ++ intern/cycles/session/buffers.cpp | 384 ++++++++++++++ intern/cycles/session/buffers.h | 199 +++++++ intern/cycles/session/denoising.cpp | 934 +++++++++++++++++++++++++++++++++ intern/cycles/session/denoising.h | 216 ++++++++ intern/cycles/session/display_driver.h | 131 +++++ intern/cycles/session/merge.cpp | 516 ++++++++++++++++++ intern/cycles/session/merge.h | 43 ++ intern/cycles/session/output_driver.h | 82 +++ intern/cycles/session/session.cpp | 624 ++++++++++++++++++++++ intern/cycles/session/session.h | 229 ++++++++ intern/cycles/session/tile.cpp | 629 ++++++++++++++++++++++ intern/cycles/session/tile.h | 182 +++++++ 13 files changed, 4217 insertions(+) create mode 100644 intern/cycles/session/CMakeLists.txt create mode 100644 intern/cycles/session/buffers.cpp create mode 100644 intern/cycles/session/buffers.h create mode 100644 intern/cycles/session/denoising.cpp create mode 100644 intern/cycles/session/denoising.h create mode 100644 intern/cycles/session/display_driver.h create mode 100644 intern/cycles/session/merge.cpp create mode 100644 intern/cycles/session/merge.h create mode 100644 intern/cycles/session/output_driver.h create mode 100644 
intern/cycles/session/session.cpp create mode 100644 intern/cycles/session/session.h create mode 100644 intern/cycles/session/tile.cpp create mode 100644 intern/cycles/session/tile.h (limited to 'intern/cycles/session') diff --git a/intern/cycles/session/CMakeLists.txt b/intern/cycles/session/CMakeLists.txt new file mode 100644 index 00000000000..f441def128e --- /dev/null +++ b/intern/cycles/session/CMakeLists.txt @@ -0,0 +1,48 @@ +# Copyright 2011-2021 Blender Foundation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +set(INC + .. +) + +set(SRC + buffers.cpp + denoising.cpp + merge.cpp + session.cpp + tile.cpp +) + +set(SRC_HEADERS + buffers.h + display_driver.h + denoising.h + merge.h + output_driver.h + session.h + tile.h +) + +set(LIB + cycles_device + cycles_integrator + cycles_util +) + +include_directories(${INC}) +include_directories(SYSTEM ${INC_SYS}) + +add_definitions(${GL_DEFINITIONS}) + +cycles_add_library(cycles_session "${LIB}" ${SRC} ${SRC_HEADERS}) diff --git a/intern/cycles/session/buffers.cpp b/intern/cycles/session/buffers.cpp new file mode 100644 index 00000000000..439c0f826ea --- /dev/null +++ b/intern/cycles/session/buffers.cpp @@ -0,0 +1,384 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +#include "device/device.h" +#include "session/buffers.h" + +#include "util/util_foreach.h" +#include "util/util_hash.h" +#include "util/util_math.h" +#include "util/util_time.h" +#include "util/util_types.h" + +CCL_NAMESPACE_BEGIN + +/* -------------------------------------------------------------------- + * Convert part information to an index of `BufferParams::pass_offset_`. + */ + +static int pass_type_mode_to_index(PassType pass_type, PassMode mode) +{ + int index = static_cast(pass_type) * 2; + + if (mode == PassMode::DENOISED) { + ++index; + } + + return index; +} + +static int pass_to_index(const BufferPass &pass) +{ + return pass_type_mode_to_index(pass.type, pass.mode); +} + +/* -------------------------------------------------------------------- + * Buffer pass. 
+ */ + +NODE_DEFINE(BufferPass) +{ + NodeType *type = NodeType::add("buffer_pass", create); + + const NodeEnum *pass_type_enum = Pass::get_type_enum(); + const NodeEnum *pass_mode_enum = Pass::get_mode_enum(); + + SOCKET_ENUM(type, "Type", *pass_type_enum, PASS_COMBINED); + SOCKET_ENUM(mode, "Mode", *pass_mode_enum, static_cast(PassMode::DENOISED)); + SOCKET_STRING(name, "Name", ustring()); + SOCKET_BOOLEAN(include_albedo, "Include Albedo", false); + + SOCKET_INT(offset, "Offset", -1); + + return type; +} + +BufferPass::BufferPass() : Node(get_node_type()) +{ +} + +BufferPass::BufferPass(const Pass *scene_pass) + : Node(get_node_type()), + type(scene_pass->get_type()), + mode(scene_pass->get_mode()), + name(scene_pass->get_name()), + include_albedo(scene_pass->get_include_albedo()) +{ +} + +PassInfo BufferPass::get_info() const +{ + return Pass::get_info(type, include_albedo); +} + +/* -------------------------------------------------------------------- + * Buffer Params. + */ + +NODE_DEFINE(BufferParams) +{ + NodeType *type = NodeType::add("buffer_params", create); + + SOCKET_INT(width, "Width", 0); + SOCKET_INT(height, "Height", 0); + + SOCKET_INT(window_x, "Window X", 0); + SOCKET_INT(window_y, "Window Y", 0); + SOCKET_INT(window_width, "Window Width", 0); + SOCKET_INT(window_height, "Window Height", 0); + + SOCKET_INT(full_x, "Full X", 0); + SOCKET_INT(full_y, "Full Y", 0); + SOCKET_INT(full_width, "Full Width", 0); + SOCKET_INT(full_height, "Full Height", 0); + + SOCKET_STRING(layer, "Layer", ustring()); + SOCKET_STRING(view, "View", ustring()); + SOCKET_INT(samples, "Samples", 0); + SOCKET_FLOAT(exposure, "Exposure", 1.0f); + SOCKET_BOOLEAN(use_approximate_shadow_catcher, "Use Approximate Shadow Catcher", false); + SOCKET_BOOLEAN(use_transparent_background, "Transparent Background", false); + + /* Notes: + * - Skip passes since they do not follow typical container socket definition. + * Might look into covering those as a socket in the future. 
+ * + * - Skip offset, stride, and pass stride since those can be delivered from the passes and + * rest of the sockets. */ + + return type; +} + +BufferParams::BufferParams() : Node(get_node_type()) +{ + reset_pass_offset(); +} + +void BufferParams::update_passes() +{ + update_offset_stride(); + reset_pass_offset(); + + pass_stride = 0; + for (const BufferPass &pass : passes) { + if (pass.offset != PASS_UNUSED) { + const int index = pass_to_index(pass); + if (pass_offset_[index] == PASS_UNUSED) { + pass_offset_[index] = pass_stride; + } + + pass_stride += pass.get_info().num_components; + } + } +} + +void BufferParams::update_passes(const vector &scene_passes) +{ + passes.clear(); + + pass_stride = 0; + for (const Pass *scene_pass : scene_passes) { + BufferPass buffer_pass(scene_pass); + + if (scene_pass->is_written()) { + buffer_pass.offset = pass_stride; + pass_stride += scene_pass->get_info().num_components; + } + else { + buffer_pass.offset = PASS_UNUSED; + } + + passes.emplace_back(std::move(buffer_pass)); + } + + update_passes(); +} + +void BufferParams::reset_pass_offset() +{ + for (int i = 0; i < kNumPassOffsets; ++i) { + pass_offset_[i] = PASS_UNUSED; + } +} + +int BufferParams::get_pass_offset(PassType pass_type, PassMode mode) const +{ + if (pass_type == PASS_NONE || pass_type == PASS_UNUSED) { + return PASS_UNUSED; + } + + const int index = pass_type_mode_to_index(pass_type, mode); + return pass_offset_[index]; +} + +const BufferPass *BufferParams::find_pass(string_view name) const +{ + for (const BufferPass &pass : passes) { + if (pass.name == name) { + return &pass; + } + } + + return nullptr; +} + +const BufferPass *BufferParams::find_pass(PassType type, PassMode mode) const +{ + for (const BufferPass &pass : passes) { + if (pass.type == type && pass.mode == mode) { + return &pass; + } + } + + return nullptr; +} + +const BufferPass *BufferParams::get_actual_display_pass(PassType type, PassMode mode) const +{ + const BufferPass *pass = 
find_pass(type, mode); + return get_actual_display_pass(pass); +} + +const BufferPass *BufferParams::get_actual_display_pass(const BufferPass *pass) const +{ + if (!pass) { + return nullptr; + } + + if (pass->type == PASS_COMBINED) { + const BufferPass *shadow_catcher_matte_pass = find_pass(PASS_SHADOW_CATCHER_MATTE, pass->mode); + if (shadow_catcher_matte_pass) { + pass = shadow_catcher_matte_pass; + } + } + + return pass; +} + +void BufferParams::update_offset_stride() +{ + offset = -(full_x + full_y * width); + stride = width; +} + +/* Returns true when any of the compared parameters differs from `other`: buffer size, full and window geometry, offset and strides, layer and view names, exposure, the shadow catcher and transparent background flags, and the list of passes. The `samples` field is not part of the comparison. */ +bool BufferParams::modified(const BufferParams &other) const +{ + if (width != other.width || height != other.height) { + return true; + } + + if (full_x != other.full_x || full_y != other.full_y || full_width != other.full_width || + full_height != other.full_height) { + return true; + } + + if (window_x != other.window_x || window_y != other.window_y || + window_width != other.window_width || window_height != other.window_height) { + return true; + } + + if (offset != other.offset || stride != other.stride || pass_stride != other.pass_stride) { + return true; + } + + /* A different layer or view name is a modification, like every other field compared here. */ + if (layer != other.layer || view != other.view) { + return true; + } + + if (exposure != other.exposure || + use_approximate_shadow_catcher != other.use_approximate_shadow_catcher || + use_transparent_background != other.use_transparent_background) { + return true; + } + + return !(passes == other.passes); +} + +/* -------------------------------------------------------------------- + * Render Buffers. 
+ */ + +RenderBuffers::RenderBuffers(Device *device) : buffer(device, "RenderBuffers", MEM_READ_WRITE) +{ +} + +RenderBuffers::~RenderBuffers() +{ + buffer.free(); +} + +/* Adopt the given parameters and (re)allocate the buffer as `width * pass_stride` elements per row and `height` rows. Expects `pass_stride` to have been calculated already (checked with DCHECK). */ +void RenderBuffers::reset(const BufferParams &params_) +{ + DCHECK(params_.pass_stride != -1); + + params = params_; + + /* re-allocate buffer */ + buffer.alloc(params.width * params.pass_stride, params.height); +} + +void RenderBuffers::zero() +{ + buffer.zero_to_device(); +} + +bool RenderBuffers::copy_from_device() +{ + DCHECK(params.pass_stride != -1); + + if (!buffer.device_pointer) + return false; + + buffer.copy_from_device(0, params.width * params.pass_stride, params.height); + + return true; +} + +void RenderBuffers::copy_to_device() +{ + buffer.copy_to_device(); +} + +void render_buffers_host_copy_denoised(RenderBuffers *dst, + const BufferParams &dst_params, + const RenderBuffers *src, + const BufferParams &src_params, + const size_t src_offset) +{ + DCHECK_EQ(dst_params.width, src_params.width); + /* TODO(sergey): More sanity checks to avoid buffer overrun. */ + + /* Create a map of pass offsets to be copied. + * Assume offsets are different to allow copying passes between buffers with different set of + * passes. */ + + struct { + int dst_offset; + int src_offset; + } pass_offsets[PASS_NUM]; + + int num_passes = 0; + + for (int i = 0; i < PASS_NUM; ++i) { + const PassType pass_type = static_cast<PassType>(i); + + const int dst_pass_offset = dst_params.get_pass_offset(pass_type, PassMode::DENOISED); + if (dst_pass_offset == PASS_UNUSED) { + continue; + } + + const int src_pass_offset = src_params.get_pass_offset(pass_type, PassMode::DENOISED); + if (src_pass_offset == PASS_UNUSED) { + continue; + } + + pass_offsets[num_passes].dst_offset = dst_pass_offset; + pass_offsets[num_passes].src_offset = src_pass_offset; + ++num_passes; + } + + /* Copy passes. */ + /* TODO(sergey): Make it more reusable, allowing implement copy of noisy passes. 
*/ + + const int64_t dst_width = dst_params.width; + const int64_t dst_height = dst_params.height; + const int64_t dst_pass_stride = dst_params.pass_stride; + const int64_t dst_num_pixels = dst_width * dst_height; + + const int64_t src_pass_stride = src_params.pass_stride; + const int64_t src_offset_in_floats = src_offset * src_pass_stride; + + const float *src_pixel = src->buffer.data() + src_offset_in_floats; + float *dst_pixel = dst->buffer.data(); + + for (int i = 0; i < dst_num_pixels; + ++i, src_pixel += src_pass_stride, dst_pixel += dst_pass_stride) { + for (int pass_offset_idx = 0; pass_offset_idx < num_passes; ++pass_offset_idx) { + const int dst_pass_offset = pass_offsets[pass_offset_idx].dst_offset; + const int src_pass_offset = pass_offsets[pass_offset_idx].src_offset; + + /* TODO(sergey): Support non-RGBA passes. */ + dst_pixel[dst_pass_offset + 0] = src_pixel[src_pass_offset + 0]; + dst_pixel[dst_pass_offset + 1] = src_pixel[src_pass_offset + 1]; + dst_pixel[dst_pass_offset + 2] = src_pixel[src_pass_offset + 2]; + dst_pixel[dst_pass_offset + 3] = src_pixel[src_pass_offset + 3]; + } + } +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/session/buffers.h b/intern/cycles/session/buffers.h new file mode 100644 index 00000000000..4c261430bb6 --- /dev/null +++ b/intern/cycles/session/buffers.h @@ -0,0 +1,199 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __BUFFERS_H__ +#define __BUFFERS_H__ + +#include "device/device_memory.h" +#include "graph/node.h" +#include "scene/pass.h" + +#include "kernel/kernel_types.h" + +#include "util/util_half.h" +#include "util/util_string.h" +#include "util/util_thread.h" +#include "util/util_types.h" + +CCL_NAMESPACE_BEGIN + +class Device; +struct DeviceDrawParams; +struct float4; + +/* NOTE: Is not a real scene node. Using Node API for ease of (de)serialization. */ +class BufferPass : public Node { + public: + NODE_DECLARE + + PassType type = PASS_NONE; + PassMode mode = PassMode::NOISY; + ustring name; + bool include_albedo = false; + + int offset = -1; + + BufferPass(); + explicit BufferPass(const Pass *scene_pass); + + BufferPass(BufferPass &&other) noexcept = default; + BufferPass(const BufferPass &other) = default; + + BufferPass &operator=(BufferPass &&other) = default; + BufferPass &operator=(const BufferPass &other) = default; + + ~BufferPass() = default; + + PassInfo get_info() const; + + inline bool operator==(const BufferPass &other) const + { + return type == other.type && mode == other.mode && name == other.name && + include_albedo == other.include_albedo && offset == other.offset; + } + inline bool operator!=(const BufferPass &other) const + { + return !(*this == other); + } +}; + +/* Buffer Parameters + * Size of render buffer and how it fits in the full image (border render). */ + +/* NOTE: Is not a real scene node. Using Node API for ease of (de)serialization. */ +class BufferParams : public Node { + public: + NODE_DECLARE + + /* Width/height of the physical buffer. */ + int width = 0; + int height = 0; + + /* Windows defines which part of the buffers is visible. The part outside of the window is + * considered an "overscan". + * + * Window X and Y are relative to the position of the buffer in the full buffer. 
*/ + int window_x = 0; + int window_y = 0; + int window_width = 0; + int window_height = 0; + + /* Offset into and width/height of the full buffer. */ + int full_x = 0; + int full_y = 0; + int full_width = 0; + int full_height = 0; + + /* Runtime fields, only valid after `update_passes()` or `update_offset_stride()`. */ + int offset = -1, stride = -1; + + /* Runtime fields, only valid after `update_passes()`. */ + int pass_stride = -1; + + /* Properties which are used for accessing buffer pixels outside of scene graph. */ + vector passes; + ustring layer; + ustring view; + int samples = 0; + float exposure = 1.0f; + bool use_approximate_shadow_catcher = false; + bool use_transparent_background = false; + + BufferParams(); + + BufferParams(BufferParams &&other) noexcept = default; + BufferParams(const BufferParams &other) = default; + + BufferParams &operator=(BufferParams &&other) = default; + BufferParams &operator=(const BufferParams &other) = default; + + ~BufferParams() = default; + + /* Pre-calculate all fields which depends on the passes. + * + * When the scene passes are given, the buffer passes will be created from them and stored in + * this params, and then params are updated for those passes. + * The `update_passes()` without parameters updates offsets and strides which are stored outside + * of the passes. */ + void update_passes(); + void update_passes(const vector &scene_passes); + + /* Returns PASS_UNUSED if there is no such pass in the buffer. */ + int get_pass_offset(PassType type, PassMode mode = PassMode::NOISY) const; + + /* Returns nullptr if pass with given name does not exist. */ + const BufferPass *find_pass(string_view name) const; + const BufferPass *find_pass(PassType type, PassMode mode = PassMode::NOISY) const; + + /* Get display pass from its name. + * Will do special logic to replace combined pass with shadow catcher matte. 
*/ + const BufferPass *get_actual_display_pass(PassType type, PassMode mode = PassMode::NOISY) const; + const BufferPass *get_actual_display_pass(const BufferPass *pass) const; + + void update_offset_stride(); + + bool modified(const BufferParams &other) const; + + protected: + void reset_pass_offset(); + + /* Multiplied by 2 to be able to store noisy and denoised pass types. */ + static constexpr int kNumPassOffsets = PASS_NUM * 2; + + /* Indexed by an index derived from pass type and mode, indicates offset of the corresponding + * pass in the buffer. + * If there are multiple passes with same type and mode contains lowest offset of all of them. */ + int pass_offset_[kNumPassOffsets]; +}; + +/* Render Buffers */ + +class RenderBuffers { + public: + /* buffer parameters */ + BufferParams params; + + /* float buffer */ + device_vector buffer; + + explicit RenderBuffers(Device *device); + ~RenderBuffers(); + + void reset(const BufferParams ¶ms); + void zero(); + + bool copy_from_device(); + void copy_to_device(); +}; + +/* Copy denoised passes form source to destination. + * + * Buffer parameters are provided explicitly, allowing to copy pixels between render buffers which + * content corresponds to a render result at a non-unit resolution divider. + * + * `src_offset` allows to offset source pixel index which is used when a fraction of the source + * buffer is to be copied. + * + * Copy happens of the number of pixels in the destination. 
*/ +void render_buffers_host_copy_denoised(RenderBuffers *dst, + const BufferParams &dst_params, + const RenderBuffers *src, + const BufferParams &src_params, + const size_t src_offset = 0); + +CCL_NAMESPACE_END + +#endif /* __BUFFERS_H__ */ diff --git a/intern/cycles/session/denoising.cpp b/intern/cycles/session/denoising.cpp new file mode 100644 index 00000000000..21df068092a --- /dev/null +++ b/intern/cycles/session/denoising.cpp @@ -0,0 +1,934 @@ +/* + * Copyright 2011-2018 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "session/denoising.h" + +#if 0 + +# include "kernel/filter/filter_defines.h" + +# include "util/util_foreach.h" +# include "util/util_map.h" +# include "util/util_system.h" +# include "util/util_task.h" +# include "util/util_time.h" + +# include + +CCL_NAMESPACE_BEGIN + +/* Utility Functions */ + +static void print_progress(int num, int total, int frame, int num_frames) +{ + const char *label = "Denoise Frame "; + int cols = system_console_width(); + + cols -= strlen(label); + + int len = 1; + for (int x = total; x > 9; x /= 10) { + len++; + } + + int bars = cols - 2 * len - 6; + + printf("\r%s", label); + + if (num_frames > 1) { + int frame_len = 1; + for (int x = num_frames - 1; x > 9; x /= 10) { + frame_len++; + } + bars -= frame_len + 2; + printf("%*d ", frame_len, frame); + } + + int v = int(float(num) * bars / total); + printf("["); + for (int i = 0; i < v; i++) { + printf("="); + } + if (v < bars) { + printf(">"); + } + for (int i = v + 1; i < bars; i++) { + printf(" "); + } + printf(string_printf("] %%%dd / %d", len, total).c_str(), num); + fflush(stdout); +} + +/* Splits in at its last dot, setting suffix to the part after the dot and in to the part before + * it. Returns whether a dot was found. */ +static bool split_last_dot(string &in, string &suffix) +{ + size_t pos = in.rfind("."); + if (pos == string::npos) { + return false; + } + suffix = in.substr(pos + 1); + in = in.substr(0, pos); + return true; +} + +/* Separate channel names as generated by Blender. 
+ * If views is true: + * Inputs are expected in the form RenderLayer.Pass.View.Channel, sets renderlayer to + * "RenderLayer.View" Otherwise: Inputs are expected in the form RenderLayer.Pass.Channel */ +static bool parse_channel_name( + string name, string &renderlayer, string &pass, string &channel, bool multiview_channels) +{ + if (!split_last_dot(name, channel)) { + return false; + } + string view; + if (multiview_channels && !split_last_dot(name, view)) { + return false; + } + if (!split_last_dot(name, pass)) { + return false; + } + renderlayer = name; + + if (multiview_channels) { + renderlayer += "." + view; + } + + return true; +} + +/* Channel Mapping */ + +struct ChannelMapping { + int channel; + string name; +}; + +static void fill_mapping(vector &map, int pos, string name, string channels) +{ + for (const char *chan = channels.c_str(); *chan; chan++) { + map.push_back({pos++, name + "." + *chan}); + } +} + +static const int INPUT_NUM_CHANNELS = 15; +static const int INPUT_DENOISING_DEPTH = 0; +static const int INPUT_DENOISING_NORMAL = 1; +static const int INPUT_DENOISING_SHADOWING = 4; +static const int INPUT_DENOISING_ALBEDO = 5; +static const int INPUT_NOISY_IMAGE = 8; +static const int INPUT_DENOISING_VARIANCE = 11; +static const int INPUT_DENOISING_INTENSITY = 14; +static vector input_channels() +{ + vector map; + fill_mapping(map, INPUT_DENOISING_DEPTH, "Denoising Depth", "Z"); + fill_mapping(map, INPUT_DENOISING_NORMAL, "Denoising Normal", "XYZ"); + fill_mapping(map, INPUT_DENOISING_SHADOWING, "Denoising Shadowing", "X"); + fill_mapping(map, INPUT_DENOISING_ALBEDO, "Denoising Albedo", "RGB"); + fill_mapping(map, INPUT_NOISY_IMAGE, "Noisy Image", "RGB"); + fill_mapping(map, INPUT_DENOISING_VARIANCE, "Denoising Variance", "RGB"); + fill_mapping(map, INPUT_DENOISING_INTENSITY, "Denoising Intensity", "X"); + return map; +} + +static const int OUTPUT_NUM_CHANNELS = 3; +static vector output_channels() +{ + vector map; + fill_mapping(map, 0, "Combined", 
"RGB"); + return map; +} + +/* Renderlayer Handling */ + +bool DenoiseImageLayer::detect_denoising_channels() +{ + /* Map device input to image channels. */ + input_to_image_channel.clear(); + input_to_image_channel.resize(INPUT_NUM_CHANNELS, -1); + + foreach (const ChannelMapping &mapping, input_channels()) { + vector::iterator i = find(channels.begin(), channels.end(), mapping.name); + if (i == channels.end()) { + return false; + } + + size_t input_channel = mapping.channel; + size_t layer_channel = i - channels.begin(); + input_to_image_channel[input_channel] = layer_to_image_channel[layer_channel]; + } + + /* Map device output to image channels. */ + output_to_image_channel.clear(); + output_to_image_channel.resize(OUTPUT_NUM_CHANNELS, -1); + + foreach (const ChannelMapping &mapping, output_channels()) { + vector::iterator i = find(channels.begin(), channels.end(), mapping.name); + if (i == channels.end()) { + return false; + } + + size_t output_channel = mapping.channel; + size_t layer_channel = i - channels.begin(); + output_to_image_channel[output_channel] = layer_to_image_channel[layer_channel]; + } + + /* Check that all buffer channels are correctly set. 
*/ + for (int i = 0; i < INPUT_NUM_CHANNELS; i++) { + assert(input_to_image_channel[i] >= 0); + } + for (int i = 0; i < OUTPUT_NUM_CHANNELS; i++) { + assert(output_to_image_channel[i] >= 0); + } + + return true; +} + +bool DenoiseImageLayer::match_channels(int neighbor, + const std::vector &channelnames, + const std::vector &neighbor_channelnames) +{ + neighbor_input_to_image_channel.resize(neighbor + 1); + vector &mapping = neighbor_input_to_image_channel[neighbor]; + + assert(mapping.size() == 0); + mapping.resize(input_to_image_channel.size(), -1); + + for (int i = 0; i < input_to_image_channel.size(); i++) { + const string &channel = channelnames[input_to_image_channel[i]]; + std::vector::const_iterator frame_channel = find( + neighbor_channelnames.begin(), neighbor_channelnames.end(), channel); + + if (frame_channel == neighbor_channelnames.end()) { + return false; + } + + mapping[i] = frame_channel - neighbor_channelnames.begin(); + } + + return true; +} + +/* Denoise Task */ + +DenoiseTask::DenoiseTask(Device *device, + DenoiserPipeline *denoiser, + int frame, + const vector &neighbor_frames) + : denoiser(denoiser), + device(device), + frame(frame), + neighbor_frames(neighbor_frames), + current_layer(0), + input_pixels(device, "filter input buffer", MEM_READ_ONLY), + num_tiles(0) +{ + image.samples = denoiser->samples_override; +} + +DenoiseTask::~DenoiseTask() +{ + free(); +} + +/* Device callbacks */ + +bool DenoiseTask::acquire_tile(Device *device, Device *tile_device, RenderTile &tile) +{ + thread_scoped_lock tile_lock(tiles_mutex); + + if (tiles.empty()) { + return false; + } + + tile = tiles.front(); + tiles.pop_front(); + + device->map_tile(tile_device, tile); + + print_progress(num_tiles - tiles.size(), num_tiles, frame, denoiser->num_frames); + + return true; +} + +/* Mapping tiles is required for regular rendering since each tile has its separate memory + * which may be allocated on a different device. 
+ * For standalone denoising, there is a single memory that is present on all devices, so the only + * thing that needs to be done here is to specify the surrounding tile geometry. + * + * However, since there is only one large memory, the denoised result has to be written to + * a different buffer to avoid having to copy an entire horizontal slice of the image. */ +void DenoiseTask::map_neighboring_tiles(RenderTileNeighbors &neighbors, Device *tile_device) +{ + RenderTile ¢er_tile = neighbors.tiles[RenderTileNeighbors::CENTER]; + RenderTile &target_tile = neighbors.target; + + /* Fill tile information. */ + for (int i = 0; i < RenderTileNeighbors::SIZE; i++) { + if (i == RenderTileNeighbors::CENTER) { + continue; + } + + RenderTile &tile = neighbors.tiles[i]; + int dx = (i % 3) - 1; + int dy = (i / 3) - 1; + tile.x = clamp(center_tile.x + dx * denoiser->tile_size.x, 0, image.width); + tile.w = clamp(center_tile.x + (dx + 1) * denoiser->tile_size.x, 0, image.width) - tile.x; + tile.y = clamp(center_tile.y + dy * denoiser->tile_size.y, 0, image.height); + tile.h = clamp(center_tile.y + (dy + 1) * denoiser->tile_size.y, 0, image.height) - tile.y; + + tile.buffer = center_tile.buffer; + tile.offset = center_tile.offset; + tile.stride = image.width; + } + + /* Allocate output buffer. */ + device_vector *output_mem = new device_vector( + tile_device, "denoising_output", MEM_READ_WRITE); + output_mem->alloc(OUTPUT_NUM_CHANNELS * center_tile.w * center_tile.h); + + /* Fill output buffer with noisy image, assumed by kernel_filter_finalize + * when skipping denoising of some pixels. 
*/ + float *result = output_mem->data(); + float *in = &image.pixels[image.num_channels * (center_tile.y * image.width + center_tile.x)]; + + const DenoiseImageLayer &layer = image.layers[current_layer]; + const int *input_to_image_channel = layer.input_to_image_channel.data(); + + for (int y = 0; y < center_tile.h; y++) { + for (int x = 0; x < center_tile.w; x++, result += OUTPUT_NUM_CHANNELS) { + for (int i = 0; i < OUTPUT_NUM_CHANNELS; i++) { + result[i] = in[image.num_channels * x + input_to_image_channel[INPUT_NOISY_IMAGE + i]]; + } + } + in += image.num_channels * image.width; + } + + output_mem->copy_to_device(); + + /* Fill output tile info. */ + target_tile = center_tile; + target_tile.buffer = output_mem->device_pointer; + target_tile.stride = target_tile.w; + target_tile.offset -= target_tile.x + target_tile.y * target_tile.stride; + + thread_scoped_lock output_lock(output_mutex); + assert(output_pixels.count(center_tile.tile_index) == 0); + output_pixels[target_tile.tile_index] = output_mem; +} + +void DenoiseTask::unmap_neighboring_tiles(RenderTileNeighbors &neighbors) +{ + RenderTile ¢er_tile = neighbors.tiles[RenderTileNeighbors::CENTER]; + RenderTile &target_tile = neighbors.target; + + thread_scoped_lock output_lock(output_mutex); + assert(output_pixels.count(center_tile.tile_index) == 1); + device_vector *output_mem = output_pixels[target_tile.tile_index]; + output_pixels.erase(center_tile.tile_index); + output_lock.unlock(); + + /* Copy denoised pixels from device. 
*/ + output_mem->copy_from_device(0, OUTPUT_NUM_CHANNELS * target_tile.w, target_tile.h); + + float *result = output_mem->data(); + float *out = &image.pixels[image.num_channels * (target_tile.y * image.width + target_tile.x)]; + + const DenoiseImageLayer &layer = image.layers[current_layer]; + const int *output_to_image_channel = layer.output_to_image_channel.data(); + + for (int y = 0; y < target_tile.h; y++) { + for (int x = 0; x < target_tile.w; x++, result += OUTPUT_NUM_CHANNELS) { + for (int i = 0; i < OUTPUT_NUM_CHANNELS; i++) { + out[image.num_channels * x + output_to_image_channel[i]] = result[i]; + } + } + out += image.num_channels * image.width; + } + + /* Free device buffer. */ + output_mem->free(); + delete output_mem; +} + +void DenoiseTask::release_tile() +{ +} + +bool DenoiseTask::get_cancel() +{ + return false; +} + +void DenoiseTask::create_task(DeviceTask &task) +{ + /* Callback functions. */ + task.acquire_tile = function_bind(&DenoiseTask::acquire_tile, this, device, _1, _2); + task.map_neighbor_tiles = function_bind(&DenoiseTask::map_neighboring_tiles, this, _1, _2); + task.unmap_neighbor_tiles = function_bind(&DenoiseTask::unmap_neighboring_tiles, this, _1); + task.release_tile = function_bind(&DenoiseTask::release_tile, this); + task.get_cancel = function_bind(&DenoiseTask::get_cancel, this); + + /* Denoising parameters. */ + task.denoising = denoiser->params; + task.denoising.type = DENOISER_NLM; + task.denoising.use = true; + task.denoising_from_render = false; + + task.denoising_frames.resize(neighbor_frames.size()); + for (int i = 0; i < neighbor_frames.size(); i++) { + task.denoising_frames[i] = neighbor_frames[i] - frame; + } + + /* Buffer parameters. */ + task.pass_stride = INPUT_NUM_CHANNELS; + task.target_pass_stride = OUTPUT_NUM_CHANNELS; + task.pass_denoising_data = 0; + task.pass_denoising_clean = -1; + task.frame_stride = image.width * image.height * INPUT_NUM_CHANNELS; + + /* Create tiles. 
*/ + thread_scoped_lock tile_lock(tiles_mutex); + thread_scoped_lock output_lock(output_mutex); + + tiles.clear(); + assert(output_pixels.empty()); + output_pixels.clear(); + + int tiles_x = divide_up(image.width, denoiser->tile_size.x); + int tiles_y = divide_up(image.height, denoiser->tile_size.y); + + for (int ty = 0; ty < tiles_y; ty++) { + for (int tx = 0; tx < tiles_x; tx++) { + RenderTile tile; + tile.x = tx * denoiser->tile_size.x; + tile.y = ty * denoiser->tile_size.y; + tile.w = min(image.width - tile.x, denoiser->tile_size.x); + tile.h = min(image.height - tile.y, denoiser->tile_size.y); + tile.start_sample = 0; + tile.num_samples = image.layers[current_layer].samples; + tile.sample = 0; + tile.offset = 0; + tile.stride = image.width; + tile.tile_index = ty * tiles_x + tx; + tile.task = RenderTile::DENOISE; + tile.buffers = NULL; + tile.buffer = input_pixels.device_pointer; + tiles.push_back(tile); + } + } + + num_tiles = tiles.size(); +} + +/* Denoiser Operations */ + +bool DenoiseTask::load_input_pixels(int layer) +{ + int w = image.width; + int h = image.height; + int num_pixels = image.width * image.height; + int frame_stride = num_pixels * INPUT_NUM_CHANNELS; + + /* Load center image */ + DenoiseImageLayer &image_layer = image.layers[layer]; + + float *buffer_data = input_pixels.data(); + image.read_pixels(image_layer, buffer_data); + buffer_data += frame_stride; + + /* Load neighbor images */ + for (int i = 0; i < image.in_neighbors.size(); i++) { + if (!image.read_neighbor_pixels(i, image_layer, buffer_data)) { + error = "Failed to read neighbor frame pixels"; + return false; + } + buffer_data += frame_stride; + } + + /* Preprocess */ + buffer_data = input_pixels.data(); + for (int neighbor = 0; neighbor < image.in_neighbors.size() + 1; neighbor++) { + /* Clamp */ + if (denoiser->params.clamp_input) { + for (int i = 0; i < num_pixels * INPUT_NUM_CHANNELS; i++) { + buffer_data[i] = clamp(buffer_data[i], -1e8f, 1e8f); + } + } + + /* Box blur */ + 
int r = 5 * denoiser->params.radius; + float *data = buffer_data + 14; + array temp(num_pixels); + + for (int y = 0; y < h; y++) { + for (int x = 0; x < w; x++) { + int n = 0; + float sum = 0.0f; + for (int dx = max(x - r, 0); dx < min(x + r + 1, w); dx++, n++) { + sum += data[INPUT_NUM_CHANNELS * (y * w + dx)]; + } + temp[y * w + x] = sum / n; + } + } + + for (int y = 0; y < h; y++) { + for (int x = 0; x < w; x++) { + int n = 0; + float sum = 0.0f; + + for (int dy = max(y - r, 0); dy < min(y + r + 1, h); dy++, n++) { + sum += temp[dy * w + x]; + } + + data[INPUT_NUM_CHANNELS * (y * w + x)] = sum / n; + } + } + + /* Highlight compression */ + data = buffer_data + 8; + for (int y = 0; y < h; y++) { + for (int x = 0; x < w; x++) { + int idx = INPUT_NUM_CHANNELS * (y * w + x); + float3 color = make_float3(data[idx], data[idx + 1], data[idx + 2]); + color = color_highlight_compress(color, NULL); + data[idx] = color.x; + data[idx + 1] = color.y; + data[idx + 2] = color.z; + } + } + + buffer_data += frame_stride; + } + + /* Copy to device */ + input_pixels.copy_to_device(); + + return true; +} + +/* Task stages */ + +bool DenoiseTask::load() +{ + string center_filepath = denoiser->input[frame]; + if (!image.load(center_filepath, error)) { + return false; + } + + if (!image.load_neighbors(denoiser->input, neighbor_frames, error)) { + return false; + } + + if (image.layers.empty()) { + error = "No image layers found to denoise in " + center_filepath; + return false; + } + + /* Allocate device buffer. */ + int num_frames = image.in_neighbors.size() + 1; + input_pixels.alloc(image.width * INPUT_NUM_CHANNELS, image.height * num_frames); + input_pixels.zero_to_device(); + + /* Read pixels for first layer. 
*/ + current_layer = 0; + if (!load_input_pixels(current_layer)) { + return false; + } + + return true; +} + +bool DenoiseTask::exec() +{ + for (current_layer = 0; current_layer < image.layers.size(); current_layer++) { + /* Read pixels for secondary layers, first was already loaded. */ + if (current_layer > 0) { + if (!load_input_pixels(current_layer)) { + return false; + } + } + + /* Run task on device. */ + DeviceTask task(DeviceTask::RENDER); + create_task(task); + device->task_add(task); + device->task_wait(); + + printf("\n"); + } + + return true; +} + +bool DenoiseTask::save() +{ + bool ok = image.save_output(denoiser->output[frame], error); + free(); + return ok; +} + +void DenoiseTask::free() +{ + image.free(); + input_pixels.free(); + assert(output_pixels.empty()); +} + +/* Denoise Image Storage */ + +DenoiseImage::DenoiseImage() +{ + width = 0; + height = 0; + num_channels = 0; + samples = 0; +} + +DenoiseImage::~DenoiseImage() +{ + free(); +} + +void DenoiseImage::close_input() +{ + in_neighbors.clear(); +} + +void DenoiseImage::free() +{ + close_input(); + pixels.clear(); +} + +bool DenoiseImage::parse_channels(const ImageSpec &in_spec, string &error) +{ + const std::vector &channels = in_spec.channelnames; + const ParamValue *multiview = in_spec.find_attribute("multiView"); + const bool multiview_channels = (multiview && multiview->type().basetype == TypeDesc::STRING && + multiview->type().arraylen >= 2); + + layers.clear(); + + /* Loop over all the channels in the file, parse their name and sort them + * by RenderLayer. + * Channels that can't be parsed are directly passed through to the output. */ + map file_layers; + for (int i = 0; i < channels.size(); i++) { + string layer, pass, channel; + if (parse_channel_name(channels[i], layer, pass, channel, multiview_channels)) { + file_layers[layer].channels.push_back(pass + "." 
+ channel); + file_layers[layer].layer_to_image_channel.push_back(i); + } + } + + /* Loop over all detected RenderLayers, check whether they contain a full set of input channels. + * Any channels that won't be processed internally are also passed through. */ + for (map::iterator i = file_layers.begin(); i != file_layers.end(); + ++i) { + const string &name = i->first; + DenoiseImageLayer &layer = i->second; + + /* Check for full pass set. */ + if (!layer.detect_denoising_channels()) { + continue; + } + + layer.name = name; + layer.samples = samples; + + /* If the sample value isn't set yet, check if there is a layer-specific one in the input file. + */ + if (layer.samples < 1) { + string sample_string = in_spec.get_string_attribute("cycles." + name + ".samples", ""); + if (sample_string != "") { + if (!sscanf(sample_string.c_str(), "%d", &layer.samples)) { + error = "Failed to parse samples metadata: " + sample_string; + return false; + } + } + } + + if (layer.samples < 1) { + error = string_printf( + "No sample number specified in the file for layer %s or on the command line", + name.c_str()); + return false; + } + + layers.push_back(layer); + } + + return true; +} + +void DenoiseImage::read_pixels(const DenoiseImageLayer &layer, float *input_pixels) +{ + /* Pixels from center file have already been loaded into pixels. + * We copy a subset into the device input buffer with channels reshuffled. */ + const int *input_to_image_channel = layer.input_to_image_channel.data(); + + for (int i = 0; i < width * height; i++) { + for (int j = 0; j < INPUT_NUM_CHANNELS; j++) { + int image_channel = input_to_image_channel[j]; + input_pixels[i * INPUT_NUM_CHANNELS + j] = + pixels[((size_t)i) * num_channels + image_channel]; + } + } +} + +bool DenoiseImage::read_neighbor_pixels(int neighbor, + const DenoiseImageLayer &layer, + float *input_pixels) +{ + /* Load pixels from neighboring frames, and copy them into device buffer + * with channels reshuffled. 
*/ + size_t num_pixels = (size_t)width * (size_t)height; + array neighbor_pixels(num_pixels * num_channels); + if (!in_neighbors[neighbor]->read_image(TypeDesc::FLOAT, neighbor_pixels.data())) { + return false; + } + + const int *input_to_image_channel = layer.neighbor_input_to_image_channel[neighbor].data(); + + for (int i = 0; i < width * height; i++) { + for (int j = 0; j < INPUT_NUM_CHANNELS; j++) { + int image_channel = input_to_image_channel[j]; + input_pixels[i * INPUT_NUM_CHANNELS + j] = + neighbor_pixels[((size_t)i) * num_channels + image_channel]; + } + } + + return true; +} + +bool DenoiseImage::load(const string &in_filepath, string &error) +{ + if (!Filesystem::is_regular(in_filepath)) { + error = "Couldn't find file: " + in_filepath; + return false; + } + + unique_ptr in(ImageInput::open(in_filepath)); + if (!in) { + error = "Couldn't open file: " + in_filepath; + return false; + } + + in_spec = in->spec(); + width = in_spec.width; + height = in_spec.height; + num_channels = in_spec.nchannels; + + if (!parse_channels(in_spec, error)) { + return false; + } + + if (layers.size() == 0) { + error = "Could not find a render layer containing denoising info"; + return false; + } + + size_t num_pixels = (size_t)width * (size_t)height; + pixels.resize(num_pixels * num_channels); + + /* Read all channels into buffer. Reading all channels at once is faster + * than individually due to interleaved EXR channel storage. 
*/ + if (!in->read_image(TypeDesc::FLOAT, pixels.data())) { + error = "Failed to read image: " + in_filepath; + return false; + } + + return true; +} + +bool DenoiseImage::load_neighbors(const vector &filepaths, + const vector &frames, + string &error) +{ + if (frames.size() > DENOISE_MAX_FRAMES - 1) { + error = string_printf("Maximum number of neighbors (%d) exceeded\n", DENOISE_MAX_FRAMES - 1); + return false; + } + + for (int neighbor = 0; neighbor < frames.size(); neighbor++) { + int frame = frames[neighbor]; + const string &filepath = filepaths[frame]; + + if (!Filesystem::is_regular(filepath)) { + error = "Couldn't find neighbor frame: " + filepath; + return false; + } + + unique_ptr in_neighbor(ImageInput::open(filepath)); + if (!in_neighbor) { + error = "Couldn't open neighbor frame: " + filepath; + return false; + } + + const ImageSpec &neighbor_spec = in_neighbor->spec(); + if (neighbor_spec.width != width || neighbor_spec.height != height) { + error = "Neighbor frame has different dimensions: " + filepath; + return false; + } + + foreach (DenoiseImageLayer &layer, layers) { + if (!layer.match_channels(neighbor, in_spec.channelnames, neighbor_spec.channelnames)) { + error = "Neighbor frame misses denoising data passes: " + filepath; + return false; + } + } + + in_neighbors.push_back(std::move(in_neighbor)); + } + + return true; +} + +bool DenoiseImage::save_output(const string &out_filepath, string &error) +{ + /* Save image with identical dimensions, channels and metadata. */ + ImageSpec out_spec = in_spec; + + /* Ensure that the output frame contains sample information even if the input didn't. */ + for (int i = 0; i < layers.size(); i++) { + string name = "cycles." + layers[i].name + ".samples"; + if (!out_spec.find_attribute(name, TypeDesc::STRING)) { + out_spec.attribute(name, TypeDesc::STRING, string_printf("%d", layers[i].samples)); + } + } + + /* We don't need input anymore at this point, and will possibly + * overwrite the same file. 
*/ + close_input(); + + /* Write to temporary file path, so we denoise images in place and don't + * risk destroying files when something goes wrong in file saving. */ + string extension = OIIO::Filesystem::extension(out_filepath); + string unique_name = ".denoise-tmp-" + OIIO::Filesystem::unique_path(); + string tmp_filepath = out_filepath + unique_name + extension; + unique_ptr out(ImageOutput::create(tmp_filepath)); + + if (!out) { + error = "Failed to open temporary file " + tmp_filepath + " for writing"; + return false; + } + + /* Open temporary file and write image buffers. */ + if (!out->open(tmp_filepath, out_spec)) { + error = "Failed to open file " + tmp_filepath + " for writing: " + out->geterror(); + return false; + } + + bool ok = true; + if (!out->write_image(TypeDesc::FLOAT, pixels.data())) { + error = "Failed to write to file " + tmp_filepath + ": " + out->geterror(); + ok = false; + } + + if (!out->close()) { + error = "Failed to save to file " + tmp_filepath + ": " + out->geterror(); + ok = false; + } + + out.reset(); + + /* Copy temporary file to output filepath. */ + string rename_error; + if (ok && !OIIO::Filesystem::rename(tmp_filepath, out_filepath, rename_error)) { + error = "Failed to move denoised image to " + out_filepath + ": " + rename_error; + ok = false; + } + + if (!ok) { + OIIO::Filesystem::remove(tmp_filepath); + } + + return ok; +} + +/* File pattern handling and outer loop over frames */ + +DenoiserPipeline::DenoiserPipeline(DeviceInfo &device_info) +{ + samples_override = 0; + tile_size = make_int2(64, 64); + + num_frames = 0; + + /* Initialize task scheduler. */ + TaskScheduler::init(); + + /* Initialize device. 
*/ + device = Device::create(device_info, stats, profiler, true); + + device->load_kernels(KERNEL_FEATURE_DENOISING); +} + +DenoiserPipeline::~DenoiserPipeline() +{ + delete device; + TaskScheduler::exit(); +} + +bool DenoiserPipeline::run() +{ + assert(input.size() == output.size()); + + num_frames = output.size(); + + for (int frame = 0; frame < num_frames; frame++) { + /* Skip empty output paths. */ + if (output[frame].empty()) { + continue; + } + + /* Determine neighbor frame numbers that should be used for filtering. */ + vector neighbor_frames; + for (int f = frame - params.neighbor_frames; f <= frame + params.neighbor_frames; f++) { + if (f >= 0 && f < num_frames && f != frame) { + neighbor_frames.push_back(f); + } + } + + /* Execute task. */ + DenoiseTask task(device, this, frame, neighbor_frames); + if (!task.load()) { + error = task.error; + return false; + } + + if (!task.exec()) { + error = task.error; + return false; + } + + if (!task.save()) { + error = task.error; + return false; + } + + task.free(); + } + + return true; +} + +CCL_NAMESPACE_END + +#endif diff --git a/intern/cycles/session/denoising.h b/intern/cycles/session/denoising.h new file mode 100644 index 00000000000..097cc570d06 --- /dev/null +++ b/intern/cycles/session/denoising.h @@ -0,0 +1,216 @@ +/* + * Copyright 2011-2018 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __DENOISING_H__ +#define __DENOISING_H__ + +#if 0 + +/* TODO(sergey): Make it explicit and clear when something is a denoiser, its pipeline or + * parameters. Currently it is an annoying mixture of terms used interchangeably. */ + +# include "device/device.h" + +# include "render/buffers.h" /* NOTE(review): buffers.h is added under session/ by this change, so this path looks stale; it is inert while inside the disabled block - confirm before re-enabling. */ + +# include "util/util_string.h" +# include "util/util_unique_ptr.h" +# include "util/util_vector.h" + +# include + +OIIO_NAMESPACE_USING + +CCL_NAMESPACE_BEGIN + +/* Denoiser pipeline + * + * Denoises a sequence of frames: run() visits every frame that has a non-empty output path, + * collects its neighbor frames and runs a DenoiseTask (load, exec, save) for it. */ + +class DenoiserPipeline { + public: + DenoiserPipeline(DeviceInfo &device_info); + ~DenoiserPipeline(); + /* Denoise all frames. input and output must have the same size. Returns false and sets + * error as soon as one stage of a frame's task fails. */ + bool run(); + + /* Error message after running, in case of failure. */ + string error; + + /* Sequential list of frame filepaths to denoise. */ + vector input; + /* Sequential list of frame filepaths to write result to. Empty entries + * are skipped, so only a subset of the sequence can be denoised while + * taking into account all input frames. */ + vector output; + + /* Sample number override, takes precedence over values from input frames. */ + int samples_override; + /* Tile size for processing on device. */ + int2 tile_size; + + /* Equivalent to the settings in the regular denoiser. */ + DenoiseParams params; + + protected: + friend class DenoiseTask; + + Stats stats; + Profiler profiler; + Device *device; + + int num_frames; +}; + +/* Denoise Image Layer */ + +struct DenoiseImageLayer { + string name; + /* All channels belonging to this DenoiseImageLayer. */ + vector channels; + /* Layer to image channel mapping. */ + vector layer_to_image_channel; + + /* Sample amount that was used for rendering this layer. */ + int samples; + + /* Device input channel will be copied from image channel input_to_image_channel[i]. */ + vector input_to_image_channel; + + /* input_to_image_channel of the secondary frames, if any are used. 
*/ + vector> neighbor_input_to_image_channel; + + /* Write i-th channel of the processing output to output_to_image_channel[i]-th channel of the + * file. */ + vector output_to_image_channel; + + /* Detect whether this layer contains a full set of channels and set up the offsets accordingly. + */ + bool detect_denoising_channels(); + + /* Map the channels of a secondary frame to the channels that are required for processing, + * fill neighbor_input_to_image_channel if all are present, or return false if any channel is + * missing. */ + bool match_channels(int neighbor, + const std::vector &channelnames, + const std::vector &neighbor_channelnames); +}; + +/* Denoise Image Data */ + +class DenoiseImage { + public: + DenoiseImage(); + ~DenoiseImage(); + + /* Dimensions */ + int width, height, num_channels; + + /* Sample count copied to every layer by parse_channels(); when it is below 1 the per-layer + * samples attribute stored in the input file is used instead. */ + int samples; + + /* Pixel buffer with interleaved channels. */ + array pixels; + + /* Image file handles */ + ImageSpec in_spec; + vector> in_neighbors; + + /* Render layers */ + vector layers; + + void free(); + + /* Open the input image, parse its channels, allocate the pixel buffer and read all channels + * of the image into it. Returns false and sets error on failure. */ + bool load(const string &in_filepath, string &error); + + /* Load neighboring frames. */ + bool load_neighbors(const vector &filepaths, const vector &frames, string &error); + + /* Load subset of pixels from file buffer into input buffer, as needed for denoising + * on the device. Channels are reshuffled following the provided mapping. + * read_neighbor_pixels() does the same for a neighbor frame, reading it from its open file + * handle first, and returns false if that read fails. */ + void read_pixels(const DenoiseImageLayer &layer, float *input_pixels); + bool read_neighbor_pixels(int neighbor, const DenoiseImageLayer &layer, float *input_pixels); + /* Close the neighbor inputs, write the pixel buffer to a temporary file and rename it to + * out_filepath. Returns false and sets error on failure. */ + bool save_output(const string &out_filepath, string &error); + + protected: + /* Parse input file channels, separate them into DenoiseImageLayers, + * detect DenoiseImageLayers with full channel sets, + * fill layers and set up the output channels and passthrough map. 
*/ + bool parse_channels(const ImageSpec &in_spec, string &error); + + void close_input(); +}; + +/* Denoise Task */ + +class DenoiseTask { + public: + DenoiseTask(Device *device, + DenoiserPipeline *denoiser, + int frame, + const vector &neighbor_frames); + ~DenoiseTask(); + + /* Task stages, called by DenoiserPipeline::run() in the order load(), exec(), save(). + * Each returns false with error set on failure. save() already calls free(), which releases + * the image data and the device input buffer. */ + bool load(); + bool exec(); + bool save(); + void free(); + + string error; + + protected: + /* Denoiser parameters and device */ + DenoiserPipeline *denoiser; + Device *device; + + /* Frame number to be denoised */ + int frame; + vector neighbor_frames; + + /* Image file data */ + DenoiseImage image; + int current_layer; + + /* Device input buffer */ + device_vector input_pixels; + + /* Tiles */ + thread_mutex tiles_mutex; + list tiles; + int num_tiles; + + thread_mutex output_mutex; + map *> output_pixels; + + /* Task handling */ + bool load_input_pixels(int layer); + void create_task(DeviceTask &task); + + /* Device task callbacks */ + bool acquire_tile(Device *device, Device *tile_device, RenderTile &tile); + void map_neighboring_tiles(RenderTileNeighbors &neighbors, Device *tile_device); + void unmap_neighboring_tiles(RenderTileNeighbors &neighbors); + void release_tile(); + bool get_cancel(); +}; + +CCL_NAMESPACE_END + +#endif + +#endif /* __DENOISING_H__ */ diff --git a/intern/cycles/session/display_driver.h b/intern/cycles/session/display_driver.h new file mode 100644 index 00000000000..85f305034d7 --- /dev/null +++ b/intern/cycles/session/display_driver.h @@ -0,0 +1,131 @@ +/* + * Copyright 2021 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include "util/util_half.h" +#include "util/util_types.h" + +CCL_NAMESPACE_BEGIN + +/* Display driver for efficient interactive display of renders. + * + * Host applications implement this interface for viewport rendering. For best performance, we + * recommend: + * - Allocating a texture on the GPU to be interactively updated + * - Using the graphics interop mechanism to avoid CPU-GPU copying overhead + * - Using a dedicated or thread-safe graphics API context for updates, to avoid + * blocking the host application. + */ +class DisplayDriver { + public: + DisplayDriver() = default; + virtual ~DisplayDriver() = default; + + /* Render buffer parameters. */ + struct Params { + public: + /* Render resolution, ignoring progressive resolution changes. + * The texture buffer should be allocated with this size. */ + int2 size = make_int2(0, 0); + + /* For border rendering, the full resolution of the render, and the offset within that larger + * render. */ + int2 full_size = make_int2(0, 0); + int2 full_offset = make_int2(0, 0); + + bool modified(const Params &other) const + { + return !(full_offset == other.full_offset && full_size == other.full_size && + size == other.size); + } + }; + + /* Update the render from the rendering thread. + * + * Cycles periodically updates the render to be displayed. For multithreaded updates with + * potentially multiple rendering devices, it will call these methods as follows. 
+ * + * if (driver.update_begin(params, width, height)) { + * parallel_for_each(rendering_device) { + * buffer = driver.map_texture_buffer(); + * if (buffer) { + * fill(buffer); + * driver.unmap_texture_buffer(); + * } + * } + * driver.update_end(); + * } + * + * The parameters may dynamically change due to camera changes in the scene, and resources should + * be re-allocated accordingly. + * + * The width and height passed to update_begin() are the effective render resolution taking into + * account progressive resolution changes, which may be equal to or smaller than the params.size. + * For efficiency, changes in this resolution should be handled without re-allocating resources, + * but rather by using a subset of the full resolution buffer. */ + virtual bool update_begin(const Params &params, int width, int height) = 0; + virtual void update_end() = 0; + + virtual half4 *map_texture_buffer() = 0; + virtual void unmap_texture_buffer() = 0; + + /* Optionally return a handle to a native graphics API texture buffer. If supported, + * the rendering device may write directly to this buffer instead of calling + * map_texture_buffer() and unmap_texture_buffer(). */ + class GraphicsInterop { + public: + /* Dimensions of the buffer, in pixels. */ + int buffer_width = 0; + int buffer_height = 0; + + /* OpenGL pixel buffer object. */ + int opengl_pbo_id = 0; + + /* Clear the entire buffer before doing partial write to it. */ + bool need_clear = false; + }; + + virtual GraphicsInterop graphics_interop_get() + { + return GraphicsInterop(); + } + + /* (De)activate graphics context required for editing or deleting the graphics interop + * object. + * + * For example, destruction of the CUDA object associated with an OpenGL requires the + * OpenGL context to be active. */ + virtual void graphics_interop_activate(){}; + virtual void graphics_interop_deactivate(){}; + + /* Clear the display buffer by filling it with zeros. 
*/ + virtual void clear() = 0; + + /* Draw the render using the native graphics API. + * + * Note that this may be called in parallel to updates. The implementation is responsible for + * mutex locking or other mechanisms to avoid conflicts. + * + * The parameters may have changed since the last update. The implementation is responsible for + * deciding to skip or adjust render display for such changes. + * + * Host application drawing the render buffer should use Session.draw(), which will + * call this method. */ + virtual void draw(const Params &params) = 0; +}; + +CCL_NAMESPACE_END diff --git a/intern/cycles/session/merge.cpp b/intern/cycles/session/merge.cpp new file mode 100644 index 00000000000..97e9c75d5f7 --- /dev/null +++ b/intern/cycles/session/merge.cpp @@ -0,0 +1,516 @@ +/* + * Copyright 2011-2019 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "session/merge.h" + +#include "util/util_array.h" +#include "util/util_map.h" +#include "util/util_system.h" +#include "util/util_time.h" +#include "util/util_unique_ptr.h" + +#include +#include + +OIIO_NAMESPACE_USING + +CCL_NAMESPACE_BEGIN + +/* Merge Image Layer */ + +enum MergeChannelOp { + MERGE_CHANNEL_NOP, + MERGE_CHANNEL_COPY, + MERGE_CHANNEL_SUM, + MERGE_CHANNEL_AVERAGE +}; + +struct MergeImagePass { + /* Full channel name. */ + string channel_name; + /* Channel format in the file. */ + TypeDesc format; + /* Type of operation to perform when merging. 
*/ + MergeChannelOp op; + /* Offset of layer channels in input image. */ + int offset; + /* Offset of layer channels in merged image. */ + int merge_offset; +}; + +struct MergeImageLayer { + /* Layer name. */ + string name; + /* Passes. */ + vector passes; + /* Sample amount that was used for rendering this layer. */ + int samples; +}; + +/* Merge Image */ + +struct MergeImage { + /* OIIO file handle. */ + unique_ptr in; + /* Image file path. */ + string filepath; + /* Render layers. */ + vector layers; +}; + +/* Channel Parsing */ + +static MergeChannelOp parse_channel_operation(const string &pass_name) +{ + if (pass_name == "Depth" || pass_name == "IndexMA" || pass_name == "IndexOB" || + string_startswith(pass_name, "Crypto")) { + return MERGE_CHANNEL_COPY; + } + else if (string_startswith(pass_name, "Debug BVH") || + string_startswith(pass_name, "Debug Ray") || + string_startswith(pass_name, "Debug Render Time")) { + return MERGE_CHANNEL_SUM; + } + else { + return MERGE_CHANNEL_AVERAGE; + } +} + +/* Splits in at its last dot, setting suffix to the part after the dot and + * into the part before it. Returns whether a dot was found. */ +static bool split_last_dot(string &in, string &suffix) +{ + size_t pos = in.rfind("."); + if (pos == string::npos) { + return false; + } + suffix = in.substr(pos + 1); + in = in.substr(0, pos); + return true; +} + +/* Separate channel names as generated by Blender. + * Multiview format: RenderLayer.Pass.View.Channel + * Otherwise: RenderLayer.Pass.Channel */ +static bool parse_channel_name( + string name, string &renderlayer, string &pass, string &channel, bool multiview_channels) +{ + if (!split_last_dot(name, channel)) { + return false; + } + string view; + if (multiview_channels && !split_last_dot(name, view)) { + return false; + } + if (!split_last_dot(name, pass)) { + return false; + } + renderlayer = name; + + if (multiview_channels) { + renderlayer += "." 
+ view; + } + + return true; +} + +static bool parse_channels(const ImageSpec &in_spec, + vector &layers, + string &error) +{ + const ParamValue *multiview = in_spec.find_attribute("multiView"); + const bool multiview_channels = (multiview && multiview->type().basetype == TypeDesc::STRING && + multiview->type().arraylen >= 2); + + layers.clear(); + + /* Loop over all the channels in the file, parse their name and sort them + * by RenderLayer. + * Channels that can't be parsed are directly passed through to the output. */ + map file_layers; + for (int i = 0; i < in_spec.nchannels; i++) { + MergeImagePass pass; + pass.channel_name = in_spec.channelnames[i]; + pass.format = (in_spec.channelformats.size() > 0) ? in_spec.channelformats[i] : in_spec.format; + pass.offset = i; + pass.merge_offset = i; + + string layername, passname, channelname; + if (parse_channel_name( + pass.channel_name, layername, passname, channelname, multiview_channels)) { + /* Channel part of a render layer. */ + pass.op = parse_channel_operation(passname); + } + else { + /* Other channels are added in unnamed layer. */ + layername = ""; + pass.op = parse_channel_operation(pass.channel_name); + } + + file_layers[layername].passes.push_back(pass); + } + + /* Loop over all detected render-layers, check whether they contain a full set of input channels. + * Any channels that won't be processed internally are also passed through. */ + for (auto &i : file_layers) { + const string &name = i.first; + MergeImageLayer &layer = i.second; + + layer.name = name; + layer.samples = 0; + + /* Determine number of samples from metadata. */ + if (layer.name == "") { + layer.samples = 1; + } + else if (layer.samples < 1) { + string sample_string = in_spec.get_string_attribute("cycles." 
+ name + ".samples", ""); + if (sample_string != "") { + if (!sscanf(sample_string.c_str(), "%d", &layer.samples)) { + error = "Failed to parse samples metadata: " + sample_string; + return false; + } + } + } + + if (layer.samples < 1) { + error = string_printf( + "No sample number specified in the file for layer %s or on the command line", + name.c_str()); + return false; + } + + layers.push_back(layer); + } + + return true; +} + +static bool open_images(const vector &filepaths, vector &images, string &error) +{ + for (const string &filepath : filepaths) { + unique_ptr in(ImageInput::open(filepath)); + if (!in) { + error = "Couldn't open file: " + filepath; + return false; + } + + MergeImage image; + image.in = std::move(in); + image.filepath = filepath; + if (!parse_channels(image.in->spec(), image.layers, error)) { + return false; + } + + if (image.layers.size() == 0) { + error = "Could not find a render layer for merging"; + return false; + } + + if (image.in->spec().deep) { + error = "Merging deep images not supported."; + return false; + } + + if (images.size() > 0) { + const ImageSpec &base_spec = images[0].in->spec(); + const ImageSpec &spec = image.in->spec(); + + if (base_spec.width != spec.width || base_spec.height != spec.height || + base_spec.depth != spec.depth || base_spec.format != spec.format || + base_spec.deep != spec.deep) { + error = "Images do not have matching size and data layout."; + return false; + } + } + + images.push_back(std::move(image)); + } + + return true; +} + +static void merge_render_time(ImageSpec &spec, + const vector &images, + const string &name, + const bool average) +{ + double time = 0.0; + + for (const MergeImage &image : images) { + string time_str = image.in->spec().get_string_attribute(name, ""); + time += time_human_readable_to_seconds(time_str); + } + + if (average) { + time /= images.size(); + } + + spec.attribute(name, TypeDesc::STRING, time_human_readable_from_seconds(time)); +} + +static void 
merge_layer_render_time(ImageSpec &spec, + const vector &images, + const string &layer_name, + const string &time_name, + const bool average) +{ + string name = "cycles." + layer_name + "." + time_name; + double time = 0.0; + + for (const MergeImage &image : images) { + string time_str = image.in->spec().get_string_attribute(name, ""); + time += time_human_readable_to_seconds(time_str); + } + + if (average) { + time /= images.size(); + } + + spec.attribute(name, TypeDesc::STRING, time_human_readable_from_seconds(time)); +} + +static void merge_channels_metadata(vector &images, + ImageSpec &out_spec, + vector &channel_total_samples) +{ + /* Based on first image. */ + out_spec = images[0].in->spec(); + + /* Merge channels and compute offsets. */ + out_spec.nchannels = 0; + out_spec.channelformats.clear(); + out_spec.channelnames.clear(); + + for (MergeImage &image : images) { + for (MergeImageLayer &layer : image.layers) { + for (MergeImagePass &pass : layer.passes) { + /* Test if matching channel already exists in merged image. */ + bool found = false; + + for (size_t i = 0; i < out_spec.nchannels; i++) { + if (pass.channel_name == out_spec.channelnames[i]) { + pass.merge_offset = i; + channel_total_samples[i] += layer.samples; + /* First image wins for channels that can't be averaged or summed. */ + if (pass.op == MERGE_CHANNEL_COPY) { + pass.op = MERGE_CHANNEL_NOP; + } + found = true; + break; + } + } + + if (!found) { + /* Add new channel. */ + pass.merge_offset = out_spec.nchannels; + channel_total_samples.push_back(layer.samples); + + out_spec.channelnames.push_back(pass.channel_name); + out_spec.channelformats.push_back(pass.format); + out_spec.nchannels++; + } + } + } + } + + /* Merge metadata. 
*/ + merge_render_time(out_spec, images, "RenderTime", false); + + map layer_num_samples; + for (MergeImage &image : images) { + for (MergeImageLayer &layer : image.layers) { + if (layer.name != "") { + layer_num_samples[layer.name] += layer.samples; + } + } + } + + for (const auto &i : layer_num_samples) { + string name = "cycles." + i.first + ".samples"; + out_spec.attribute(name, TypeDesc::STRING, string_printf("%d", i.second)); + + merge_layer_render_time(out_spec, images, i.first, "total_time", false); + merge_layer_render_time(out_spec, images, i.first, "render_time", false); + merge_layer_render_time(out_spec, images, i.first, "synchronization_time", true); + } +} + +static void alloc_pixels(const ImageSpec &spec, array &pixels) +{ + const size_t width = spec.width; + const size_t height = spec.height; + const size_t num_channels = spec.nchannels; + + const size_t num_pixels = (size_t)width * (size_t)height; + pixels.resize(num_pixels * num_channels); +} + +static bool merge_pixels(const vector &images, + const ImageSpec &out_spec, + const vector &channel_total_samples, + array &out_pixels, + string &error) +{ + alloc_pixels(out_spec, out_pixels); + memset(out_pixels.data(), 0, out_pixels.size() * sizeof(float)); + + for (const MergeImage &image : images) { + /* Read all channels into buffer. Reading all channels at once is + * faster than individually due to interleaved EXR channel storage. 
*/ + array pixels; + alloc_pixels(image.in->spec(), pixels); + + if (!image.in->read_image(TypeDesc::FLOAT, pixels.data())) { + error = "Failed to read image: " + image.filepath; + return false; + } + + for (size_t li = 0; li < image.layers.size(); li++) { + const MergeImageLayer &layer = image.layers[li]; + + const size_t stride = image.in->spec().nchannels; + const size_t out_stride = out_spec.nchannels; + const size_t num_pixels = pixels.size(); + + for (const MergeImagePass &pass : layer.passes) { + size_t offset = pass.offset; + size_t out_offset = pass.merge_offset; + + switch (pass.op) { + case MERGE_CHANNEL_NOP: + break; + case MERGE_CHANNEL_COPY: + for (; offset < num_pixels; offset += stride, out_offset += out_stride) { + out_pixels[out_offset] = pixels[offset]; + } + break; + case MERGE_CHANNEL_SUM: + for (; offset < num_pixels; offset += stride, out_offset += out_stride) { + out_pixels[out_offset] += pixels[offset]; + } + break; + case MERGE_CHANNEL_AVERAGE: + /* Weights based on sample metadata. Per channel since not + * all files are guaranteed to have the same channels. */ + const int total_samples = channel_total_samples[out_offset]; + const float t = (float)layer.samples / (float)total_samples; + + for (; offset < num_pixels; offset += stride, out_offset += out_stride) { + out_pixels[out_offset] += t * pixels[offset]; + } + break; + } + } + } + } + + return true; +} + +static bool save_output(const string &filepath, + const ImageSpec &spec, + const array &pixels, + string &error) +{ + /* Write to temporary file path, so we merge images in place and don't + * risk destroying files when something goes wrong in file saving. 
*/ + string extension = OIIO::Filesystem::extension(filepath); + string unique_name = ".merge-tmp-" + OIIO::Filesystem::unique_path(); + string tmp_filepath = filepath + unique_name + extension; + unique_ptr out(ImageOutput::create(tmp_filepath)); + + if (!out) { + error = "Failed to open temporary file " + tmp_filepath + " for writing"; + return false; + } + + /* Open temporary file and write image buffers. */ + if (!out->open(tmp_filepath, spec)) { + error = "Failed to open file " + tmp_filepath + " for writing: " + out->geterror(); + return false; + } + + bool ok = true; + if (!out->write_image(TypeDesc::FLOAT, pixels.data())) { + error = "Failed to write to file " + tmp_filepath + ": " + out->geterror(); + ok = false; + } + + if (!out->close()) { + error = "Failed to save to file " + tmp_filepath + ": " + out->geterror(); + ok = false; + } + + out.reset(); + + /* Copy temporary file to output filepath. */ + string rename_error; + if (ok && !OIIO::Filesystem::rename(tmp_filepath, filepath, rename_error)) { + error = "Failed to move merged image to " + filepath + ": " + rename_error; + ok = false; + } + + if (!ok) { + OIIO::Filesystem::remove(tmp_filepath); + } + + return ok; +} + +/* Image Merger */ + +ImageMerger::ImageMerger() +{ +} + +bool ImageMerger::run() +{ + if (input.empty()) { + error = "No input file paths specified."; + return false; + } + if (output.empty()) { + error = "No output file path specified."; + return false; + } + + /* Open images and verify they have matching layout. */ + vector images; + if (!open_images(input, images, error)) { + return false; + } + + /* Merge metadata and setup channels and offsets. */ + ImageSpec out_spec; + vector channel_total_samples; + merge_channels_metadata(images, out_spec, channel_total_samples); + + /* Merge pixels. 
*/ + array out_pixels; + if (!merge_pixels(images, out_spec, channel_total_samples, out_pixels, error)) { + return false; + } + + /* We don't need input anymore at this point, and will possibly + * overwrite the same file. */ + images.clear(); + + /* Save output file. */ + return save_output(output, out_spec, out_pixels, error); +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/session/merge.h b/intern/cycles/session/merge.h new file mode 100644 index 00000000000..87e5d2d4723 --- /dev/null +++ b/intern/cycles/session/merge.h @@ -0,0 +1,43 @@ +/* + * Copyright 2011-2019 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __MERGE_H__ +#define __MERGE_H__ + +#include "util/util_string.h" +#include "util/util_vector.h" + +CCL_NAMESPACE_BEGIN + +/* Merge OpenEXR multilayer renders. */ + +class ImageMerger { + public: + ImageMerger(); + bool run(); + + /* Error message after running, in case of failure. */ + string error; + + /* List of image filepaths to merge. */ + vector input; + /* Output filepath. 
*/ + string output; +}; + +CCL_NAMESPACE_END + +#endif /* __MERGE_H__ */ diff --git a/intern/cycles/session/output_driver.h b/intern/cycles/session/output_driver.h new file mode 100644 index 00000000000..b7e980d71d4 --- /dev/null +++ b/intern/cycles/session/output_driver.h @@ -0,0 +1,82 @@ +/* + * Copyright 2021 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include "util/util_math.h" +#include "util/util_string.h" +#include "util/util_types.h" + +CCL_NAMESPACE_BEGIN + +/* Output driver for reading render buffers. + * + * Host applications implement this interface for outputting render buffers for offline rendering. + * Drivers can be used to copy the buffers into the host application or write them directly to + * disk. This interface may also be used for interactive display, however the DisplayDriver is more + * efficient for that purpose. 
/* Abstract interface through which render tiles are handed to the host application.
 *
 * `write_render_tile()` must be implemented. `update_render_tile()` and `read_render_tile()`
 * have default implementations which do nothing and return false. */
class OutputDriver {
 public:
  OutputDriver() = default;
  virtual ~OutputDriver() = default;

  /* Description of a tile together with accessors for its pass pixels.
   *
   * All members are constant and set once by the constructor. Pixel access is implemented by
   * subclasses. */
  class Tile {
   public:
    Tile(const int2 offset,
         const int2 size,
         const int2 full_size,
         const string_view layer,
         const string_view view)
        : offset(offset), size(size), full_size(full_size), layer(layer), view(view)
    {
    }
    virtual ~Tile() = default;

    /* NOTE(review): presumably the tile position and size in pixels, and the size of the full
     * image the tile belongs to. Confirm against the code constructing tiles. */
    const int2 offset;
    const int2 size;
    const int2 full_size;
    /* Names of the layer and view, copied from the `string_view` constructor arguments. */
    const string layer;
    const string view;

    /* Read `num_channels` channels of the pass named `pass_name` into `pixels`.
     * NOTE(review): the boolean result presumably indicates success. Confirm in the
     * implementation. */
    virtual bool get_pass_pixels(const string_view pass_name,
                                 const int num_channels,
                                 float *pixels) const = 0;
    /* Write `num_channels` channels from `pixels` into the pass named `pass_name`.
     * NOTE(review): the boolean result presumably indicates success. Confirm in the
     * implementation. */
    virtual bool set_pass_pixels(const string_view pass_name,
                                 const int num_channels,
                                 const float *pixels) const = 0;
  };

  /* Write tile once it has finished rendering. */
  virtual void write_render_tile(const Tile &tile) = 0;

  /* Update tile while rendering is in progress. Return true if any update
   * was performed. */
  virtual bool update_render_tile(const Tile & /* tile */)
  {
    return false;
  }

  /* For baking, read render pass PASS_BAKE_PRIMITIVE and PASS_BAKE_DIFFERENTIAL
   * to determine which shading points to use for baking at each pixel. Return
   * true if any data was read. */
  virtual bool read_render_tile(const Tile & /* tile */)
  {
    return false;
  }
};
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include + +#include "device/cpu/device.h" +#include "device/device.h" +#include "integrator/pass_accessor_cpu.h" +#include "integrator/path_trace.h" +#include "scene/background.h" +#include "scene/bake.h" +#include "scene/camera.h" +#include "scene/integrator.h" +#include "scene/light.h" +#include "scene/mesh.h" +#include "scene/object.h" +#include "scene/scene.h" +#include "scene/shader_graph.h" +#include "session/buffers.h" +#include "session/display_driver.h" +#include "session/output_driver.h" +#include "session/session.h" + +#include "util/util_foreach.h" +#include "util/util_function.h" +#include "util/util_logging.h" +#include "util/util_math.h" +#include "util/util_task.h" +#include "util/util_time.h" + +CCL_NAMESPACE_BEGIN + +Session::Session(const SessionParams ¶ms_, const SceneParams &scene_params) + : params(params_), render_scheduler_(tile_manager_, params) +{ + TaskScheduler::init(params.threads); + + session_thread_ = nullptr; + + delayed_reset_.do_reset = false; + + pause_ = false; + cancel_ = false; + new_work_added_ = false; + + device = Device::create(params.device, stats, profiler); + + scene = new Scene(scene_params, device); + + /* Configure path tracer. 
*/ + path_trace_ = make_unique( + device, scene->film, &scene->dscene, render_scheduler_, tile_manager_); + path_trace_->set_progress(&progress); + path_trace_->progress_update_cb = [&]() { update_status_time(); }; + + tile_manager_.full_buffer_written_cb = [&](string_view filename) { + if (!full_buffer_written_cb) { + return; + } + full_buffer_written_cb(filename); + }; +} + +Session::~Session() +{ + cancel(); + + /* Make sure path tracer is destroyed before the device. This is needed because destruction might + * need to access device for device memory free. */ + /* TODO(sergey): Convert device to be unique_ptr, and rely on C++ to destruct objects in the + * pre-defined order. */ + path_trace_.reset(); + + delete scene; + delete device; + + TaskScheduler::exit(); +} + +void Session::start() +{ + if (!session_thread_) { + session_thread_ = new thread(function_bind(&Session::run, this)); + } +} + +void Session::cancel(bool quick) +{ + if (quick && path_trace_) { + path_trace_->cancel(); + } + + if (session_thread_) { + /* wait for session thread to end */ + progress.set_cancel("Exiting"); + + { + thread_scoped_lock pause_lock(pause_mutex_); + pause_ = false; + cancel_ = true; + } + pause_cond_.notify_all(); + + wait(); + } +} + +bool Session::ready_to_reset() +{ + return path_trace_->ready_to_reset(); +} + +void Session::run_main_render_loop() +{ + path_trace_->clear_display(); + + while (true) { + RenderWork render_work = run_update_for_next_iteration(); + + if (!render_work) { + if (VLOG_IS_ON(2)) { + double total_time, render_time; + progress.get_time(total_time, render_time); + VLOG(2) << "Rendering in main loop is done in " << render_time << " seconds."; + VLOG(2) << path_trace_->full_report(); + } + + if (params.background) { + /* if no work left and in background mode, we can stop immediately. 
*/ + progress.set_status("Finished"); + break; + } + } + + const bool did_cancel = progress.get_cancel(); + if (did_cancel) { + render_scheduler_.render_work_reschedule_on_cancel(render_work); + if (!render_work) { + break; + } + } + else if (run_wait_for_work(render_work)) { + continue; + } + + /* Stop rendering if error happened during scene update or other step of preparing scene + * for render. */ + if (device->have_error()) { + progress.set_error(device->error_message()); + break; + } + + { + /* buffers mutex is locked entirely while rendering each + * sample, and released/reacquired on each iteration to allow + * reset and draw in between */ + thread_scoped_lock buffers_lock(buffers_mutex_); + + /* update status and timing */ + update_status_time(); + + /* render */ + path_trace_->render(render_work); + + /* update status and timing */ + update_status_time(); + + /* Stop rendering if error happened during path tracing. */ + if (device->have_error()) { + progress.set_error(device->error_message()); + break; + } + } + + progress.set_update(); + + if (did_cancel) { + break; + } + } +} + +void Session::run() +{ + if (params.use_profiling && (params.device.type == DEVICE_CPU)) { + profiler.start(); + } + + /* session thread loop */ + progress.set_status("Waiting for render to start"); + + /* run */ + if (!progress.get_cancel()) { + /* reset number of rendered samples */ + progress.reset_sample(); + + run_main_render_loop(); + } + + profiler.stop(); + + /* progress update */ + if (progress.get_cancel()) + progress.set_status(progress.get_cancel_message()); + else + progress.set_update(); +} + +RenderWork Session::run_update_for_next_iteration() +{ + RenderWork render_work; + + thread_scoped_lock scene_lock(scene->mutex); + thread_scoped_lock reset_lock(delayed_reset_.mutex); + + bool have_tiles = true; + bool switched_to_new_tile = false; + + const bool did_reset = delayed_reset_.do_reset; + if (delayed_reset_.do_reset) { + thread_scoped_lock 
buffers_lock(buffers_mutex_); + do_delayed_reset(); + + /* After reset make sure the tile manager is at the first big tile. */ + have_tiles = tile_manager_.next(); + switched_to_new_tile = true; + } + + /* Update number of samples in the integrator. + * Ideally this would need to happen once in `Session::set_samples()`, but the issue there is + * the initial configuration when Session is created where the `set_samples()` is not used. + * + * NOTE: Unless reset was requested only allow increasing number of samples. */ + if (did_reset || scene->integrator->get_aa_samples() < params.samples) { + scene->integrator->set_aa_samples(params.samples); + } + + /* Update denoiser settings. */ + { + const DenoiseParams denoise_params = scene->integrator->get_denoise_params(); + path_trace_->set_denoiser_params(denoise_params); + } + + /* Update adaptive sampling. */ + { + const AdaptiveSampling adaptive_sampling = scene->integrator->get_adaptive_sampling(); + path_trace_->set_adaptive_sampling(adaptive_sampling); + } + + render_scheduler_.set_num_samples(params.samples); + render_scheduler_.set_time_limit(params.time_limit); + + while (have_tiles) { + render_work = render_scheduler_.get_render_work(); + if (render_work) { + break; + } + + progress.add_finished_tile(false); + + have_tiles = tile_manager_.next(); + if (have_tiles) { + render_scheduler_.reset_for_next_tile(); + switched_to_new_tile = true; + } + } + + if (render_work) { + scoped_timer update_timer; + + if (switched_to_new_tile) { + BufferParams tile_params = buffer_params_; + + const Tile &tile = tile_manager_.get_current_tile(); + + tile_params.width = tile.width; + tile_params.height = tile.height; + + tile_params.window_x = tile.window_x; + tile_params.window_y = tile.window_y; + tile_params.window_width = tile.window_width; + tile_params.window_height = tile.window_height; + + tile_params.full_x = tile.x + buffer_params_.full_x; + tile_params.full_y = tile.y + buffer_params_.full_y; + tile_params.full_width = 
/* Block the calling thread while rendering is paused or there is nothing to render.
 *
 * Returns false immediately for background renders and when there is unpaused work to do.
 * Otherwise waits on `pause_cond_` until cancellation, or until rendering is unpaused and there
 * is either work, newly added work or a pending delayed reset. In that case the return value is
 * true when `render_work` was empty on entry. */
bool Session::run_wait_for_work(const RenderWork &render_work)
{
  /* In an offline rendering there is no pause, and no tiles will mean the job is fully done. */
  if (params.background) {
    return false;
  }

  /* Held for the rest of the function, except while blocked inside `pause_cond_.wait()`. */
  thread_scoped_lock pause_lock(pause_mutex_);

  if (!pause_ && render_work) {
    /* Rendering is not paused and there is work to be done. No need to wait for anything. */
    return false;
  }

  /* Captured once: `render_work` does not change while waiting. */
  const bool no_work = !render_work;
  update_status_time(pause_, no_work);

  /* Only leave the loop when rendering is not paused. But even if the current render is un-paused
   * but there is nothing to render keep waiting until new work is added. */
  while (!cancel_) {
    /* Constructed on every iteration, so it covers a single wait. */
    scoped_timer pause_timer;

    /* NOTE(review): `delayed_reset_.do_reset` is read here with only `pause_mutex_` held;
     * `Session::reset()` sets it while holding both `delayed_reset_.mutex` and `pause_mutex_`. */
    if (!pause_ && (render_work || new_work_added_ || delayed_reset_.do_reset)) {
      break;
    }

    /* Wait for either pause state changed, or extra samples added to render. */
    pause_cond_.wait(pause_lock);

    /* Only time spent while paused is added as skip time. */
    if (pause_) {
      progress.add_skip_time(pause_timer, params.background);
    }

    update_status_time(pause_, no_work);
    progress.set_update();
  }

  /* The request for new work has been consumed. */
  new_work_added_ = false;

  return no_work;
}
*/ + if (!params.use_auto_tile || scene->bake_manager->get_baking()) { + return make_int2(buffer_params_.width, buffer_params_.height); + } + + /* TODO(sergey): Take available memory into account, and if there is enough memory do not tile + * and prefer optimal performance. */ + const int tile_size = tile_manager_.compute_render_tile_size(params.tile_size); + return make_int2(tile_size, tile_size); +} + +void Session::do_delayed_reset() +{ + if (!delayed_reset_.do_reset) { + return; + } + delayed_reset_.do_reset = false; + + params = delayed_reset_.session_params; + buffer_params_ = delayed_reset_.buffer_params; + + /* Store parameters used for buffers access outside of scene graph. */ + buffer_params_.samples = params.samples; + buffer_params_.exposure = scene->film->get_exposure(); + buffer_params_.use_approximate_shadow_catcher = + scene->film->get_use_approximate_shadow_catcher(); + buffer_params_.use_transparent_background = scene->background->get_transparent(); + + /* Tile and work scheduling. */ + tile_manager_.reset_scheduling(buffer_params_, get_effective_tile_size()); + render_scheduler_.reset(buffer_params_, params.samples); + + /* Passes. */ + /* When multiple tiles are used SAMPLE_COUNT pass is used to keep track of possible partial + * tile results. It is safe to use generic update function here which checks for changes since + * changes in tile settings re-creates session, which ensures film is fully updated on tile + * changes. */ + scene->film->update_passes(scene, tile_manager_.has_multiple_tiles()); + + /* Update for new state of scene and passes. */ + buffer_params_.update_passes(scene->passes); + tile_manager_.update(buffer_params_, scene); + + /* Progress. 
*/ + progress.reset_sample(); + progress.set_total_pixel_samples(static_cast(buffer_params_.width) * + buffer_params_.height * params.samples); + + if (!params.background) { + progress.set_start_time(); + } + progress.set_render_start_time(); +} + +void Session::reset(const SessionParams &session_params, const BufferParams &buffer_params) +{ + { + thread_scoped_lock reset_lock(delayed_reset_.mutex); + thread_scoped_lock pause_lock(pause_mutex_); + + delayed_reset_.do_reset = true; + delayed_reset_.session_params = session_params; + delayed_reset_.buffer_params = buffer_params; + + path_trace_->cancel(); + } + + pause_cond_.notify_all(); +} + +void Session::set_samples(int samples) +{ + if (samples == params.samples) { + return; + } + + params.samples = samples; + + { + thread_scoped_lock pause_lock(pause_mutex_); + new_work_added_ = true; + } + + pause_cond_.notify_all(); +} + +void Session::set_time_limit(double time_limit) +{ + if (time_limit == params.time_limit) { + return; + } + + params.time_limit = time_limit; + + { + thread_scoped_lock pause_lock(pause_mutex_); + new_work_added_ = true; + } + + pause_cond_.notify_all(); +} + +void Session::set_pause(bool pause) +{ + bool notify = false; + + { + thread_scoped_lock pause_lock(pause_mutex_); + + if (pause != pause_) { + pause_ = pause; + notify = true; + } + } + + if (session_thread_) { + if (notify) { + pause_cond_.notify_all(); + } + } + else if (pause_) { + update_status_time(pause_); + } +} + +void Session::set_output_driver(unique_ptr driver) +{ + path_trace_->set_output_driver(move(driver)); +} + +void Session::set_display_driver(unique_ptr driver) +{ + path_trace_->set_display_driver(move(driver)); +} + +double Session::get_estimated_remaining_time() const +{ + const float completed = progress.get_progress(); + if (completed == 0.0f) { + return 0.0; + } + + double total_time, render_time; + progress.get_time(total_time, render_time); + double remaining = (1.0 - (double)completed) * (render_time / 
/* Append `suffix` to `status`, separated by ", " unless `status` is empty. */
static string status_append(const string &status, const string &suffix)
{
  if (status.empty()) {
    return suffix;
  }

  return status + ", " + suffix;
}
*/ + + if (show_pause) { + status = "Rendering Paused"; + } + else if (show_done) { + status = "Rendering Done"; + progress.set_end_time(); /* Save end time so that further calls to get_time are accurate. */ + } + else { + status = substatus; + substatus.clear(); + } + + progress.set_status(status, substatus); +} + +void Session::device_free() +{ + scene->device_free(); + path_trace_->device_free(); +} + +void Session::collect_statistics(RenderStats *render_stats) +{ + scene->collect_statistics(render_stats); + if (params.use_profiling && (params.device.type == DEVICE_CPU)) { + render_stats->collect_profiling(scene, profiler); + } +} + +/* -------------------------------------------------------------------- + * Full-frame on-disk storage. + */ + +void Session::process_full_buffer_from_disk(string_view filename) +{ + path_trace_->process_full_buffer_from_disk(filename); +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/session/session.h b/intern/cycles/session/session.h new file mode 100644 index 00000000000..5aa6df79ef1 --- /dev/null +++ b/intern/cycles/session/session.h @@ -0,0 +1,229 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef __SESSION_H__ +#define __SESSION_H__ + +#include "device/device.h" +#include "integrator/render_scheduler.h" +#include "scene/shader.h" +#include "scene/stats.h" +#include "session/buffers.h" +#include "session/tile.h" + +#include "util/util_progress.h" +#include "util/util_stats.h" +#include "util/util_thread.h" +#include "util/util_unique_ptr.h" +#include "util/util_vector.h" + +CCL_NAMESPACE_BEGIN + +class BufferParams; +class Device; +class DeviceScene; +class DisplayDriver; +class OutputDriver; +class PathTrace; +class Progress; +class RenderBuffers; +class Scene; +class SceneParams; + +/* Session Parameters */ + +class SessionParams { + public: + DeviceInfo device; + + bool headless; + bool background; + + bool experimental; + int samples; + int pixel_size; + int threads; + + /* Limit in seconds for how long path tracing is allowed to happen. + * Zero means no limit is applied. */ + double time_limit; + + bool use_profiling; + + bool use_auto_tile; + int tile_size; + + ShadingSystem shadingsystem; + + SessionParams() + { + headless = false; + background = false; + + experimental = false; + samples = 1024; + pixel_size = 1; + threads = 0; + time_limit = 0.0; + + use_profiling = false; + + use_auto_tile = true; + tile_size = 2048; + + shadingsystem = SHADINGSYSTEM_SVM; + } + + bool modified(const SessionParams ¶ms) const + { + /* Modified means we have to recreate the session, any parameter changes + * that can be handled by an existing Session are omitted. 
*/ + return !(device == params.device && headless == params.headless && + background == params.background && experimental == params.experimental && + pixel_size == params.pixel_size && threads == params.threads && + use_profiling == params.use_profiling && shadingsystem == params.shadingsystem && + use_auto_tile == params.use_auto_tile && tile_size == params.tile_size); + } +}; + +/* Session + * + * This is the class that contains the session thread, running the render + * control loop and dispatching tasks. */ + +class Session { + public: + Device *device; + Scene *scene; + Progress progress; + SessionParams params; + Stats stats; + Profiler profiler; + + /* Callback is invoked by tile manager whenever on-dist tiles storage file is closed after + * writing. Allows an engine integration to keep track of those files without worry about + * transferring the information when it needs to re-create session during rendering. */ + function full_buffer_written_cb; + + explicit Session(const SessionParams ¶ms, const SceneParams &scene_params); + ~Session(); + + void start(); + + /* When quick cancel is requested path tracing is cancels as soon as possible, without waiting + * for the buffer to be uniformly sampled. */ + void cancel(bool quick = false); + + void draw(); + void wait(); + + bool ready_to_reset(); + void reset(const SessionParams &session_params, const BufferParams &buffer_params); + + void set_pause(bool pause); + + void set_samples(int samples); + void set_time_limit(double time_limit); + + void set_output_driver(unique_ptr driver); + void set_display_driver(unique_ptr driver); + + double get_estimated_remaining_time() const; + + void device_free(); + + /* Returns the rendering progress or 0 if no progress can be determined + * (for example, when rendering with unlimited samples). 
*/ + float get_progress(); + + void collect_statistics(RenderStats *stats); + + /* -------------------------------------------------------------------- + * Full-frame on-disk storage. + */ + + /* Read given full-frame file from disk, perform needed processing and write it to the software + * via the write callback. */ + void process_full_buffer_from_disk(string_view filename); + + protected: + struct DelayedReset { + thread_mutex mutex; + bool do_reset; + SessionParams session_params; + BufferParams buffer_params; + } delayed_reset_; + + void run(); + + /* Update for the new iteration of the main loop in run implementation (run_cpu and run_gpu). + * + * Will take care of the following things: + * - Delayed reset + * - Scene update + * - Tile manager advance + * - Render scheduler work request + * + * The updates are done in a proper order with proper locking around them, which guarantees + * that the device side of scene and render buffers are always in a consistent state. + * + * Returns render work which is to be rendered next. */ + RenderWork run_update_for_next_iteration(); + + /* Wait for rendering to be unpaused, or for new tiles for render to arrive. + * Returns true if new main render loop iteration is required after this function call. + * + * The `render_work` is the work which was scheduled by the render scheduler right before + * checking the pause. 
*/ + bool run_wait_for_work(const RenderWork &render_work); + + void run_main_render_loop(); + + bool update_scene(int width, int height); + + void update_status_time(bool show_pause = false, bool show_done = false); + + void do_delayed_reset(); + + int2 get_effective_tile_size() const; + + thread *session_thread_; + + bool pause_ = false; + bool cancel_ = false; + bool new_work_added_ = false; + + thread_condition_variable pause_cond_; + thread_mutex pause_mutex_; + thread_mutex tile_mutex_; + thread_mutex buffers_mutex_; + + TileManager tile_manager_; + BufferParams buffer_params_; + + /* Render scheduler is used to get work to be rendered with the current big tile. */ + RenderScheduler render_scheduler_; + + /* Path tracer object. + * + * Is a single full-frame path tracer for interactive viewport rendering. + * A path tracer for the current big-tile for an offline rendering. */ + unique_ptr path_trace_; +}; + +CCL_NAMESPACE_END + +#endif /* __SESSION_H__ */ diff --git a/intern/cycles/session/tile.cpp b/intern/cycles/session/tile.cpp new file mode 100644 index 00000000000..59332530596 --- /dev/null +++ b/intern/cycles/session/tile.cpp @@ -0,0 +1,629 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "session/tile.h" + +#include + +#include "graph/node.h" +#include "scene/background.h" +#include "scene/film.h" +#include "scene/integrator.h" +#include "scene/scene.h" +#include "util/util_algorithm.h" +#include "util/util_foreach.h" +#include "util/util_logging.h" +#include "util/util_path.h" +#include "util/util_string.h" +#include "util/util_system.h" +#include "util/util_types.h" + +CCL_NAMESPACE_BEGIN + +/* -------------------------------------------------------------------- + * Internal functions. + */ + +static const char *ATTR_PASSES_COUNT = "cycles.passes.count"; +static const char *ATTR_PASS_SOCKET_PREFIX_FORMAT = "cycles.passes.%d."; +static const char *ATTR_BUFFER_SOCKET_PREFIX = "cycles.buffer."; +static const char *ATTR_DENOISE_SOCKET_PREFIX = "cycles.denoise."; + +/* Global counter of ToleManager object instances. */ +static std::atomic g_instance_index = 0; + +/* Construct names of EXR channels which will ensure order of all channels to match exact offsets + * in render buffers corresponding to the given passes. + * + * Returns `std` datatypes so that it can be assigned directly to the OIIO's `ImageSpec`. */ +static std::vector exr_channel_names_for_passes(const BufferParams &buffer_params) +{ + static const char *component_suffixes[] = {"R", "G", "B", "A"}; + + int pass_index = 0; + int num_channels = 0; + std::vector channel_names; + for (const BufferPass &pass : buffer_params.passes) { + if (pass.offset == PASS_UNUSED) { + continue; + } + + const PassInfo pass_info = pass.get_info(); + num_channels += pass_info.num_components; + + /* EXR canonically expects first part of channel names to be sorted alphabetically, which is + * not guaranteed to be the case with passes names. Assign a prefix based on the pass index + * with a fixed width to ensure ordering. This makes it possible to dump existing render + * buffers memory to disk and read it back without doing extra mapping. 
*/ + const string prefix = string_printf("%08d", pass_index); + + const string channel_name_prefix = prefix + string(pass.name) + "."; + + for (int i = 0; i < pass_info.num_components; ++i) { + channel_names.push_back(channel_name_prefix + component_suffixes[i]); + } + + ++pass_index; + } + + return channel_names; +} + +inline string node_socket_attribute_name(const SocketType &socket, const string &attr_name_prefix) +{ + return attr_name_prefix + string(socket.name); +} + +template +static bool node_socket_generic_to_image_spec_atttributes( + ImageSpec *image_spec, + const Node *node, + const SocketType &socket, + const string &attr_name_prefix, + const ValidateValueFunc &validate_value_func, + const GetValueFunc &get_value_func) +{ + if (!validate_value_func(node, socket)) { + return false; + } + + image_spec->attribute(node_socket_attribute_name(socket, attr_name_prefix), + get_value_func(node, socket)); + + return true; +} + +static bool node_socket_to_image_spec_atttributes(ImageSpec *image_spec, + const Node *node, + const SocketType &socket, + const string &attr_name_prefix) +{ + const string attr_name = node_socket_attribute_name(socket, attr_name_prefix); + + switch (socket.type) { + case SocketType::ENUM: { + const ustring value = node->get_string(socket); + + /* Validate that the node is consistent with the node type definition. 
*/ + const NodeEnum &enum_values = *socket.enum_values; + if (!enum_values.exists(value)) { + LOG(DFATAL) << "Node enum contains invalid value " << value; + return false; + } + + image_spec->attribute(attr_name, value); + + return true; + } + + case SocketType::STRING: + image_spec->attribute(attr_name, node->get_string(socket)); + return true; + + case SocketType::INT: + image_spec->attribute(attr_name, node->get_int(socket)); + return true; + + case SocketType::FLOAT: + image_spec->attribute(attr_name, node->get_float(socket)); + return true; + + case SocketType::BOOLEAN: + image_spec->attribute(attr_name, node->get_bool(socket)); + return true; + + default: + LOG(DFATAL) << "Unhandled socket type " << socket.type << ", should never happen."; + return false; + } +} + +static bool node_socket_from_image_spec_atttributes(Node *node, + const SocketType &socket, + const ImageSpec &image_spec, + const string &attr_name_prefix) +{ + const string attr_name = node_socket_attribute_name(socket, attr_name_prefix); + + switch (socket.type) { + case SocketType::ENUM: { + /* TODO(sergey): Avoid construction of `ustring` by using `string_view` in the Node API. */ + const ustring value(image_spec.get_string_attribute(attr_name, "")); + + /* Validate that the node is consistent with the node type definition. */ + const NodeEnum &enum_values = *socket.enum_values; + if (!enum_values.exists(value)) { + LOG(ERROR) << "Invalid enumerator value " << value; + return false; + } + + node->set(socket, enum_values[value]); + + return true; + } + + case SocketType::STRING: + /* TODO(sergey): Avoid construction of `ustring` by using `string_view` in the Node API. 
*/ + node->set(socket, ustring(image_spec.get_string_attribute(attr_name, ""))); + return true; + + case SocketType::INT: + node->set(socket, image_spec.get_int_attribute(attr_name, 0)); + return true; + + case SocketType::FLOAT: + node->set(socket, image_spec.get_float_attribute(attr_name, 0)); + return true; + + case SocketType::BOOLEAN: + node->set(socket, static_cast(image_spec.get_int_attribute(attr_name, 0))); + return true; + + default: + LOG(DFATAL) << "Unhandled socket type " << socket.type << ", should never happen."; + return false; + } +} + +static bool node_to_image_spec_atttributes(ImageSpec *image_spec, + const Node *node, + const string &attr_name_prefix) +{ + for (const SocketType &socket : node->type->inputs) { + if (!node_socket_to_image_spec_atttributes(image_spec, node, socket, attr_name_prefix)) { + return false; + } + } + + return true; +} + +static bool node_from_image_spec_atttributes(Node *node, + const ImageSpec &image_spec, + const string &attr_name_prefix) +{ + for (const SocketType &socket : node->type->inputs) { + if (!node_socket_from_image_spec_atttributes(node, socket, image_spec, attr_name_prefix)) { + return false; + } + } + + return true; +} + +static bool buffer_params_to_image_spec_atttributes(ImageSpec *image_spec, + const BufferParams &buffer_params) +{ + if (!node_to_image_spec_atttributes(image_spec, &buffer_params, ATTR_BUFFER_SOCKET_PREFIX)) { + return false; + } + + /* Passes storage is not covered by the node socket. so "expand" the loop manually. 
*/ + + const int num_passes = buffer_params.passes.size(); + image_spec->attribute(ATTR_PASSES_COUNT, num_passes); + + for (int pass_index = 0; pass_index < num_passes; ++pass_index) { + const string attr_name_prefix = string_printf(ATTR_PASS_SOCKET_PREFIX_FORMAT, pass_index); + + const BufferPass *pass = &buffer_params.passes[pass_index]; + if (!node_to_image_spec_atttributes(image_spec, pass, attr_name_prefix)) { + return false; + } + } + + return true; +} + +static bool buffer_params_from_image_spec_atttributes(BufferParams *buffer_params, + const ImageSpec &image_spec) +{ + if (!node_from_image_spec_atttributes(buffer_params, image_spec, ATTR_BUFFER_SOCKET_PREFIX)) { + return false; + } + + /* Passes storage is not covered by the node socket. so "expand" the loop manually. */ + + const int num_passes = image_spec.get_int_attribute(ATTR_PASSES_COUNT, 0); + if (num_passes == 0) { + LOG(ERROR) << "Missing passes count attribute."; + return false; + } + + for (int pass_index = 0; pass_index < num_passes; ++pass_index) { + const string attr_name_prefix = string_printf(ATTR_PASS_SOCKET_PREFIX_FORMAT, pass_index); + + BufferPass pass; + + if (!node_from_image_spec_atttributes(&pass, image_spec, attr_name_prefix)) { + return false; + } + + buffer_params->passes.emplace_back(std::move(pass)); + } + + buffer_params->update_passes(); + + return true; +} + +/* Configure image specification for the given buffer parameters and passes. + * + * Image channels will be strictly ordered to match content of corresponding buffer, and the + * metadata will be set so that the render buffers and passes can be reconstructed from it. + * + * If the tile size different from (0, 0) the image specification will be configured to use the + * given tile size for tiled IO. 
*/ +static bool configure_image_spec_from_buffer(ImageSpec *image_spec, + const BufferParams &buffer_params, + const int2 tile_size = make_int2(0, 0)) +{ + const std::vector channel_names = exr_channel_names_for_passes(buffer_params); + const int num_channels = channel_names.size(); + + *image_spec = ImageSpec( + buffer_params.width, buffer_params.height, num_channels, TypeDesc::FLOAT); + + image_spec->channelnames = move(channel_names); + + if (!buffer_params_to_image_spec_atttributes(image_spec, buffer_params)) { + return false; + } + + if (tile_size.x != 0 || tile_size.y != 0) { + DCHECK_GT(tile_size.x, 0); + DCHECK_GT(tile_size.y, 0); + + image_spec->tile_width = min(TileManager::IMAGE_TILE_SIZE, tile_size.x); + image_spec->tile_height = min(TileManager::IMAGE_TILE_SIZE, tile_size.y); + } + + return true; +} + +/* -------------------------------------------------------------------- + * Tile Manager. + */ + +TileManager::TileManager() +{ + /* Use process ID to separate different processes. + * To ensure uniqueness from within a process use combination of object address and instance + * index. This solves problem of possible object re-allocation at the same time, and solves + * possible conflict when the counter overflows while there are still active instances of the + * class. */ + const int tile_manager_id = g_instance_index.fetch_add(1, std::memory_order_relaxed); + tile_file_unique_part_ = to_string(system_self_process_id()) + "-" + + to_string(reinterpret_cast(this)) + "-" + + to_string(tile_manager_id); +} + +TileManager::~TileManager() +{ +} + +int TileManager::compute_render_tile_size(const int suggested_tile_size) const +{ + /* Must be a multiple of IMAGE_TILE_SIZE so that we can write render tiles into the image file + * aligned on image tile boundaries. We can't set IMAGE_TILE_SIZE equal to the render tile size + * because too big tile size leads to integer overflow inside OpenEXR. */ + return (suggested_tile_size <= IMAGE_TILE_SIZE) ? 
suggested_tile_size : + align_up(suggested_tile_size, IMAGE_TILE_SIZE); +} + +void TileManager::reset_scheduling(const BufferParams ¶ms, int2 tile_size) +{ + VLOG(3) << "Using tile size of " << tile_size; + + close_tile_output(); + + tile_size_ = tile_size; + + tile_state_.num_tiles_x = divide_up(params.width, tile_size_.x); + tile_state_.num_tiles_y = divide_up(params.height, tile_size_.y); + tile_state_.num_tiles = tile_state_.num_tiles_x * tile_state_.num_tiles_y; + + tile_state_.next_tile_index = 0; + + tile_state_.current_tile = Tile(); +} + +void TileManager::update(const BufferParams ¶ms, const Scene *scene) +{ + DCHECK_NE(params.pass_stride, -1); + + buffer_params_ = params; + + /* TODO(sergey): Proper Error handling, so that if configuration has failed we don't attempt to + * write to a partially configured file. */ + configure_image_spec_from_buffer(&write_state_.image_spec, buffer_params_, tile_size_); + + const DenoiseParams denoise_params = scene->integrator->get_denoise_params(); + const AdaptiveSampling adaptive_sampling = scene->integrator->get_adaptive_sampling(); + + node_to_image_spec_atttributes( + &write_state_.image_spec, &denoise_params, ATTR_DENOISE_SOCKET_PREFIX); + + if (adaptive_sampling.use) { + overscan_ = 4; + } + else { + overscan_ = 0; + } +} + +bool TileManager::done() +{ + return tile_state_.next_tile_index == tile_state_.num_tiles; +} + +bool TileManager::next() +{ + if (done()) { + return false; + } + + tile_state_.current_tile = get_tile_for_index(tile_state_.next_tile_index); + + ++tile_state_.next_tile_index; + + return true; +} + +Tile TileManager::get_tile_for_index(int index) const +{ + /* TODO(sergey): Consider using hilbert spiral, or. maybe, even configurable. Not sure this + * brings a lot of value since this is only applicable to BIG tiles. 
*/ + + const int tile_index_y = index / tile_state_.num_tiles_x; + const int tile_index_x = index - tile_index_y * tile_state_.num_tiles_x; + + const int tile_window_x = tile_index_x * tile_size_.x; + const int tile_window_y = tile_index_y * tile_size_.y; + + Tile tile; + + tile.x = max(0, tile_window_x - overscan_); + tile.y = max(0, tile_window_y - overscan_); + + tile.window_x = tile_window_x - tile.x; + tile.window_y = tile_window_y - tile.y; + tile.window_width = min(tile_size_.x, buffer_params_.width - tile_window_x); + tile.window_height = min(tile_size_.y, buffer_params_.height - tile_window_y); + + tile.width = min(buffer_params_.width - tile.x, tile.window_x + tile.window_width + overscan_); + tile.height = min(buffer_params_.height - tile.y, + tile.window_y + tile.window_height + overscan_); + + return tile; +} + +const Tile &TileManager::get_current_tile() const +{ + return tile_state_.current_tile; +} + +const int2 TileManager::get_size() const +{ + return make_int2(buffer_params_.width, buffer_params_.height); +} + +bool TileManager::open_tile_output() +{ + write_state_.filename = path_temp_get("cycles-tile-buffer-" + tile_file_unique_part_ + "-" + + to_string(write_state_.tile_file_index) + ".exr"); + + write_state_.tile_out = ImageOutput::create(write_state_.filename); + if (!write_state_.tile_out) { + LOG(ERROR) << "Error creating image output for " << write_state_.filename; + return false; + } + + if (!write_state_.tile_out->supports("tiles")) { + LOG(ERROR) << "Progress tile file format does not support tiling."; + return false; + } + + if (!write_state_.tile_out->open(write_state_.filename, write_state_.image_spec)) { + LOG(ERROR) << "Error opening tile file: " << write_state_.tile_out->geterror(); + write_state_.tile_out = nullptr; + return false; + } + + write_state_.num_tiles_written = 0; + + VLOG(3) << "Opened tile file " << write_state_.filename; + + return true; +} + +bool TileManager::close_tile_output() +{ + if (!write_state_.tile_out) { 
+ return true; + } + + const bool success = write_state_.tile_out->close(); + write_state_.tile_out = nullptr; + + if (!success) { + LOG(ERROR) << "Error closing tile file."; + return false; + } + + VLOG(3) << "Tile output is closed."; + + return true; +} + +bool TileManager::write_tile(const RenderBuffers &tile_buffers) +{ + if (!write_state_.tile_out) { + if (!open_tile_output()) { + return false; + } + } + + DCHECK_EQ(tile_buffers.params.pass_stride, buffer_params_.pass_stride); + + vector pixel_storage; + + const BufferParams &tile_params = tile_buffers.params; + + const int tile_x = tile_params.full_x - buffer_params_.full_x + tile_params.window_x; + const int tile_y = tile_params.full_y - buffer_params_.full_y + tile_params.window_y; + + const int64_t pass_stride = tile_params.pass_stride; + const int64_t tile_row_stride = tile_params.width * pass_stride; + + const int64_t xstride = pass_stride * sizeof(float); + const int64_t ystride = xstride * tile_params.width; + const int64_t zstride = ystride * tile_params.height; + + const float *pixels = tile_buffers.buffer.data() + tile_params.window_x * pass_stride + + tile_params.window_y * tile_row_stride; + + VLOG(3) << "Write tile at " << tile_x << ", " << tile_y; + + /* The image tile sizes in the OpenEXR file are different from the size of our big tiles. The + * write_tiles() method expects a contiguous image region that will be split into tiles + * internally. OpenEXR expects the size of this region to be a multiple of the tile size, + * however OpenImageIO automatically adds the required padding. + * + * The only thing we have to ensure is that the tile_x and tile_y are a multiple of the + * image tile size, which happens in compute_render_tile_size. 
*/ + if (!write_state_.tile_out->write_tiles(tile_x, + tile_x + tile_params.window_width, + tile_y, + tile_y + tile_params.window_height, + 0, + 1, + TypeDesc::FLOAT, + pixels, + xstride, + ystride, + zstride)) { + LOG(ERROR) << "Error writing tile " << write_state_.tile_out->geterror(); + return false; + } + + ++write_state_.num_tiles_written; + + return true; +} + +void TileManager::finish_write_tiles() +{ + if (!write_state_.tile_out) { + /* None of the tiles were written hence the file was not created. + * Avoid creation of fully empty file since it is redundant. */ + return; + } + + /* EXR expects all tiles to present in file. So explicitly write missing tiles as all-zero. */ + if (write_state_.num_tiles_written < tile_state_.num_tiles) { + vector pixel_storage(tile_size_.x * tile_size_.y * buffer_params_.pass_stride); + + for (int tile_index = write_state_.num_tiles_written; tile_index < tile_state_.num_tiles; + ++tile_index) { + const Tile tile = get_tile_for_index(tile_index); + + const int tile_x = tile.x + tile.window_x; + const int tile_y = tile.y + tile.window_y; + + VLOG(3) << "Write dummy tile at " << tile_x << ", " << tile_y; + + write_state_.tile_out->write_tiles(tile_x, + tile_x + tile.window_width, + tile_y, + tile_y + tile.window_height, + 0, + 1, + TypeDesc::FLOAT, + pixel_storage.data()); + } + } + + close_tile_output(); + + if (full_buffer_written_cb) { + full_buffer_written_cb(write_state_.filename); + } + + /* Advance the counter upon explicit finish of the file. + * Makes it possible to re-use tile manager for another scene, and avoids unnecessary increments + * of the tile-file-within-session index. 
*/ + ++write_state_.tile_file_index; + + write_state_.filename = ""; +} + +bool TileManager::read_full_buffer_from_disk(const string_view filename, + RenderBuffers *buffers, + DenoiseParams *denoise_params) +{ + unique_ptr in(ImageInput::open(filename)); + if (!in) { + LOG(ERROR) << "Error opening tile file " << filename; + return false; + } + + const ImageSpec &image_spec = in->spec(); + + BufferParams buffer_params; + if (!buffer_params_from_image_spec_atttributes(&buffer_params, image_spec)) { + return false; + } + buffers->reset(buffer_params); + + if (!node_from_image_spec_atttributes(denoise_params, image_spec, ATTR_DENOISE_SOCKET_PREFIX)) { + return false; + } + + if (!in->read_image(TypeDesc::FLOAT, buffers->buffer.data())) { + LOG(ERROR) << "Error reading pixels from the tile file " << in->geterror(); + return false; + } + + if (!in->close()) { + LOG(ERROR) << "Error closing tile file " << in->geterror(); + return false; + } + + return true; +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/session/tile.h b/intern/cycles/session/tile.h new file mode 100644 index 00000000000..37a02081a53 --- /dev/null +++ b/intern/cycles/session/tile.h @@ -0,0 +1,182 @@ +/* + * Copyright 2011-2013 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include "session/buffers.h" +#include "util/util_image.h" +#include "util/util_string.h" +#include "util/util_unique_ptr.h" + +CCL_NAMESPACE_BEGIN + +class DenoiseParams; +class Scene; + +/* -------------------------------------------------------------------- + * Tile. + */ + +class Tile { + public: + int x = 0, y = 0; + int width = 0, height = 0; + + int window_x = 0, window_y = 0; + int window_width = 0, window_height = 0; + + Tile() + { + } +}; + +/* -------------------------------------------------------------------- + * Tile Manager. + */ + +class TileManager { + public: + /* This callback is invoked by whenever on-dist tiles storage file is closed after writing. */ + function full_buffer_written_cb; + + TileManager(); + ~TileManager(); + + TileManager(const TileManager &other) = delete; + TileManager(TileManager &&other) noexcept = delete; + TileManager &operator=(const TileManager &other) = delete; + TileManager &operator=(TileManager &&other) = delete; + + /* Reset current progress and start new rendering of the full-frame parameters in tiles of the + * given size. + * Only touches scheduling-related state of the tile manager. */ + /* TODO(sergey): Consider using tile area instead of exact size to help dealing with extreme + * cases of stretched renders. */ + void reset_scheduling(const BufferParams ¶ms, int2 tile_size); + + /* Update for the known buffer passes and scene parameters. + * Will store all parameters needed for buffers access outside of the scene graph. */ + void update(const BufferParams ¶ms, const Scene *scene); + + inline int get_num_tiles() const + { + return tile_state_.num_tiles; + } + + inline bool has_multiple_tiles() const + { + return tile_state_.num_tiles > 1; + } + + inline int get_tile_overscan() const + { + return overscan_; + } + + bool next(); + bool done(); + + const Tile &get_current_tile() const; + const int2 get_size() const; + + /* Write render buffer of a tile to a file on disk. 
+ * + * Opens file for write when first tile is written. + * + * Returns true on success. */ + bool write_tile(const RenderBuffers &tile_buffers); + + /* Inform the tile manager that no more tiles will be written to disk. + * The file will be considered final, all handles to it will be closed. */ + void finish_write_tiles(); + + /* Check whether any tile has been written to disk. */ + inline bool has_written_tiles() const + { + return write_state_.num_tiles_written != 0; + } + + /* Read full frame render buffer from tiles file on disk. + * + * Returns true on success. */ + bool read_full_buffer_from_disk(string_view filename, + RenderBuffers *buffers, + DenoiseParams *denoise_params); + + /* Compute valid tile size compatible with image saving. */ + int compute_render_tile_size(const int suggested_tile_size) const; + + /* Tile size in the image file. */ + static const int IMAGE_TILE_SIZE = 128; + + protected: + /* Get tile configuration for its index. + * The tile index must be within [0, state_.tile_state_). */ + Tile get_tile_for_index(int index) const; + + bool open_tile_output(); + bool close_tile_output(); + + /* Part of an on-disk tile file name which avoids conflicts between several Cycles instances or + * several sessions. */ + string tile_file_unique_part_; + + int2 tile_size_ = make_int2(0, 0); + + /* Number of extra pixels around the actual tile to render. */ + int overscan_ = 0; + + BufferParams buffer_params_; + + /* Tile scheduling state. */ + struct { + int num_tiles_x = 0; + int num_tiles_y = 0; + int num_tiles = 0; + + int next_tile_index; + + Tile current_tile; + } tile_state_; + + /* State of tiles writing to a file on disk. */ + struct { + /* Index of a tile file used during the current session. + * This number is used for the file name construction, making it possible to render several + * scenes throughout duration of the session and keep all results available for later read + * access. 
*/ + int tile_file_index = 0; + + string filename; + + /* Specification of the tile image which corresponds to the buffer parameters. + * Contains channels configured according to the passes configuration in the path traces. + * + * Output images are saved using this specification, input images are expected to have matched + * specification. */ + ImageSpec image_spec; + + /* Output handle for the tile file. + * + * This file can not be closed until all tiles has been provided, so the handle is stored in + * the state and is created whenever writing is requested. */ + unique_ptr tile_out; + + int num_tiles_written = 0; + } write_state_; +}; + +CCL_NAMESPACE_END -- cgit v1.2.3