diff options
Diffstat (limited to 'intern/cycles/render')
42 files changed, 3475 insertions, 3786 deletions
diff --git a/intern/cycles/render/CMakeLists.txt b/intern/cycles/render/CMakeLists.txt index feead27c5ca..6edb5261b32 100644 --- a/intern/cycles/render/CMakeLists.txt +++ b/intern/cycles/render/CMakeLists.txt @@ -32,10 +32,10 @@ set(SRC camera.cpp colorspace.cpp constant_fold.cpp - coverage.cpp denoising.cpp film.cpp geometry.cpp + gpu_display.cpp graph.cpp hair.cpp image.cpp @@ -54,6 +54,7 @@ set(SRC object.cpp osl.cpp particles.cpp + pass.cpp curves.cpp scene.cpp session.cpp @@ -76,10 +77,10 @@ set(SRC_HEADERS camera.h colorspace.h constant_fold.h - coverage.h denoising.h film.h geometry.h + gpu_display.h graph.h hair.h image.h @@ -95,6 +96,7 @@ set(SRC_HEADERS object.h osl.h particles.h + pass.h procedural.h curves.h scene.h @@ -111,6 +113,7 @@ set(SRC_HEADERS set(LIB cycles_bvh cycles_device + cycles_integrator cycles_subd cycles_util ) diff --git a/intern/cycles/render/background.cpp b/intern/cycles/render/background.cpp index b925e755434..ae6290ac27b 100644 --- a/intern/cycles/render/background.cpp +++ b/intern/cycles/render/background.cpp @@ -34,11 +34,7 @@ NODE_DEFINE(Background) { NodeType *type = NodeType::add("background", create); - SOCKET_FLOAT(ao_factor, "AO Factor", 0.0f); - SOCKET_FLOAT(ao_distance, "AO Distance", FLT_MAX); - SOCKET_BOOLEAN(use_shader, "Use Shader", true); - SOCKET_BOOLEAN(use_ao, "Use AO", false); SOCKET_UINT(visibility, "Visibility", PATH_RAY_ALL_VISIBILITY); SOCKET_BOOLEAN(transparent, "Transparent", false); @@ -80,10 +76,6 @@ void Background::device_update(Device *device, DeviceScene *dscene, Scene *scene /* set shader index and transparent option */ KernelBackground *kbackground = &dscene->data.background; - kbackground->ao_factor = (use_ao) ? 
ao_factor : 0.0f; - kbackground->ao_bounces_factor = ao_factor; - kbackground->ao_distance = ao_distance; - kbackground->transparent = transparent; kbackground->surface_shader = scene->shader_manager->get_shader_id(bg_shader); @@ -138,10 +130,6 @@ void Background::tag_update(Scene *scene) * and to avoid doing unnecessary updates anywhere else. */ tag_use_shader_modified(); } - - if (ao_factor_is_modified() || use_ao_is_modified()) { - scene->integrator->tag_update(scene, Integrator::BACKGROUND_AO_MODIFIED); - } } Shader *Background::get_shader(const Scene *scene) diff --git a/intern/cycles/render/background.h b/intern/cycles/render/background.h index e89ffbc2445..2f7ef0f7737 100644 --- a/intern/cycles/render/background.h +++ b/intern/cycles/render/background.h @@ -32,11 +32,7 @@ class Background : public Node { public: NODE_DECLARE - NODE_SOCKET_API(float, ao_factor) - NODE_SOCKET_API(float, ao_distance) - NODE_SOCKET_API(bool, use_shader) - NODE_SOCKET_API(bool, use_ao) NODE_SOCKET_API(uint, visibility) NODE_SOCKET_API(Shader *, shader) diff --git a/intern/cycles/render/bake.cpp b/intern/cycles/render/bake.cpp index 317a3937cab..54e496caed6 100644 --- a/intern/cycles/render/bake.cpp +++ b/intern/cycles/render/bake.cpp @@ -26,58 +26,8 @@ CCL_NAMESPACE_BEGIN -static int aa_samples(Scene *scene, Object *object, ShaderEvalType type) -{ - if (type == SHADER_EVAL_UV || type == SHADER_EVAL_ROUGHNESS) { - return 1; - } - else if (type == SHADER_EVAL_NORMAL) { - /* Only antialias normal if mesh has bump mapping. 
*/ - if (object->get_geometry()) { - foreach (Node *node, object->get_geometry()->get_used_shaders()) { - Shader *shader = static_cast<Shader *>(node); - if (shader->has_bump) { - return scene->integrator->get_aa_samples(); - } - } - } - - return 1; - } - else { - return scene->integrator->get_aa_samples(); - } -} - -/* Keep it synced with kernel_bake.h logic */ -static int shader_type_to_pass_filter(ShaderEvalType type, int pass_filter) -{ - const int component_flags = pass_filter & - (BAKE_FILTER_DIRECT | BAKE_FILTER_INDIRECT | BAKE_FILTER_COLOR); - - switch (type) { - case SHADER_EVAL_AO: - return BAKE_FILTER_AO; - case SHADER_EVAL_SHADOW: - return BAKE_FILTER_DIRECT; - case SHADER_EVAL_DIFFUSE: - return BAKE_FILTER_DIFFUSE | component_flags; - case SHADER_EVAL_GLOSSY: - return BAKE_FILTER_GLOSSY | component_flags; - case SHADER_EVAL_TRANSMISSION: - return BAKE_FILTER_TRANSMISSION | component_flags; - case SHADER_EVAL_COMBINED: - return pass_filter; - default: - return 0; - } -} - BakeManager::BakeManager() { - type = SHADER_EVAL_BAKE; - pass_filter = 0; - need_update_ = true; } @@ -85,32 +35,14 @@ BakeManager::~BakeManager() { } -bool BakeManager::get_baking() +bool BakeManager::get_baking() const { return !object_name.empty(); } -void BakeManager::set(Scene *scene, - const std::string &object_name_, - ShaderEvalType type_, - int pass_filter_) +void BakeManager::set(Scene *scene, const std::string &object_name_) { object_name = object_name_; - type = type_; - pass_filter = shader_type_to_pass_filter(type_, pass_filter_); - - Pass::add(PASS_BAKE_PRIMITIVE, scene->passes); - Pass::add(PASS_BAKE_DIFFERENTIAL, scene->passes); - - if (type == SHADER_EVAL_UV) { - /* force UV to be available */ - Pass::add(PASS_UV, scene->passes); - } - - /* force use_light_pass to be true if we bake more than just colors */ - if (pass_filter & ~BAKE_FILTER_COLOR) { - Pass::add(PASS_LIGHT, scene->passes); - } /* create device and update scene */ scene->film->tag_modified(); @@ -127,29 
+59,29 @@ void BakeManager::device_update(Device * /*device*/, if (!need_update()) return; - scoped_callback_timer timer([scene](double time) { - if (scene->update_stats) { - scene->update_stats->bake.times.add_entry({"device_update", time}); - } - }); - - KernelIntegrator *kintegrator = &dscene->data.integrator; KernelBake *kbake = &dscene->data.bake; + memset(kbake, 0, sizeof(*kbake)); - kbake->type = type; - kbake->pass_filter = pass_filter; - - int object_index = 0; - foreach (Object *object, scene->objects) { - const Geometry *geom = object->get_geometry(); - if (object->name == object_name && geom->geometry_type == Geometry::MESH) { - kbake->object_index = object_index; - kbake->tri_offset = geom->prim_offset; - kintegrator->aa_samples = aa_samples(scene, object, type); - break; - } + if (!object_name.empty()) { + scoped_callback_timer timer([scene](double time) { + if (scene->update_stats) { + scene->update_stats->bake.times.add_entry({"device_update", time}); + } + }); + + kbake->use = true; - object_index++; + int object_index = 0; + foreach (Object *object, scene->objects) { + const Geometry *geom = object->get_geometry(); + if (object->name == object_name && geom->geometry_type == Geometry::MESH) { + kbake->object_index = object_index; + kbake->tri_offset = geom->prim_offset; + break; + } + + object_index++; + } } need_update_ = false; diff --git a/intern/cycles/render/bake.h b/intern/cycles/render/bake.h index 655b9b1cf7e..39e504490c2 100644 --- a/intern/cycles/render/bake.h +++ b/intern/cycles/render/bake.h @@ -30,8 +30,8 @@ class BakeManager { BakeManager(); ~BakeManager(); - void set(Scene *scene, const std::string &object_name, ShaderEvalType type, int pass_filter); - bool get_baking(); + void set(Scene *scene, const std::string &object_name); + bool get_baking() const; void device_update(Device *device, DeviceScene *dscene, Scene *scene, Progress &progress); void device_free(Device *device, DeviceScene *dscene); @@ -42,8 +42,6 @@ class BakeManager 
{ private: bool need_update_; - ShaderEvalType type; - int pass_filter; std::string object_name; }; diff --git a/intern/cycles/render/buffers.cpp b/intern/cycles/render/buffers.cpp index fcfad58995e..1882510cd70 100644 --- a/intern/cycles/render/buffers.cpp +++ b/intern/cycles/render/buffers.cpp @@ -28,537 +28,335 @@ CCL_NAMESPACE_BEGIN -/* Buffer Params */ +/* -------------------------------------------------------------------- + * Convert part information to an index of `BufferParams::pass_offset_`. + */ -BufferParams::BufferParams() +static int pass_type_mode_to_index(PassType pass_type, PassMode mode) { - width = 0; - height = 0; - - full_x = 0; - full_y = 0; - full_width = 0; - full_height = 0; + int index = static_cast<int>(pass_type) * 2; - denoising_data_pass = false; - denoising_clean_pass = false; - denoising_prefiltered_pass = false; + if (mode == PassMode::DENOISED) { + ++index; + } - Pass::add(PASS_COMBINED, passes); + return index; } -void BufferParams::get_offset_stride(int &offset, int &stride) +static int pass_to_index(const BufferPass &pass) { - offset = -(full_x + full_y * width); - stride = width; + return pass_type_mode_to_index(pass.type, pass.mode); } -bool BufferParams::modified(const BufferParams ¶ms) -{ - return !(full_x == params.full_x && full_y == params.full_y && width == params.width && - height == params.height && full_width == params.full_width && - full_height == params.full_height && Pass::equals(passes, params.passes) && - denoising_data_pass == params.denoising_data_pass && - denoising_clean_pass == params.denoising_clean_pass && - denoising_prefiltered_pass == params.denoising_prefiltered_pass); -} +/* -------------------------------------------------------------------- + * Buffer pass. 
+ */ -int BufferParams::get_passes_size() +NODE_DEFINE(BufferPass) { - int size = 0; + NodeType *type = NodeType::add("buffer_pass", create); - for (size_t i = 0; i < passes.size(); i++) - size += passes[i].components; + const NodeEnum *pass_type_enum = Pass::get_type_enum(); + const NodeEnum *pass_mode_enum = Pass::get_mode_enum(); - if (denoising_data_pass) { - size += DENOISING_PASS_SIZE_BASE; - if (denoising_clean_pass) - size += DENOISING_PASS_SIZE_CLEAN; - if (denoising_prefiltered_pass) - size += DENOISING_PASS_SIZE_PREFILTERED; - } + SOCKET_ENUM(type, "Type", *pass_type_enum, PASS_COMBINED); + SOCKET_ENUM(mode, "Mode", *pass_mode_enum, static_cast<int>(PassMode::DENOISED)); + SOCKET_STRING(name, "Name", ustring()); + SOCKET_BOOLEAN(include_albedo, "Include Albedo", false); - return align_up(size, 4); -} + SOCKET_INT(offset, "Offset", -1); -int BufferParams::get_denoising_offset() -{ - int offset = 0; - - for (size_t i = 0; i < passes.size(); i++) - offset += passes[i].components; - - return offset; + return type; } -int BufferParams::get_denoising_prefiltered_offset() +BufferPass::BufferPass() : Node(get_node_type()) { - assert(denoising_prefiltered_pass); - - int offset = get_denoising_offset(); - - offset += DENOISING_PASS_SIZE_BASE; - if (denoising_clean_pass) { - offset += DENOISING_PASS_SIZE_CLEAN; - } - - return offset; } -/* Render Buffer Task */ - -RenderTile::RenderTile() +BufferPass::BufferPass(const Pass *scene_pass) + : Node(get_node_type()), + type(scene_pass->get_type()), + mode(scene_pass->get_mode()), + name(scene_pass->get_name()), + include_albedo(scene_pass->get_include_albedo()) { - x = 0; - y = 0; - w = 0; - h = 0; - - sample = 0; - start_sample = 0; - num_samples = 0; - resolution = 0; - - offset = 0; - stride = 0; - - buffer = 0; - - buffers = NULL; - stealing_state = NO_STEALING; } -/* Render Buffers */ - -RenderBuffers::RenderBuffers(Device *device) - : buffer(device, "RenderBuffers", MEM_READ_WRITE), - map_neighbor_copied(false), - 
render_time(0.0f) +PassInfo BufferPass::get_info() const { + return Pass::get_info(type, include_albedo); } -RenderBuffers::~RenderBuffers() -{ - buffer.free(); -} +/* -------------------------------------------------------------------- + * Buffer Params. + */ -void RenderBuffers::reset(BufferParams ¶ms_) +NODE_DEFINE(BufferParams) { - params = params_; - - /* re-allocate buffer */ - buffer.alloc(params.width * params.get_passes_size(), params.height); - buffer.zero_to_device(); + NodeType *type = NodeType::add("buffer_params", create); + + SOCKET_INT(width, "Width", 0); + SOCKET_INT(height, "Height", 0); + + SOCKET_INT(full_x, "Full X", 0); + SOCKET_INT(full_y, "Full Y", 0); + SOCKET_INT(full_width, "Full Width", 0); + SOCKET_INT(full_height, "Full Height", 0); + + SOCKET_STRING(layer, "Layer", ustring()); + SOCKET_STRING(view, "View", ustring()); + SOCKET_INT(samples, "Samples", 0); + SOCKET_FLOAT(exposure, "Exposure", 1.0f); + SOCKET_BOOLEAN(use_approximate_shadow_catcher, "Use Approximate Shadow Catcher", false); + SOCKET_BOOLEAN(use_transparent_background, "Transparent Background", false); + + /* Notes: + * - Skip passes since they do not follow typical container socket definition. + * Might look into covering those as a socket in the future. + * + * - Skip offset, stride, and pass stride since those can be delivered from the passes and + * rest of the sockets. 
*/ + + return type; } -void RenderBuffers::zero() +BufferParams::BufferParams() : Node(get_node_type()) { - buffer.zero_to_device(); + reset_pass_offset(); } -bool RenderBuffers::copy_from_device() +void BufferParams::update_passes() { - if (!buffer.device_pointer) - return false; - - buffer.copy_from_device(0, params.width * params.get_passes_size(), params.height); - - return true; -} - -bool RenderBuffers::get_denoising_pass_rect( - int type, float exposure, int sample, int components, float *pixels) -{ - if (buffer.data() == NULL) { - return false; - } - - float scale = 1.0f; - float alpha_scale = 1.0f / sample; - if (type == DENOISING_PASS_PREFILTERED_COLOR || type == DENOISING_PASS_CLEAN || - type == DENOISING_PASS_PREFILTERED_INTENSITY) { - scale *= exposure; - } - else if (type == DENOISING_PASS_PREFILTERED_VARIANCE) { - scale *= exposure * exposure * (sample - 1); - } + update_offset_stride(); + reset_pass_offset(); + + pass_stride = 0; + for (const BufferPass &pass : passes) { + if (pass.offset != PASS_UNUSED) { + const int index = pass_to_index(pass); + if (pass_offset_[index] == PASS_UNUSED) { + pass_offset_[index] = pass_stride; + } - int offset; - if (type == DENOISING_PASS_CLEAN) { - /* The clean pass isn't changed by prefiltering, so we use the original one there. */ - offset = type + params.get_denoising_offset(); - scale /= sample; - } - else if (params.denoising_prefiltered_pass) { - offset = type + params.get_denoising_prefiltered_offset(); - } - else { - switch (type) { - case DENOISING_PASS_PREFILTERED_DEPTH: - offset = params.get_denoising_offset() + DENOISING_PASS_DEPTH; - break; - case DENOISING_PASS_PREFILTERED_NORMAL: - offset = params.get_denoising_offset() + DENOISING_PASS_NORMAL; - break; - case DENOISING_PASS_PREFILTERED_ALBEDO: - offset = params.get_denoising_offset() + DENOISING_PASS_ALBEDO; - break; - case DENOISING_PASS_PREFILTERED_COLOR: - /* If we're not saving the prefiltering result, return the original noisy pass. 
*/ - offset = params.get_denoising_offset() + DENOISING_PASS_COLOR; - break; - default: - return false; + pass_stride += pass.get_info().num_components; } - scale /= sample; } +} - int pass_stride = params.get_passes_size(); - int size = params.width * params.height; +void BufferParams::update_passes(const vector<Pass *> &scene_passes) +{ + passes.clear(); - float *in = buffer.data() + offset; + pass_stride = 0; + for (const Pass *scene_pass : scene_passes) { + BufferPass buffer_pass(scene_pass); - if (components == 1) { - for (int i = 0; i < size; i++, in += pass_stride, pixels++) { - pixels[0] = in[0] * scale; + if (scene_pass->is_written()) { + buffer_pass.offset = pass_stride; + pass_stride += scene_pass->get_info().num_components; } - } - else if (components == 3) { - for (int i = 0; i < size; i++, in += pass_stride, pixels += 3) { - pixels[0] = in[0] * scale; - pixels[1] = in[1] * scale; - pixels[2] = in[2] * scale; - } - } - else if (components == 4) { - /* Since the alpha channel is not involved in denoising, output the Combined alpha channel. */ - assert(params.passes[0].type == PASS_COMBINED); - float *in_combined = buffer.data(); - - for (int i = 0; i < size; i++, in += pass_stride, in_combined += pass_stride, pixels += 4) { - float3 val = make_float3(in[0], in[1], in[2]); - if (type == DENOISING_PASS_PREFILTERED_COLOR && params.denoising_prefiltered_pass) { - /* Remove highlight compression from the image. 
*/ - val = color_highlight_uncompress(val); - } - pixels[0] = val.x * scale; - pixels[1] = val.y * scale; - pixels[2] = val.z * scale; - pixels[3] = saturate(in_combined[3] * alpha_scale); + else { + buffer_pass.offset = PASS_UNUSED; } - } - else { - return false; + + passes.emplace_back(std::move(buffer_pass)); } - return true; + update_passes(); } -bool RenderBuffers::get_pass_rect( - const string &name, float exposure, int sample, int components, float *pixels) +void BufferParams::reset_pass_offset() { - if (buffer.data() == NULL) { - return false; + for (int i = 0; i < kNumPassOffsets; ++i) { + pass_offset_[i] = PASS_UNUSED; } +} - float *sample_count = NULL; - if (name == "Combined") { - int sample_offset = 0; - for (size_t j = 0; j < params.passes.size(); j++) { - Pass &pass = params.passes[j]; - if (pass.type != PASS_SAMPLE_COUNT) { - sample_offset += pass.components; - continue; - } - else { - sample_count = buffer.data() + sample_offset; - break; - } - } +int BufferParams::get_pass_offset(PassType pass_type, PassMode mode) const +{ + if (pass_type == PASS_NONE || pass_type == PASS_UNUSED) { + return PASS_UNUSED; } - int pass_offset = 0; - - for (size_t j = 0; j < params.passes.size(); j++) { - Pass &pass = params.passes[j]; + const int index = pass_type_mode_to_index(pass_type, mode); + return pass_offset_[index]; +} - /* Pass is identified by both type and name, multiple of the same type - * may exist with a different name. */ - if (pass.name != name) { - pass_offset += pass.components; - continue; +const BufferPass *BufferParams::find_pass(string_view name) const +{ + for (const BufferPass &pass : passes) { + if (pass.name == name) { + return &pass; } + } - PassType type = pass.type; - - float *in = buffer.data() + pass_offset; - int pass_stride = params.get_passes_size(); - - float scale = (pass.filter) ? 1.0f / (float)sample : 1.0f; - float scale_exposure = (pass.exposure) ? 
scale * exposure : scale; - - int size = params.width * params.height; + return nullptr; +} - if (components == 1 && type == PASS_RENDER_TIME) { - /* Render time is not stored by kernel, but measured per tile. */ - float val = (float)(1000.0 * render_time / (params.width * params.height * sample)); - for (int i = 0; i < size; i++, pixels++) { - pixels[0] = val; - } - } - else if (components == 1) { - assert(pass.components == components); - - /* Scalar */ - if (type == PASS_DEPTH) { - for (int i = 0; i < size; i++, in += pass_stride, pixels++) { - float f = *in; - pixels[0] = (f == 0.0f) ? 1e10f : f * scale_exposure; - } - } - else if (type == PASS_MIST) { - for (int i = 0; i < size; i++, in += pass_stride, pixels++) { - float f = *in; - pixels[0] = saturate(f * scale_exposure); - } - } - else { - for (int i = 0; i < size; i++, in += pass_stride, pixels++) { - float f = *in; - pixels[0] = f * scale_exposure; - } - } - } - else if (components == 3) { - assert(pass.components == 4); - - /* RGBA */ - if (type == PASS_SHADOW) { - for (int i = 0; i < size; i++, in += pass_stride, pixels += 3) { - float4 f = make_float4(in[0], in[1], in[2], in[3]); - float invw = (f.w > 0.0f) ? 
1.0f / f.w : 1.0f; - - pixels[0] = f.x * invw; - pixels[1] = f.y * invw; - pixels[2] = f.z * invw; - } - } - else if (pass.divide_type != PASS_NONE) { - /* RGB lighting passes that need to divide out color */ - pass_offset = 0; - for (size_t k = 0; k < params.passes.size(); k++) { - Pass &color_pass = params.passes[k]; - if (color_pass.type == pass.divide_type) - break; - pass_offset += color_pass.components; - } - - float *in_divide = buffer.data() + pass_offset; - - for (int i = 0; i < size; i++, in += pass_stride, in_divide += pass_stride, pixels += 3) { - float3 f = make_float3(in[0], in[1], in[2]); - float3 f_divide = make_float3(in_divide[0], in_divide[1], in_divide[2]); - - f = safe_divide_even_color(f * exposure, f_divide); - - pixels[0] = f.x; - pixels[1] = f.y; - pixels[2] = f.z; - } - } - else { - /* RGB/vector */ - for (int i = 0; i < size; i++, in += pass_stride, pixels += 3) { - float3 f = make_float3(in[0], in[1], in[2]); - - pixels[0] = f.x * scale_exposure; - pixels[1] = f.y * scale_exposure; - pixels[2] = f.z * scale_exposure; - } - } - } - else if (components == 4) { - assert(pass.components == components); - - /* RGBA */ - if (type == PASS_SHADOW) { - for (int i = 0; i < size; i++, in += pass_stride, pixels += 4) { - float4 f = make_float4(in[0], in[1], in[2], in[3]); - float invw = (f.w > 0.0f) ? 
1.0f / f.w : 1.0f; - - pixels[0] = f.x * invw; - pixels[1] = f.y * invw; - pixels[2] = f.z * invw; - pixels[3] = 1.0f; - } - } - else if (type == PASS_MOTION) { - /* need to normalize by number of samples accumulated for motion */ - pass_offset = 0; - for (size_t k = 0; k < params.passes.size(); k++) { - Pass &color_pass = params.passes[k]; - if (color_pass.type == PASS_MOTION_WEIGHT) - break; - pass_offset += color_pass.components; - } - - float *in_weight = buffer.data() + pass_offset; - - for (int i = 0; i < size; i++, in += pass_stride, in_weight += pass_stride, pixels += 4) { - float4 f = make_float4(in[0], in[1], in[2], in[3]); - float w = in_weight[0]; - float invw = (w > 0.0f) ? 1.0f / w : 0.0f; - - pixels[0] = f.x * invw; - pixels[1] = f.y * invw; - pixels[2] = f.z * invw; - pixels[3] = f.w * invw; - } - } - else if (type == PASS_CRYPTOMATTE) { - for (int i = 0; i < size; i++, in += pass_stride, pixels += 4) { - float4 f = make_float4(in[0], in[1], in[2], in[3]); - /* x and z contain integer IDs, don't rescale them. - y and w contain matte weights, they get scaled. */ - pixels[0] = f.x; - pixels[1] = f.y * scale; - pixels[2] = f.z; - pixels[3] = f.w * scale; - } - } - else { - for (int i = 0; i < size; i++, in += pass_stride, pixels += 4) { - if (sample_count && sample_count[i * pass_stride] < 0.0f) { - scale = (pass.filter) ? -1.0f / (sample_count[i * pass_stride]) : 1.0f; - scale_exposure = (pass.exposure) ? scale * exposure : scale; - } - - float4 f = make_float4(in[0], in[1], in[2], in[3]); - - pixels[0] = f.x * scale_exposure; - pixels[1] = f.y * scale_exposure; - pixels[2] = f.z * scale_exposure; - - /* Clamp since alpha might be > 1.0 due to Russian roulette. 
*/ - pixels[3] = saturate(f.w * scale); - } - } +const BufferPass *BufferParams::find_pass(PassType type, PassMode mode) const +{ + for (const BufferPass &pass : passes) { + if (pass.type == type && pass.mode == mode) { + return &pass; } - - return true; } - return false; + return nullptr; } -bool RenderBuffers::set_pass_rect(PassType type, int components, float *pixels, int samples) +const BufferPass *BufferParams::get_actual_display_pass(PassType type, PassMode mode) const { - if (buffer.data() == NULL) { - return false; - } - - int pass_offset = 0; + const BufferPass *pass = find_pass(type, mode); + return get_actual_display_pass(pass); +} - for (size_t j = 0; j < params.passes.size(); j++) { - Pass &pass = params.passes[j]; +const BufferPass *BufferParams::get_actual_display_pass(const BufferPass *pass) const +{ + if (!pass) { + return nullptr; + } - if (pass.type != type) { - pass_offset += pass.components; - continue; + if (pass->type == PASS_COMBINED) { + const BufferPass *shadow_catcher_matte_pass = find_pass(PASS_SHADOW_CATCHER_MATTE, pass->mode); + if (shadow_catcher_matte_pass) { + pass = shadow_catcher_matte_pass; } + } - float *out = buffer.data() + pass_offset; - int pass_stride = params.get_passes_size(); - int size = params.width * params.height; - - assert(pass.components == components); + return pass; +} - for (int i = 0; i < size; i++, out += pass_stride, pixels += components) { - if (pass.filter) { - /* Scale by the number of samples, inverse of what we do in get_pass_rect. - * A better solution would be to remove the need for set_pass_rect entirely, - * and change baking to bake multiple objects in a tile at once. */ - for (int j = 0; j < components; j++) { - out[j] = pixels[j] * samples; - } - } - else { - /* For non-filtered passes just straight copy, these may contain non-float data. 
*/ - memcpy(out, pixels, sizeof(float) * components); - } - } +void BufferParams::update_offset_stride() +{ + offset = -(full_x + full_y * width); + stride = width; +} +bool BufferParams::modified(const BufferParams &other) const +{ + if (!(width == other.width && height == other.height && full_x == other.full_x && + full_y == other.full_y && full_width == other.full_width && + full_height == other.full_height && offset == other.offset && stride == other.stride && + pass_stride == other.pass_stride && layer == other.layer && view == other.view && + exposure == other.exposure && + use_approximate_shadow_catcher == other.use_approximate_shadow_catcher && + use_transparent_background == other.use_transparent_background)) { return true; } - return false; + return !(passes == other.passes); } -/* Display Buffer */ +/* -------------------------------------------------------------------- + * Render Buffers. + */ -DisplayBuffer::DisplayBuffer(Device *device, bool linear) - : draw_width(0), - draw_height(0), - transparent(true), /* todo: determine from background */ - half_float(linear), - rgba_byte(device, "display buffer byte"), - rgba_half(device, "display buffer half") +RenderBuffers::RenderBuffers(Device *device) : buffer(device, "RenderBuffers", MEM_READ_WRITE) { } -DisplayBuffer::~DisplayBuffer() +RenderBuffers::~RenderBuffers() { - rgba_byte.free(); - rgba_half.free(); + buffer.free(); } -void DisplayBuffer::reset(BufferParams ¶ms_) +void RenderBuffers::reset(const BufferParams ¶ms_) { - draw_width = 0; - draw_height = 0; + DCHECK(params_.pass_stride != -1); params = params_; - /* allocate display pixels */ - if (half_float) { - rgba_half.alloc_to_device(params.width, params.height); - } - else { - rgba_byte.alloc_to_device(params.width, params.height); - } + /* re-allocate buffer */ + buffer.alloc(params.width * params.pass_stride, params.height); } -void DisplayBuffer::draw_set(int width, int height) +void RenderBuffers::zero() { - assert(width <= params.width && 
height <= params.height); + buffer.zero_to_device(); +} - draw_width = width; - draw_height = height; +bool RenderBuffers::copy_from_device() +{ + DCHECK(params.pass_stride != -1); + + if (!buffer.device_pointer) + return false; + + buffer.copy_from_device(0, params.width * params.pass_stride, params.height); + + return true; } -void DisplayBuffer::draw(Device *device, const DeviceDrawParams &draw_params) +void RenderBuffers::copy_to_device() { - if (draw_width != 0 && draw_height != 0) { - device_memory &rgba = (half_float) ? (device_memory &)rgba_half : (device_memory &)rgba_byte; - - device->draw_pixels(rgba, - 0, - draw_width, - draw_height, - params.width, - params.height, - params.full_x, - params.full_y, - params.full_width, - params.full_height, - transparent, - draw_params); - } + buffer.copy_to_device(); } -bool DisplayBuffer::draw_ready() +void render_buffers_host_copy_denoised(RenderBuffers *dst, + const BufferParams &dst_params, + const RenderBuffers *src, + const BufferParams &src_params, + const size_t src_offset) { - return (draw_width != 0 && draw_height != 0); + DCHECK_EQ(dst_params.width, src_params.width); + /* TODO(sergey): More sanity checks to avoid buffer overrun. */ + + /* Create a map of pass offsets to be copied. + * Assume offsets are different to allow copying passes between buffers with different set of + * passes. 
*/ + + struct { + int dst_offset; + int src_offset; + } pass_offsets[PASS_NUM]; + + int num_passes = 0; + + for (int i = 0; i < PASS_NUM; ++i) { + const PassType pass_type = static_cast<PassType>(i); + + const int dst_pass_offset = dst_params.get_pass_offset(pass_type, PassMode::DENOISED); + if (dst_pass_offset == PASS_UNUSED) { + continue; + } + + const int src_pass_offset = src_params.get_pass_offset(pass_type, PassMode::DENOISED); + if (src_pass_offset == PASS_UNUSED) { + continue; + } + + pass_offsets[num_passes].dst_offset = dst_pass_offset; + pass_offsets[num_passes].src_offset = src_pass_offset; + ++num_passes; + } + + /* Copy passes. */ + /* TODO(sergey): Make it more reusable, allowing implement copy of noisy passes. */ + + const int64_t dst_width = dst_params.width; + const int64_t dst_height = dst_params.height; + const int64_t dst_pass_stride = dst_params.pass_stride; + const int64_t dst_num_pixels = dst_width * dst_height; + + const int64_t src_pass_stride = src_params.pass_stride; + const int64_t src_offset_in_floats = src_offset * src_pass_stride; + + const float *src_pixel = src->buffer.data() + src_offset_in_floats; + float *dst_pixel = dst->buffer.data(); + + for (int i = 0; i < dst_num_pixels; + ++i, src_pixel += src_pass_stride, dst_pixel += dst_pass_stride) { + for (int pass_offset_idx = 0; pass_offset_idx < num_passes; ++pass_offset_idx) { + const int dst_pass_offset = pass_offsets[pass_offset_idx].dst_offset; + const int src_pass_offset = pass_offsets[pass_offset_idx].src_offset; + + /* TODO(sergey): Support non-RGBA passes. 
*/ + dst_pixel[dst_pass_offset + 0] = src_pixel[src_pass_offset + 0]; + dst_pixel[dst_pass_offset + 1] = src_pixel[src_pass_offset + 1]; + dst_pixel[dst_pass_offset + 2] = src_pixel[src_pass_offset + 2]; + dst_pixel[dst_pass_offset + 3] = src_pixel[src_pass_offset + 3]; + } + } } CCL_NAMESPACE_END diff --git a/intern/cycles/render/buffers.h b/intern/cycles/render/buffers.h index 4ffc628bb52..184ac7197af 100644 --- a/intern/cycles/render/buffers.h +++ b/intern/cycles/render/buffers.h @@ -18,8 +18,8 @@ #define __BUFFERS_H__ #include "device/device_memory.h" - -#include "render/film.h" +#include "graph/node.h" +#include "render/pass.h" #include "kernel/kernel_types.h" @@ -34,170 +34,157 @@ class Device; struct DeviceDrawParams; struct float4; +/* NOTE: Is not a real scene node. Using Node API for ease of (de)serialization. */ +class BufferPass : public Node { + public: + NODE_DECLARE + + PassType type = PASS_NONE; + PassMode mode = PassMode::NOISY; + ustring name; + bool include_albedo = false; + + int offset = -1; + + BufferPass(); + explicit BufferPass(const Pass *scene_pass); + + BufferPass(BufferPass &&other) noexcept = default; + BufferPass(const BufferPass &other) = default; + + BufferPass &operator=(BufferPass &&other) = default; + BufferPass &operator=(const BufferPass &other) = default; + + ~BufferPass() = default; + + PassInfo get_info() const; + + inline bool operator==(const BufferPass &other) const + { + return type == other.type && mode == other.mode && name == other.name && + include_albedo == other.include_albedo && offset == other.offset; + } + inline bool operator!=(const BufferPass &other) const + { + return !(*this == other); + } +}; + /* Buffer Parameters * Size of render buffer and how it fits in the full image (border render). */ -class BufferParams { +/* NOTE: Is not a real scene node. Using Node API for ease of (de)serialization. 
*/ +class BufferParams : public Node { public: - /* width/height of the physical buffer */ - int width; - int height; - - /* offset into and width/height of the full buffer */ - int full_x; - int full_y; - int full_width; - int full_height; - - /* passes */ - vector<Pass> passes; - bool denoising_data_pass; - /* If only some light path types should be target, an additional pass is needed. */ - bool denoising_clean_pass; - /* When we're prefiltering the passes during rendering, we need to keep both the - * original and the prefiltered data around because neighboring tiles might still - * need the original data. */ - bool denoising_prefiltered_pass; - - /* functions */ - BufferParams(); + NODE_DECLARE - void get_offset_stride(int &offset, int &stride); - bool modified(const BufferParams ¶ms); - int get_passes_size(); - int get_denoising_offset(); - int get_denoising_prefiltered_offset(); -}; + /* Width/height of the physical buffer. */ + int width = 0; + int height = 0; -/* Render Buffers */ + /* Offset into and width/height of the full buffer. */ + int full_x = 0; + int full_y = 0; + int full_width = 0; + int full_height = 0; -class RenderBuffers { - public: - /* buffer parameters */ - BufferParams params; + /* Runtime fields, only valid after `update_passes()` or `update_offset_stride()`. */ + int offset = -1, stride = -1; - /* float buffer */ - device_vector<float> buffer; - bool map_neighbor_copied; - double render_time; + /* Runtime fields, only valid after `update_passes()`. */ + int pass_stride = -1; - explicit RenderBuffers(Device *device); - ~RenderBuffers(); + /* Properties which are used for accessing buffer pixels outside of scene graph. 
*/ + vector<BufferPass> passes; + ustring layer; + ustring view; + int samples = 0; + float exposure = 1.0f; + bool use_approximate_shadow_catcher = false; + bool use_transparent_background = false; - void reset(BufferParams ¶ms); - void zero(); + BufferParams(); - bool copy_from_device(); - bool get_pass_rect( - const string &name, float exposure, int sample, int components, float *pixels); - bool get_denoising_pass_rect( - int offset, float exposure, int sample, int components, float *pixels); - bool set_pass_rect(PassType type, int components, float *pixels, int samples); -}; + BufferParams(BufferParams &&other) noexcept = default; + BufferParams(const BufferParams &other) = default; -/* Display Buffer - * - * The buffer used for drawing during render, filled by converting the render - * buffers to byte of half float storage */ + BufferParams &operator=(BufferParams &&other) = default; + BufferParams &operator=(const BufferParams &other) = default; -class DisplayBuffer { - public: - /* buffer parameters */ - BufferParams params; - /* dimensions for how much of the buffer is actually ready for display. - * with progressive render we can be using only a subset of the buffer. - * if these are zero, it means nothing can be drawn yet */ - int draw_width, draw_height; - /* draw alpha channel? */ - bool transparent; - /* use half float? */ - bool half_float; - /* byte buffer for converted result */ - device_pixels<uchar4> rgba_byte; - device_pixels<half4> rgba_half; - - DisplayBuffer(Device *device, bool linear = false); - ~DisplayBuffer(); - - void reset(BufferParams ¶ms); - - void draw_set(int width, int height); - void draw(Device *device, const DeviceDrawParams &draw_params); - bool draw_ready(); -}; + ~BufferParams() = default; -/* Render Tile - * Rendering task on a buffer */ + /* Pre-calculate all fields which depends on the passes. 
+ * + * When the scene passes are given, the buffer passes will be created from them and stored in + * this params, and then params are updated for those passes. + * The `update_passes()` without parameters updates offsets and strides which are stored outside + * of the passes. */ + void update_passes(); + void update_passes(const vector<Pass *> &scene_passes); -class RenderTile { - public: - typedef enum { PATH_TRACE = (1 << 0), BAKE = (1 << 1), DENOISE = (1 << 2) } Task; + /* Returns PASS_UNUSED if there is no such pass in the buffer. */ + int get_pass_offset(PassType type, PassMode mode = PassMode::NOISY) const; - Task task; - int x, y, w, h; - int start_sample; - int num_samples; - int sample; - int resolution; - int offset; - int stride; - int tile_index; + /* Returns nullptr if pass with given name does not exist. */ + const BufferPass *find_pass(string_view name) const; + const BufferPass *find_pass(PassType type, PassMode mode = PassMode::NOISY) const; - device_ptr buffer; - int device_size; + /* Get display pass from its name. + * Will do special logic to replace combined pass with shadow catcher matte. */ + const BufferPass *get_actual_display_pass(PassType type, PassMode mode = PassMode::NOISY) const; + const BufferPass *get_actual_display_pass(const BufferPass *pass) const; - typedef enum { NO_STEALING = 0, CAN_BE_STOLEN = 1, WAS_STOLEN = 2 } StealingState; - StealingState stealing_state; + void update_offset_stride(); - RenderBuffers *buffers; + bool modified(const BufferParams &other) const; - RenderTile(); + protected: + void reset_pass_offset(); - int4 bounds() const - { - return make_int4(x, /* xmin */ - y, /* ymin */ - x + w, /* xmax */ - y + h); /* ymax */ - } + /* Multiplied by 2 to be able to store noisy and denoised pass types. */ + static constexpr int kNumPassOffsets = PASS_NUM * 2; + + /* Indexed by an index derived from pass type and mode, indicates offset of the corresponding + * pass in the buffer. 
+ * If there are multiple passes with same type and mode contains lowest offset of all of them. */ + int pass_offset_[kNumPassOffsets]; }; -/* Render Tile Neighbors - * Set of neighboring tiles used for denoising. Tile order: - * 0 1 2 - * 3 4 5 - * 6 7 8 */ +/* Render Buffers */ -class RenderTileNeighbors { +class RenderBuffers { public: - static const int SIZE = 9; - static const int CENTER = 4; + /* buffer parameters */ + BufferParams params; - RenderTile tiles[SIZE]; - RenderTile target; + /* float buffer */ + device_vector<float> buffer; - RenderTileNeighbors(const RenderTile ¢er) - { - tiles[CENTER] = center; - } + explicit RenderBuffers(Device *device); + ~RenderBuffers(); - int4 bounds() const - { - return make_int4(tiles[3].x, /* xmin */ - tiles[1].y, /* ymin */ - tiles[5].x + tiles[5].w, /* xmax */ - tiles[7].y + tiles[7].h); /* ymax */ - } + void reset(const BufferParams ¶ms); + void zero(); - void set_bounds_from_center() - { - tiles[3].x = tiles[CENTER].x; - tiles[1].y = tiles[CENTER].y; - tiles[5].x = tiles[CENTER].x + tiles[CENTER].w; - tiles[7].y = tiles[CENTER].y + tiles[CENTER].h; - } + bool copy_from_device(); + void copy_to_device(); }; +/* Copy denoised passes form source to destination. + * + * Buffer parameters are provided explicitly, allowing to copy pixels between render buffers which + * content corresponds to a render result at a non-unit resolution divider. + * + * `src_offset` allows to offset source pixel index which is used when a fraction of the source + * buffer is to be copied. + * + * Copy happens of the number of pixels in the destination. 
*/ +void render_buffers_host_copy_denoised(RenderBuffers *dst, + const BufferParams &dst_params, + const RenderBuffers *src, + const BufferParams &src_params, + const size_t src_offset = 0); + CCL_NAMESPACE_END #endif /* __BUFFERS_H__ */ diff --git a/intern/cycles/render/camera.cpp b/intern/cycles/render/camera.cpp index 327f166f9d8..8b69c971991 100644 --- a/intern/cycles/render/camera.cpp +++ b/intern/cycles/render/camera.cpp @@ -33,9 +33,9 @@ /* needed for calculating differentials */ // clang-format off -#include "kernel/kernel_compat_cpu.h" -#include "kernel/split/kernel_split_data.h" -#include "kernel/kernel_globals.h" +#include "kernel/device/cpu/compat.h" +#include "kernel/device/cpu/globals.h" + #include "kernel/kernel_projection.h" #include "kernel/kernel_differential.h" #include "kernel/kernel_montecarlo.h" @@ -169,7 +169,6 @@ Camera::Camera() : Node(get_node_type()) width = 1024; height = 512; - resolution = 1; use_perspective_motion = false; @@ -455,7 +454,6 @@ void Camera::update(Scene *scene) /* render size */ kcam->width = width; kcam->height = height; - kcam->resolution = resolution; /* store differentials */ kcam->dx = float3_to_float4(dx); @@ -776,9 +774,11 @@ float Camera::world_to_raster_size(float3 P) &ray); #endif - differential_transfer(&ray.dP, ray.dP, ray.D, ray.dD, ray.D, dist); + /* TODO: would it help to use more accurate differentials here? 
*/ + differential3 dP; + differential_transfer_compact(&dP, ray.dP, ray.D, ray.dD, ray.D, dist); - return max(len(ray.dP.dx), len(ray.dP.dy)); + return max(len(dP.dx), len(dP.dy)); } return res; @@ -789,12 +789,11 @@ bool Camera::use_motion() const return motion.size() > 1; } -void Camera::set_screen_size_and_resolution(int width_, int height_, int resolution_) +void Camera::set_screen_size(int width_, int height_) { - if (width_ != width || height_ != height || resolution_ != resolution) { + if (width_ != width || height_ != height) { width = width_; height = height_; - resolution = resolution_; tag_modified(); } } diff --git a/intern/cycles/render/camera.h b/intern/cycles/render/camera.h index 5abb4750764..cb8ecac1a7e 100644 --- a/intern/cycles/render/camera.h +++ b/intern/cycles/render/camera.h @@ -199,7 +199,6 @@ class Camera : public Node { private: int width; int height; - int resolution; public: /* functions */ @@ -225,7 +224,7 @@ class Camera : public Node { int motion_step(float time) const; bool use_motion() const; - void set_screen_size_and_resolution(int width_, int height_, int resolution_); + void set_screen_size(int width_, int height_); private: /* Private utility functions. */ diff --git a/intern/cycles/render/coverage.cpp b/intern/cycles/render/coverage.cpp deleted file mode 100644 index 99d4daa6961..00000000000 --- a/intern/cycles/render/coverage.cpp +++ /dev/null @@ -1,155 +0,0 @@ -/* - * Copyright 2018 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "render/coverage.h" -#include "render/buffers.h" - -#include "kernel/kernel_compat_cpu.h" -#include "kernel/kernel_types.h" -#include "kernel/split/kernel_split_data.h" - -#include "kernel/kernel_globals.h" -#include "kernel/kernel_id_passes.h" - -#include "util/util_map.h" - -CCL_NAMESPACE_BEGIN - -static bool crypomatte_comp(const pair<float, float> &i, const pair<float, float> j) -{ - return i.first > j.first; -} - -void Coverage::finalize() -{ - int pass_offset = 0; - if (kernel_data.film.cryptomatte_passes & CRYPT_OBJECT) { - finalize_buffer(coverage_object, pass_offset); - pass_offset += kernel_data.film.cryptomatte_depth * 4; - } - if (kernel_data.film.cryptomatte_passes & CRYPT_MATERIAL) { - finalize_buffer(coverage_material, pass_offset); - pass_offset += kernel_data.film.cryptomatte_depth * 4; - } - if (kernel_data.film.cryptomatte_passes & CRYPT_ASSET) { - finalize_buffer(coverage_asset, pass_offset); - } -} - -void Coverage::init_path_trace() -{ - kg->coverage_object = kg->coverage_material = kg->coverage_asset = NULL; - - if (kernel_data.film.cryptomatte_passes & CRYPT_ACCURATE) { - if (kernel_data.film.cryptomatte_passes & CRYPT_OBJECT) { - coverage_object.clear(); - coverage_object.resize(tile.w * tile.h); - } - if (kernel_data.film.cryptomatte_passes & CRYPT_MATERIAL) { - coverage_material.clear(); - coverage_material.resize(tile.w * tile.h); - } - if (kernel_data.film.cryptomatte_passes & CRYPT_ASSET) { - coverage_asset.clear(); - coverage_asset.resize(tile.w * tile.h); - } - } -} - -void Coverage::init_pixel(int x, int y) -{ - if (kernel_data.film.cryptomatte_passes & CRYPT_ACCURATE) { - const int pixel_index = tile.w * (y - tile.y) + x - tile.x; - if (kernel_data.film.cryptomatte_passes & CRYPT_OBJECT) { - kg->coverage_object = &coverage_object[pixel_index]; - } - if (kernel_data.film.cryptomatte_passes & CRYPT_MATERIAL) { - 
kg->coverage_material = &coverage_material[pixel_index]; - } - if (kernel_data.film.cryptomatte_passes & CRYPT_ASSET) { - kg->coverage_asset = &coverage_asset[pixel_index]; - } - } -} - -void Coverage::finalize_buffer(vector<CoverageMap> &coverage, const int pass_offset) -{ - if (kernel_data.film.cryptomatte_passes & CRYPT_ACCURATE) { - flatten_buffer(coverage, pass_offset); - } - else { - sort_buffer(pass_offset); - } -} - -void Coverage::flatten_buffer(vector<CoverageMap> &coverage, const int pass_offset) -{ - /* Sort the coverage map and write it to the output */ - int pixel_index = 0; - int pass_stride = tile.buffers->params.get_passes_size(); - for (int y = 0; y < tile.h; ++y) { - for (int x = 0; x < tile.w; ++x) { - const CoverageMap &pixel = coverage[pixel_index]; - if (!pixel.empty()) { - /* buffer offset */ - int index = x + y * tile.stride; - float *buffer = (float *)tile.buffer + index * pass_stride; - - /* sort the cryptomatte pixel */ - vector<pair<float, float>> sorted_pixel; - for (CoverageMap::const_iterator it = pixel.begin(); it != pixel.end(); ++it) { - sorted_pixel.push_back(std::make_pair(it->second, it->first)); - } - sort(sorted_pixel.begin(), sorted_pixel.end(), crypomatte_comp); - int num_slots = 2 * (kernel_data.film.cryptomatte_depth); - if (sorted_pixel.size() > num_slots) { - float leftover = 0.0f; - for (vector<pair<float, float>>::iterator it = sorted_pixel.begin() + num_slots; - it != sorted_pixel.end(); - ++it) { - leftover += it->first; - } - sorted_pixel[num_slots - 1].first += leftover; - } - int limit = min(num_slots, sorted_pixel.size()); - for (int i = 0; i < limit; ++i) { - kernel_write_id_slots(buffer + kernel_data.film.pass_cryptomatte + pass_offset, - 2 * (kernel_data.film.cryptomatte_depth), - sorted_pixel[i].second, - sorted_pixel[i].first); - } - } - ++pixel_index; - } - } -} - -void Coverage::sort_buffer(const int pass_offset) -{ - /* Sort the coverage map and write it to the output */ - int pass_stride = 
tile.buffers->params.get_passes_size(); - for (int y = 0; y < tile.h; ++y) { - for (int x = 0; x < tile.w; ++x) { - /* buffer offset */ - int index = x + y * tile.stride; - float *buffer = (float *)tile.buffer + index * pass_stride; - kernel_sort_id_slots(buffer + kernel_data.film.pass_cryptomatte + pass_offset, - 2 * (kernel_data.film.cryptomatte_depth)); - } - } -} - -CCL_NAMESPACE_END diff --git a/intern/cycles/render/coverage.h b/intern/cycles/render/coverage.h deleted file mode 100644 index 12182c614da..00000000000 --- a/intern/cycles/render/coverage.h +++ /dev/null @@ -1,52 +0,0 @@ -/* - * Copyright 2018 Blender Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef __COVERAGE_H__ -#define __COVERAGE_H__ - -#include "util/util_map.h" -#include "util/util_vector.h" - -CCL_NAMESPACE_BEGIN - -struct KernelGlobals; -class RenderTile; - -typedef unordered_map<float, float> CoverageMap; - -class Coverage { - public: - Coverage(KernelGlobals *kg_, RenderTile &tile_) : kg(kg_), tile(tile_) - { - } - void init_path_trace(); - void init_pixel(int x, int y); - void finalize(); - - private: - vector<CoverageMap> coverage_object; - vector<CoverageMap> coverage_material; - vector<CoverageMap> coverage_asset; - KernelGlobals *kg; - RenderTile &tile; - void finalize_buffer(vector<CoverageMap> &coverage, const int pass_offset); - void flatten_buffer(vector<CoverageMap> &coverage, const int pass_offset); - void sort_buffer(const int pass_offset); -}; - -CCL_NAMESPACE_END - -#endif /* __COVERAGE_H__ */ diff --git a/intern/cycles/render/denoising.cpp b/intern/cycles/render/denoising.cpp index ddbe7484800..bcf8d3fa204 100644 --- a/intern/cycles/render/denoising.cpp +++ b/intern/cycles/render/denoising.cpp @@ -16,15 +16,17 @@ #include "render/denoising.h" -#include "kernel/filter/filter_defines.h" +#if 0 -#include "util/util_foreach.h" -#include "util/util_map.h" -#include "util/util_system.h" -#include "util/util_task.h" -#include "util/util_time.h" +# include "kernel/filter/filter_defines.h" -#include <OpenImageIO/filesystem.h> +# include "util/util_foreach.h" +# include "util/util_map.h" +# include "util/util_system.h" +# include "util/util_task.h" +# include "util/util_time.h" + +# include <OpenImageIO/filesystem.h> CCL_NAMESPACE_BEGIN @@ -225,7 +227,7 @@ bool DenoiseImageLayer::match_channels(int neighbor, /* Denoise Task */ DenoiseTask::DenoiseTask(Device *device, - Denoiser *denoiser, + DenoiserPipeline *denoiser, int frame, const vector<int> &neighbor_frames) : denoiser(denoiser), @@ -386,7 +388,6 @@ void DenoiseTask::create_task(DeviceTask &task) task.denoising = denoiser->params; task.denoising.type = DENOISER_NLM; 
task.denoising.use = true; - task.denoising.store_passes = false; task.denoising_from_render = false; task.denoising_frames.resize(neighbor_frames.size()); @@ -863,7 +864,7 @@ bool DenoiseImage::save_output(const string &out_filepath, string &error) /* File pattern handling and outer loop over frames */ -Denoiser::Denoiser(DeviceInfo &device_info) +DenoiserPipeline::DenoiserPipeline(DeviceInfo &device_info) { samples_override = 0; tile_size = make_int2(64, 64); @@ -876,18 +877,16 @@ Denoiser::Denoiser(DeviceInfo &device_info) /* Initialize device. */ device = Device::create(device_info, stats, profiler, true); - DeviceRequestedFeatures req; - req.use_denoising = true; - device->load_kernels(req); + device->load_kernels(KERNEL_FEATURE_DENOISING); } -Denoiser::~Denoiser() +DenoiserPipeline::~DenoiserPipeline() { delete device; TaskScheduler::exit(); } -bool Denoiser::run() +bool DenoiserPipeline::run() { assert(input.size() == output.size()); @@ -931,3 +930,5 @@ bool Denoiser::run() } CCL_NAMESPACE_END + +#endif diff --git a/intern/cycles/render/denoising.h b/intern/cycles/render/denoising.h index c1b4d0a5596..097cc570d06 100644 --- a/intern/cycles/render/denoising.h +++ b/intern/cycles/render/denoising.h @@ -17,27 +17,31 @@ #ifndef __DENOISING_H__ #define __DENOISING_H__ -#include "device/device.h" -#include "device/device_denoising.h" +#if 0 -#include "render/buffers.h" +/* TODO(sergey): Make it explicit and clear when something is a denoiser, its pipeline or + * parameters. Currently it is an annoying mixture of terms used interchangeably. 
*/ -#include "util/util_string.h" -#include "util/util_unique_ptr.h" -#include "util/util_vector.h" +# include "device/device.h" -#include <OpenImageIO/imageio.h> +# include "render/buffers.h" + +# include "util/util_string.h" +# include "util/util_unique_ptr.h" +# include "util/util_vector.h" + +# include <OpenImageIO/imageio.h> OIIO_NAMESPACE_USING CCL_NAMESPACE_BEGIN -/* Denoiser */ +/* Denoiser pipeline */ -class Denoiser { +class DenoiserPipeline { public: - Denoiser(DeviceInfo &device_info); - ~Denoiser(); + DenoiserPipeline(DeviceInfo &device_info); + ~DenoiserPipeline(); bool run(); @@ -155,7 +159,10 @@ class DenoiseImage { class DenoiseTask { public: - DenoiseTask(Device *device, Denoiser *denoiser, int frame, const vector<int> &neighbor_frames); + DenoiseTask(Device *device, + DenoiserPipeline *denoiser, + int frame, + const vector<int> &neighbor_frames); ~DenoiseTask(); /* Task stages */ @@ -168,7 +175,7 @@ class DenoiseTask { protected: /* Denoiser parameters and device */ - Denoiser *denoiser; + DenoiserPipeline *denoiser; Device *device; /* Frame number to be denoised */ @@ -204,4 +211,6 @@ class DenoiseTask { CCL_NAMESPACE_END +#endif + #endif /* __DENOISING_H__ */ diff --git a/intern/cycles/render/film.cpp b/intern/cycles/render/film.cpp index 5df396394c4..8e14b338bd3 100644 --- a/intern/cycles/render/film.cpp +++ b/intern/cycles/render/film.cpp @@ -16,9 +16,12 @@ #include "render/film.h" #include "device/device.h" +#include "render/background.h" +#include "render/bake.h" #include "render/camera.h" #include "render/integrator.h" #include "render/mesh.h" +#include "render/object.h" #include "render/scene.h" #include "render/stats.h" #include "render/tables.h" @@ -31,261 +34,6 @@ CCL_NAMESPACE_BEGIN -/* Pass */ - -static bool compare_pass_order(const Pass &a, const Pass &b) -{ - if (a.components == b.components) - return (a.type < b.type); - return (a.components > b.components); -} - -static NodeEnum *get_pass_type_enum() -{ - static NodeEnum 
pass_type_enum; - pass_type_enum.insert("combined", PASS_COMBINED); - pass_type_enum.insert("depth", PASS_DEPTH); - pass_type_enum.insert("normal", PASS_NORMAL); - pass_type_enum.insert("uv", PASS_UV); - pass_type_enum.insert("object_id", PASS_OBJECT_ID); - pass_type_enum.insert("material_id", PASS_MATERIAL_ID); - pass_type_enum.insert("motion", PASS_MOTION); - pass_type_enum.insert("motion_weight", PASS_MOTION_WEIGHT); - pass_type_enum.insert("render_time", PASS_RENDER_TIME); - pass_type_enum.insert("cryptomatte", PASS_CRYPTOMATTE); - pass_type_enum.insert("aov_color", PASS_AOV_COLOR); - pass_type_enum.insert("aov_value", PASS_AOV_VALUE); - pass_type_enum.insert("adaptive_aux_buffer", PASS_ADAPTIVE_AUX_BUFFER); - pass_type_enum.insert("sample_count", PASS_SAMPLE_COUNT); - pass_type_enum.insert("mist", PASS_MIST); - pass_type_enum.insert("emission", PASS_EMISSION); - pass_type_enum.insert("background", PASS_BACKGROUND); - pass_type_enum.insert("ambient_occlusion", PASS_AO); - pass_type_enum.insert("shadow", PASS_SHADOW); - pass_type_enum.insert("diffuse_direct", PASS_DIFFUSE_DIRECT); - pass_type_enum.insert("diffuse_indirect", PASS_DIFFUSE_INDIRECT); - pass_type_enum.insert("diffuse_color", PASS_DIFFUSE_COLOR); - pass_type_enum.insert("glossy_direct", PASS_GLOSSY_DIRECT); - pass_type_enum.insert("glossy_indirect", PASS_GLOSSY_INDIRECT); - pass_type_enum.insert("glossy_color", PASS_GLOSSY_COLOR); - pass_type_enum.insert("transmission_direct", PASS_TRANSMISSION_DIRECT); - pass_type_enum.insert("transmission_indirect", PASS_TRANSMISSION_INDIRECT); - pass_type_enum.insert("transmission_color", PASS_TRANSMISSION_COLOR); - pass_type_enum.insert("volume_direct", PASS_VOLUME_DIRECT); - pass_type_enum.insert("volume_indirect", PASS_VOLUME_INDIRECT); - pass_type_enum.insert("bake_primitive", PASS_BAKE_PRIMITIVE); - pass_type_enum.insert("bake_differential", PASS_BAKE_DIFFERENTIAL); - - return &pass_type_enum; -} - -NODE_DEFINE(Pass) -{ - NodeType *type = 
NodeType::add("pass", create); - - NodeEnum *pass_type_enum = get_pass_type_enum(); - SOCKET_ENUM(type, "Type", *pass_type_enum, PASS_COMBINED); - SOCKET_STRING(name, "Name", ustring()); - - return type; -} - -Pass::Pass() : Node(get_node_type()) -{ -} - -void Pass::add(PassType type, vector<Pass> &passes, const char *name) -{ - for (size_t i = 0; i < passes.size(); i++) { - if (passes[i].type != type) { - continue; - } - - /* An empty name is used as a placeholder to signal that any pass of - * that type is fine (because the content always is the same). - * This is important to support divide_type: If the pass that has a - * divide_type is added first, a pass for divide_type with an empty - * name will be added. Then, if a matching pass with a name is later - * requested, the existing placeholder will be renamed to that. - * If the divide_type is explicitly allocated with a name first and - * then again as part of another pass, the second one will just be - * skipped because that type already exists. */ - - /* If no name is specified, any pass of the correct type will match. */ - if (name == NULL) { - return; - } - - /* If we already have a placeholder pass, rename that one. */ - if (passes[i].name.empty()) { - passes[i].name = name; - return; - } - - /* If neither existing nor requested pass have placeholder name, they - * must match. 
*/ - if (name == passes[i].name) { - return; - } - } - - Pass pass; - - pass.type = type; - pass.filter = true; - pass.exposure = false; - pass.divide_type = PASS_NONE; - if (name) { - pass.name = name; - } - - switch (type) { - case PASS_NONE: - pass.components = 0; - break; - case PASS_COMBINED: - pass.components = 4; - pass.exposure = true; - break; - case PASS_DEPTH: - pass.components = 1; - pass.filter = false; - break; - case PASS_MIST: - pass.components = 1; - break; - case PASS_NORMAL: - pass.components = 4; - break; - case PASS_UV: - pass.components = 4; - break; - case PASS_MOTION: - pass.components = 4; - pass.divide_type = PASS_MOTION_WEIGHT; - break; - case PASS_MOTION_WEIGHT: - pass.components = 1; - break; - case PASS_OBJECT_ID: - case PASS_MATERIAL_ID: - pass.components = 1; - pass.filter = false; - break; - - case PASS_EMISSION: - case PASS_BACKGROUND: - pass.components = 4; - pass.exposure = true; - break; - case PASS_AO: - pass.components = 4; - break; - case PASS_SHADOW: - pass.components = 4; - pass.exposure = false; - break; - case PASS_LIGHT: - /* This isn't a real pass, used by baking to see whether - * light data is needed or not. - * - * Set components to 0 so pass sort below happens in a - * determined way. - */ - pass.components = 0; - break; - case PASS_RENDER_TIME: - /* This pass is handled entirely on the host side. 
*/ - pass.components = 0; - break; - - case PASS_DIFFUSE_COLOR: - case PASS_GLOSSY_COLOR: - case PASS_TRANSMISSION_COLOR: - pass.components = 4; - break; - case PASS_DIFFUSE_DIRECT: - case PASS_DIFFUSE_INDIRECT: - pass.components = 4; - pass.exposure = true; - pass.divide_type = PASS_DIFFUSE_COLOR; - break; - case PASS_GLOSSY_DIRECT: - case PASS_GLOSSY_INDIRECT: - pass.components = 4; - pass.exposure = true; - pass.divide_type = PASS_GLOSSY_COLOR; - break; - case PASS_TRANSMISSION_DIRECT: - case PASS_TRANSMISSION_INDIRECT: - pass.components = 4; - pass.exposure = true; - pass.divide_type = PASS_TRANSMISSION_COLOR; - break; - case PASS_VOLUME_DIRECT: - case PASS_VOLUME_INDIRECT: - pass.components = 4; - pass.exposure = true; - break; - case PASS_CRYPTOMATTE: - pass.components = 4; - break; - case PASS_ADAPTIVE_AUX_BUFFER: - pass.components = 4; - break; - case PASS_SAMPLE_COUNT: - pass.components = 1; - pass.exposure = false; - break; - case PASS_AOV_COLOR: - pass.components = 4; - break; - case PASS_AOV_VALUE: - pass.components = 1; - break; - case PASS_BAKE_PRIMITIVE: - case PASS_BAKE_DIFFERENTIAL: - pass.components = 4; - pass.exposure = false; - pass.filter = false; - break; - default: - assert(false); - break; - } - - passes.push_back(pass); - - /* Order from by components, to ensure alignment so passes with size 4 - * come first and then passes with size 1. Note this must use stable sort - * so cryptomatte passes remain in the right order. 
*/ - stable_sort(&passes[0], &passes[0] + passes.size(), compare_pass_order); - - if (pass.divide_type != PASS_NONE) - Pass::add(pass.divide_type, passes); -} - -bool Pass::equals(const vector<Pass> &A, const vector<Pass> &B) -{ - if (A.size() != B.size()) - return false; - - for (int i = 0; i < A.size(); i++) - if (A[i].type != B[i].type || A[i].name != B[i].name) - return false; - - return true; -} - -bool Pass::contains(const vector<Pass> &passes, PassType type) -{ - for (size_t i = 0; i < passes.size(); i++) - if (passes[i].type == type) - return true; - - return false; -} - /* Pixel Filter */ static float filter_func_box(float /*v*/, float /*width*/) @@ -368,17 +116,11 @@ NODE_DEFINE(Film) SOCKET_FLOAT(mist_depth, "Mist Depth", 100.0f); SOCKET_FLOAT(mist_falloff, "Mist Falloff", 1.0f); - SOCKET_BOOLEAN(denoising_data_pass, "Generate Denoising Data Pass", false); - SOCKET_BOOLEAN(denoising_clean_pass, "Generate Denoising Clean Pass", false); - SOCKET_BOOLEAN(denoising_prefiltered_pass, "Generate Denoising Prefiltered Pass", false); - SOCKET_INT(denoising_flags, "Denoising Flags", 0); - SOCKET_BOOLEAN(use_adaptive_sampling, "Use Adaptive Sampling", false); - - SOCKET_BOOLEAN(use_light_visibility, "Use Light Visibility", false); - - NodeEnum *pass_type_enum = get_pass_type_enum(); + const NodeEnum *pass_type_enum = Pass::get_type_enum(); SOCKET_ENUM(display_pass, "Display Pass", *pass_type_enum, PASS_COMBINED); + SOCKET_BOOLEAN(show_active_pixels, "Show Active Pixels", false); + static NodeEnum cryptomatte_passes_enum; cryptomatte_passes_enum.insert("none", CRYPT_NONE); cryptomatte_passes_enum.insert("object", CRYPT_OBJECT); @@ -389,15 +131,13 @@ NODE_DEFINE(Film) SOCKET_INT(cryptomatte_depth, "Cryptomatte Depth", 0); + SOCKET_BOOLEAN(use_approximate_shadow_catcher, "Use Approximate Shadow Catcher", false); + return type; } -Film::Film() : Node(get_node_type()) +Film::Film() : Node(get_node_type()), filter_table_offset_(TABLE_OFFSET_INVALID) { - 
use_light_visibility = false; - filter_table_offset = TABLE_OFFSET_INVALID; - cryptomatte_passes = CRYPT_NONE; - display_pass = PASS_COMBINED; } Film::~Film() @@ -406,7 +146,8 @@ Film::~Film() void Film::add_default(Scene *scene) { - Pass::add(PASS_COMBINED, scene->passes); + Pass *pass = scene->create_node<Pass>(); + pass->set_type(PASS_COMBINED); } void Film::device_update(Device *device, DeviceScene *dscene, Scene *scene) @@ -426,50 +167,77 @@ void Film::device_update(Device *device, DeviceScene *dscene, Scene *scene) /* update __data */ kfilm->exposure = exposure; + kfilm->pass_alpha_threshold = pass_alpha_threshold; kfilm->pass_flag = 0; - kfilm->display_pass_stride = -1; - kfilm->display_pass_components = 0; - kfilm->display_divide_pass_stride = -1; - kfilm->use_display_exposure = false; - kfilm->use_display_pass_alpha = (display_pass == PASS_COMBINED); + kfilm->use_approximate_shadow_catcher = get_use_approximate_shadow_catcher(); kfilm->light_pass_flag = 0; kfilm->pass_stride = 0; - kfilm->use_light_pass = use_light_visibility; - kfilm->pass_aov_value_num = 0; - kfilm->pass_aov_color_num = 0; + + /* Mark with PASS_UNUSED to avoid mask test in the kernel. */ + kfilm->pass_background = PASS_UNUSED; + kfilm->pass_emission = PASS_UNUSED; + kfilm->pass_ao = PASS_UNUSED; + kfilm->pass_diffuse_direct = PASS_UNUSED; + kfilm->pass_diffuse_indirect = PASS_UNUSED; + kfilm->pass_glossy_direct = PASS_UNUSED; + kfilm->pass_glossy_indirect = PASS_UNUSED; + kfilm->pass_transmission_direct = PASS_UNUSED; + kfilm->pass_transmission_indirect = PASS_UNUSED; + kfilm->pass_volume_direct = PASS_UNUSED; + kfilm->pass_volume_indirect = PASS_UNUSED; + kfilm->pass_volume_direct = PASS_UNUSED; + kfilm->pass_volume_indirect = PASS_UNUSED; + kfilm->pass_shadow = PASS_UNUSED; + + /* Mark passes as unused so that the kernel knows the pass is inaccessible. 
*/ + kfilm->pass_denoising_normal = PASS_UNUSED; + kfilm->pass_denoising_albedo = PASS_UNUSED; + kfilm->pass_sample_count = PASS_UNUSED; + kfilm->pass_adaptive_aux_buffer = PASS_UNUSED; + kfilm->pass_shadow_catcher = PASS_UNUSED; + kfilm->pass_shadow_catcher_sample_count = PASS_UNUSED; + kfilm->pass_shadow_catcher_matte = PASS_UNUSED; bool have_cryptomatte = false; + bool have_aov_color = false; + bool have_aov_value = false; for (size_t i = 0; i < scene->passes.size(); i++) { - Pass &pass = scene->passes[i]; + const Pass *pass = scene->passes[i]; - if (pass.type == PASS_NONE) { + if (pass->get_type() == PASS_NONE || !pass->is_written()) { + continue; + } + + if (pass->get_mode() == PassMode::DENOISED) { + /* Generally we only storing offsets of the noisy passes. The display pass is an exception + * since it is a read operation and not a write. */ + kfilm->pass_stride += pass->get_info().num_components; continue; } /* Can't do motion pass if no motion vectors are available. */ - if (pass.type == PASS_MOTION || pass.type == PASS_MOTION_WEIGHT) { + if (pass->get_type() == PASS_MOTION || pass->get_type() == PASS_MOTION_WEIGHT) { if (scene->need_motion() != Scene::MOTION_PASS) { - kfilm->pass_stride += pass.components; + kfilm->pass_stride += pass->get_info().num_components; continue; } } - int pass_flag = (1 << (pass.type % 32)); - if (pass.type <= PASS_CATEGORY_MAIN_END) { - kfilm->pass_flag |= pass_flag; - } - else if (pass.type <= PASS_CATEGORY_LIGHT_END) { - kfilm->use_light_pass = 1; + const int pass_flag = (1 << (pass->get_type() % 32)); + if (pass->get_type() <= PASS_CATEGORY_LIGHT_END) { kfilm->light_pass_flag |= pass_flag; } + else if (pass->get_type() <= PASS_CATEGORY_DATA_END) { + kfilm->pass_flag |= pass_flag; + } else { - assert(pass.type <= PASS_CATEGORY_BAKE_END); + assert(pass->get_type() <= PASS_CATEGORY_BAKE_END); } - switch (pass.type) { + switch (pass->get_type()) { case PASS_COMBINED: kfilm->pass_combined = kfilm->pass_stride; break; @@ -479,6 
+247,12 @@ void Film::device_update(Device *device, DeviceScene *dscene, Scene *scene) case PASS_NORMAL: kfilm->pass_normal = kfilm->pass_stride; break; + case PASS_POSITION: + kfilm->pass_position = kfilm->pass_stride; + break; + case PASS_ROUGHNESS: + kfilm->pass_roughness = kfilm->pass_stride; + break; case PASS_UV: kfilm->pass_uv = kfilm->pass_stride; break; @@ -511,9 +285,6 @@ void Film::device_update(Device *device, DeviceScene *dscene, Scene *scene) kfilm->pass_shadow = kfilm->pass_stride; break; - case PASS_LIGHT: - break; - case PASS_DIFFUSE_COLOR: kfilm->pass_diffuse_color = kfilm->pass_stride; break; @@ -563,78 +334,56 @@ void Film::device_update(Device *device, DeviceScene *dscene, Scene *scene) kfilm->pass_stride; have_cryptomatte = true; break; + + case PASS_DENOISING_NORMAL: + kfilm->pass_denoising_normal = kfilm->pass_stride; + break; + case PASS_DENOISING_ALBEDO: + kfilm->pass_denoising_albedo = kfilm->pass_stride; + break; + + case PASS_SHADOW_CATCHER: + kfilm->pass_shadow_catcher = kfilm->pass_stride; + break; + case PASS_SHADOW_CATCHER_SAMPLE_COUNT: + kfilm->pass_shadow_catcher_sample_count = kfilm->pass_stride; + break; + case PASS_SHADOW_CATCHER_MATTE: + kfilm->pass_shadow_catcher_matte = kfilm->pass_stride; + break; + case PASS_ADAPTIVE_AUX_BUFFER: kfilm->pass_adaptive_aux_buffer = kfilm->pass_stride; break; case PASS_SAMPLE_COUNT: kfilm->pass_sample_count = kfilm->pass_stride; break; + case PASS_AOV_COLOR: - if (kfilm->pass_aov_color_num == 0) { + if (!have_aov_color) { kfilm->pass_aov_color = kfilm->pass_stride; + have_aov_color = true; } - kfilm->pass_aov_color_num++; break; case PASS_AOV_VALUE: - if (kfilm->pass_aov_value_num == 0) { + if (!have_aov_value) { kfilm->pass_aov_value = kfilm->pass_stride; + have_aov_value = true; } - kfilm->pass_aov_value_num++; break; default: assert(false); break; } - if (pass.type == display_pass) { - kfilm->display_pass_stride = kfilm->pass_stride; - kfilm->display_pass_components = pass.components; - 
kfilm->use_display_exposure = pass.exposure && (kfilm->exposure != 1.0f); - } - else if (pass.type == PASS_DIFFUSE_COLOR || pass.type == PASS_TRANSMISSION_COLOR || - pass.type == PASS_GLOSSY_COLOR) { - kfilm->display_divide_pass_stride = kfilm->pass_stride; - } - - kfilm->pass_stride += pass.components; - } - - kfilm->pass_denoising_data = 0; - kfilm->pass_denoising_clean = 0; - kfilm->denoising_flags = 0; - if (denoising_data_pass) { - kfilm->pass_denoising_data = kfilm->pass_stride; - kfilm->pass_stride += DENOISING_PASS_SIZE_BASE; - kfilm->denoising_flags = denoising_flags; - if (denoising_clean_pass) { - kfilm->pass_denoising_clean = kfilm->pass_stride; - kfilm->pass_stride += DENOISING_PASS_SIZE_CLEAN; - kfilm->use_light_pass = 1; - } - if (denoising_prefiltered_pass) { - kfilm->pass_stride += DENOISING_PASS_SIZE_PREFILTERED; - } - } - - kfilm->pass_stride = align_up(kfilm->pass_stride, 4); - - /* When displaying the normal/uv pass in the viewport we need to disable - * transparency. - * - * We also don't need to perform light accumulations. Later we want to optimize this to suppress - * light calculations. 
*/ - if (display_pass == PASS_NORMAL || display_pass == PASS_UV) { - kfilm->use_light_pass = 0; - } - else { - kfilm->pass_alpha_threshold = pass_alpha_threshold; + kfilm->pass_stride += pass->get_info().num_components; } /* update filter table */ vector<float> table = filter_table(filter_type, filter_width); - scene->lookup_tables->remove_table(&filter_table_offset); - filter_table_offset = scene->lookup_tables->add_table(dscene, table); - kfilm->filter_table_offset = (int)filter_table_offset; + scene->lookup_tables->remove_table(&filter_table_offset_); + filter_table_offset_ = scene->lookup_tables->add_table(dscene, table); + kfilm->filter_table_offset = (int)filter_table_offset_; /* mist pass parameters */ kfilm->mist_start = mist_start; @@ -644,79 +393,298 @@ void Film::device_update(Device *device, DeviceScene *dscene, Scene *scene) kfilm->cryptomatte_passes = cryptomatte_passes; kfilm->cryptomatte_depth = cryptomatte_depth; - pass_stride = kfilm->pass_stride; - denoising_data_offset = kfilm->pass_denoising_data; - denoising_clean_offset = kfilm->pass_denoising_clean; - clear_modified(); } void Film::device_free(Device * /*device*/, DeviceScene * /*dscene*/, Scene *scene) { - scene->lookup_tables->remove_table(&filter_table_offset); + scene->lookup_tables->remove_table(&filter_table_offset_); } -void Film::tag_passes_update(Scene *scene, const vector<Pass> &passes_, bool update_passes) +int Film::get_aov_offset(Scene *scene, string name, bool &is_color) { - if (Pass::contains(scene->passes, PASS_UV) != Pass::contains(passes_, PASS_UV)) { - scene->geometry_manager->tag_update(scene, GeometryManager::UV_PASS_NEEDED); + int offset_color = 0, offset_value = 0; + foreach (const Pass *pass, scene->passes) { + if (pass->get_name() == name) { + if (pass->get_type() == PASS_AOV_VALUE) { + is_color = false; + return offset_value; + } + else if (pass->get_type() == PASS_AOV_COLOR) { + is_color = true; + return offset_color; + } + } + + if (pass->get_type() == 
PASS_AOV_VALUE) { + offset_value += pass->get_info().num_components; + } + else if (pass->get_type() == PASS_AOV_COLOR) { + offset_color += pass->get_info().num_components; + } + } + + return -1; +} + +void Film::update_passes(Scene *scene, bool add_sample_count_pass) +{ + const Background *background = scene->background; + const BakeManager *bake_manager = scene->bake_manager; + const ObjectManager *object_manager = scene->object_manager; + Integrator *integrator = scene->integrator; + + if (!is_modified() && !object_manager->need_update() && !integrator->is_modified()) { + return; + } + + /* Remove auto generated passes and recreate them. */ + remove_auto_passes(scene); + + /* Display pass for viewport. */ + const PassType display_pass = get_display_pass(); + add_auto_pass(scene, display_pass); + + /* Assumption is that a combined pass always exists for now, for example + * adaptive sampling is always based on a combined pass. But we should + * try to lift this limitation in the future for faster rendering of + * individual passes. */ + if (display_pass != PASS_COMBINED) { + add_auto_pass(scene, PASS_COMBINED); + } + + /* Create passes needed for adaptive sampling. */ + const AdaptiveSampling adaptive_sampling = integrator->get_adaptive_sampling(); + if (adaptive_sampling.use) { + add_auto_pass(scene, PASS_SAMPLE_COUNT); + add_auto_pass(scene, PASS_ADAPTIVE_AUX_BUFFER); + } + + /* Create passes needed for denoising. */ + const bool use_denoise = integrator->get_use_denoise(); + if (use_denoise) { + if (integrator->get_use_denoise_pass_normal()) { + add_auto_pass(scene, PASS_DENOISING_NORMAL); + } + if (integrator->get_use_denoise_pass_albedo()) { + add_auto_pass(scene, PASS_DENOISING_ALBEDO); + } + } + + /* Create passes for shadow catcher. 
*/ + if (scene->has_shadow_catcher()) { + const bool need_background = get_use_approximate_shadow_catcher() && + !background->get_transparent(); + + add_auto_pass(scene, PASS_SHADOW_CATCHER); + add_auto_pass(scene, PASS_SHADOW_CATCHER_SAMPLE_COUNT); + add_auto_pass(scene, PASS_SHADOW_CATCHER_MATTE); + + if (need_background) { + add_auto_pass(scene, PASS_BACKGROUND); + } + } + else if (Pass::contains(scene->passes, PASS_SHADOW_CATCHER)) { + add_auto_pass(scene, PASS_SHADOW_CATCHER); + add_auto_pass(scene, PASS_SHADOW_CATCHER_SAMPLE_COUNT); + } + + const vector<Pass *> passes_immutable = scene->passes; + for (const Pass *pass : passes_immutable) { + const PassInfo info = pass->get_info(); + /* Add utility passes needed to generate some light passes. */ + if (info.divide_type != PASS_NONE) { + add_auto_pass(scene, info.divide_type); + } + if (info.direct_type != PASS_NONE) { + add_auto_pass(scene, info.direct_type); + } + if (info.indirect_type != PASS_NONE) { + add_auto_pass(scene, info.indirect_type); + } + + /* NOTE: Enable all denoised passes when storage is requested. + * This way it is possible to tweak denoiser parameters later on. */ + if (info.support_denoise && use_denoise) { + add_auto_pass(scene, pass->get_type(), PassMode::DENOISED); + } + } + + if (bake_manager->get_baking()) { + add_auto_pass(scene, PASS_BAKE_PRIMITIVE, "BakePrimitive"); + add_auto_pass(scene, PASS_BAKE_DIFFERENTIAL, "BakeDifferential"); + } + + if (add_sample_count_pass) { + if (!Pass::contains(scene->passes, PASS_SAMPLE_COUNT)) { + add_auto_pass(scene, PASS_SAMPLE_COUNT); + } + } + + /* Remove duplicates and initialize internal pass info. */ + finalize_passes(scene, use_denoise); + /* Flush scene updates. 
*/ + const bool have_uv_pass = Pass::contains(scene->passes, PASS_UV); + const bool have_motion_pass = Pass::contains(scene->passes, PASS_MOTION); + const bool have_ao_pass = Pass::contains(scene->passes, PASS_AO); + + if (have_uv_pass != prev_have_uv_pass) { + scene->geometry_manager->tag_update(scene, GeometryManager::UV_PASS_NEEDED); foreach (Shader *shader, scene->shaders) shader->need_update_uvs = true; } - else if (Pass::contains(scene->passes, PASS_MOTION) != Pass::contains(passes_, PASS_MOTION)) { + if (have_motion_pass != prev_have_motion_pass) { scene->geometry_manager->tag_update(scene, GeometryManager::MOTION_PASS_NEEDED); } - else if (Pass::contains(scene->passes, PASS_AO) != Pass::contains(passes_, PASS_AO)) { + if (have_ao_pass != prev_have_ao_pass) { scene->integrator->tag_update(scene, Integrator::AO_PASS_MODIFIED); } - if (update_passes) { - scene->passes = passes_; + prev_have_uv_pass = have_uv_pass; + prev_have_motion_pass = have_motion_pass; + prev_have_ao_pass = have_ao_pass; + + tag_modified(); + + /* Debug logging. */ + if (VLOG_IS_ON(2)) { + VLOG(2) << "Effective scene passes:"; + for (const Pass *pass : scene->passes) { + VLOG(2) << "- " << *pass; + } } } -int Film::get_aov_offset(Scene *scene, string name, bool &is_color) +void Film::add_auto_pass(Scene *scene, PassType type, const char *name) { - int num_color = 0, num_value = 0; - foreach (const Pass &pass, scene->passes) { - if (pass.type == PASS_AOV_COLOR) { - num_color++; - } - else if (pass.type == PASS_AOV_VALUE) { - num_value++; + add_auto_pass(scene, type, PassMode::NOISY, name); +} + +void Film::add_auto_pass(Scene *scene, PassType type, PassMode mode, const char *name) +{ + Pass *pass = new Pass(); + pass->set_type(type); + pass->set_mode(mode); + pass->set_name(ustring((name) ? 
name : "")); + pass->is_auto_ = true; + + pass->set_owner(scene); + scene->passes.push_back(pass); +} + +void Film::remove_auto_passes(Scene *scene) +{ + /* Remove all passes which were automatically created. */ + vector<Pass *> new_passes; + + for (Pass *pass : scene->passes) { + if (!pass->is_auto_) { + new_passes.push_back(pass); } else { - continue; - } - - if (pass.name == name) { - is_color = (pass.type == PASS_AOV_COLOR); - return (is_color ? num_color : num_value) - 1; + delete pass; } } - return -1; + scene->passes = new_passes; } -int Film::get_pass_stride() const +static bool compare_pass_order(const Pass *a, const Pass *b) { - return pass_stride; -} + const int num_components_a = a->get_info().num_components; + const int num_components_b = b->get_info().num_components; -int Film::get_denoising_data_offset() const -{ - return denoising_data_offset; + if (num_components_a == num_components_b) { + return (a->get_type() < b->get_type()); + } + + return num_components_a > num_components_b; } -int Film::get_denoising_clean_offset() const +void Film::finalize_passes(Scene *scene, const bool use_denoise) { - return denoising_clean_offset; + /* Remove duplicate passes. */ + vector<Pass *> new_passes; + + for (Pass *pass : scene->passes) { + /* Disable denoising on passes if denoising is disabled, or if the + * pass does not support it. */ + pass->set_mode((use_denoise && pass->get_info().support_denoise) ? pass->get_mode() : + PassMode::NOISY); + + /* Merge duplicate passes. */ + bool duplicate_found = false; + for (Pass *new_pass : new_passes) { + /* If different type or denoising, don't merge. */ + if (new_pass->get_type() != pass->get_type() || new_pass->get_mode() != pass->get_mode()) { + continue; + } + + /* If both passes have a name and the names are different, don't merge. + * If either pass has a name, we'll use that name. 
*/ + if (!pass->get_name().empty() && !new_pass->get_name().empty() && + pass->get_name() != new_pass->get_name()) { + continue; + } + + if (!pass->get_name().empty() && new_pass->get_name().empty()) { + new_pass->set_name(pass->get_name()); + } + + new_pass->is_auto_ &= pass->is_auto_; + duplicate_found = true; + break; + } + + if (!duplicate_found) { + new_passes.push_back(pass); + } + else { + delete pass; + } + } + + /* Order from by components and type, This is required to for AOVs and cryptomatte passes, + * which the kernel assumes to be in order. Note this must use stable sort so cryptomatte + * passes remain in the right order. */ + stable_sort(new_passes.begin(), new_passes.end(), compare_pass_order); + + scene->passes = new_passes; } -size_t Film::get_filter_table_offset() const +uint Film::get_kernel_features(const Scene *scene) const { - return filter_table_offset; + uint kernel_features = 0; + + for (const Pass *pass : scene->passes) { + if (!pass->is_written()) { + continue; + } + + const PassType pass_type = pass->get_type(); + const PassMode pass_mode = pass->get_mode(); + + if (pass_mode == PassMode::DENOISED || pass_type == PASS_DENOISING_NORMAL || + pass_type == PASS_DENOISING_ALBEDO) { + kernel_features |= KERNEL_FEATURE_DENOISING; + } + + if (pass_type != PASS_NONE && pass_type != PASS_COMBINED && + pass_type <= PASS_CATEGORY_LIGHT_END) { + kernel_features |= KERNEL_FEATURE_LIGHT_PASSES; + + if (pass_type == PASS_SHADOW) { + kernel_features |= KERNEL_FEATURE_SHADOW_PASS; + } + } + + if (pass_type == PASS_AO) { + kernel_features |= KERNEL_FEATURE_NODE_RAYTRACE; + } + } + + return kernel_features; } CCL_NAMESPACE_END diff --git a/intern/cycles/render/film.h b/intern/cycles/render/film.h index 462a7275491..5d327353361 100644 --- a/intern/cycles/render/film.h +++ b/intern/cycles/render/film.h @@ -17,6 +17,7 @@ #ifndef __FILM_H__ #define __FILM_H__ +#include "render/pass.h" #include "util/util_string.h" #include "util/util_vector.h" @@ -38,36 
+39,15 @@ typedef enum FilterType { FILTER_NUM_TYPES, } FilterType; -class Pass : public Node { - public: - NODE_DECLARE - - Pass(); - - PassType type; - int components; - bool filter; - bool exposure; - PassType divide_type; - ustring name; - - static void add(PassType type, vector<Pass> &passes, const char *name = NULL); - static bool equals(const vector<Pass> &A, const vector<Pass> &B); - static bool contains(const vector<Pass> &passes, PassType); -}; - class Film : public Node { public: NODE_DECLARE NODE_SOCKET_API(float, exposure) - NODE_SOCKET_API(bool, denoising_data_pass) - NODE_SOCKET_API(bool, denoising_clean_pass) - NODE_SOCKET_API(bool, denoising_prefiltered_pass) - NODE_SOCKET_API(int, denoising_flags) NODE_SOCKET_API(float, pass_alpha_threshold) NODE_SOCKET_API(PassType, display_pass) + NODE_SOCKET_API(bool, show_active_pixels) NODE_SOCKET_API(FilterType, filter_type) NODE_SOCKET_API(float, filter_width) @@ -76,17 +56,18 @@ class Film : public Node { NODE_SOCKET_API(float, mist_depth) NODE_SOCKET_API(float, mist_falloff) - NODE_SOCKET_API(bool, use_light_visibility) NODE_SOCKET_API(CryptomatteType, cryptomatte_passes) NODE_SOCKET_API(int, cryptomatte_depth) - NODE_SOCKET_API(bool, use_adaptive_sampling) + /* Approximate shadow catcher pass into its matte pass, so that both artificial objects and + * shadows can be alpha-overed onto a backdrop. 
*/ + NODE_SOCKET_API(bool, use_approximate_shadow_catcher) private: - int pass_stride; - int denoising_data_offset; - int denoising_clean_offset; - size_t filter_table_offset; + size_t filter_table_offset_; + bool prev_have_uv_pass = false; + bool prev_have_motion_pass = false; + bool prev_have_ao_pass = false; public: Film(); @@ -98,14 +79,20 @@ class Film : public Node { void device_update(Device *device, DeviceScene *dscene, Scene *scene); void device_free(Device *device, DeviceScene *dscene, Scene *scene); - void tag_passes_update(Scene *scene, const vector<Pass> &passes_, bool update_passes = true); - int get_aov_offset(Scene *scene, string name, bool &is_color); - int get_pass_stride() const; - int get_denoising_data_offset() const; - int get_denoising_clean_offset() const; - size_t get_filter_table_offset() const; + /* Update passes so that they contain all passes required for the configured functionality. + * + * If `add_sample_count_pass` is true then the SAMPLE_COUNT pass is ensured to be added. */ + void update_passes(Scene *scene, bool add_sample_count_pass); + + uint get_kernel_features(const Scene *scene) const; + + private: + void add_auto_pass(Scene *scene, PassType type, const char *name = nullptr); + void add_auto_pass(Scene *scene, PassType type, PassMode mode, const char *name = nullptr); + void remove_auto_passes(Scene *scene); + void finalize_passes(Scene *scene, const bool use_denoise); }; CCL_NAMESPACE_END diff --git a/intern/cycles/render/geometry.cpp b/intern/cycles/render/geometry.cpp index 7ec1d2d9abb..6804a006fe6 100644 --- a/intern/cycles/render/geometry.cpp +++ b/intern/cycles/render/geometry.cpp @@ -215,6 +215,12 @@ void Geometry::compute_bvh( msg += string_printf("%s %u/%u", name.c_str(), (uint)(n + 1), (uint)total); Object object; + + /* Ensure all visibility bits are set at the geometry level BVH. In + * the object level BVH is where actual visibility is tested. 
*/ + object.set_is_shadow_catcher(true); + object.set_visibility(~0); + object.set_geometry(this); vector<Geometry *> geometry; @@ -315,7 +321,7 @@ void GeometryManager::update_osl_attributes(Device *device, { #ifdef WITH_OSL /* for OSL, a hash map is used to lookup the attribute by name. */ - OSLGlobals *og = (OSLGlobals *)device->osl_memory(); + OSLGlobals *og = (OSLGlobals *)device->get_cpu_osl_memory(); og->object_name_map.clear(); og->attribute_map.clear(); @@ -1855,8 +1861,8 @@ void GeometryManager::device_update(Device *device, }); Camera *dicing_camera = scene->dicing_camera; - dicing_camera->set_screen_size_and_resolution( - dicing_camera->get_full_width(), dicing_camera->get_full_height(), 1); + dicing_camera->set_screen_size(dicing_camera->get_full_width(), + dicing_camera->get_full_height()); dicing_camera->update(scene); size_t i = 0; @@ -2157,7 +2163,7 @@ void GeometryManager::device_free(Device *device, DeviceScene *dscene, bool forc dscene->data.bvh.bvh_layout = BVH_LAYOUT_NONE; #ifdef WITH_OSL - OSLGlobals *og = (OSLGlobals *)device->osl_memory(); + OSLGlobals *og = (OSLGlobals *)device->get_cpu_osl_memory(); if (og) { og->object_name_map.clear(); diff --git a/intern/cycles/render/gpu_display.cpp b/intern/cycles/render/gpu_display.cpp new file mode 100644 index 00000000000..a8f0cc50583 --- /dev/null +++ b/intern/cycles/render/gpu_display.cpp @@ -0,0 +1,227 @@ +/* + * Copyright 2021 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "render/gpu_display.h" + +#include "render/buffers.h" +#include "util/util_logging.h" + +CCL_NAMESPACE_BEGIN + +void GPUDisplay::reset(const BufferParams &buffer_params) +{ + thread_scoped_lock lock(mutex_); + + const GPUDisplayParams old_params = params_; + + params_.offset = make_int2(buffer_params.full_x, buffer_params.full_y); + params_.full_size = make_int2(buffer_params.full_width, buffer_params.full_height); + params_.size = make_int2(buffer_params.width, buffer_params.height); + + /* If the parameters did change, tag texture as unusable. This avoids drawing old texture content + * in an updated configuration of the viewport. For example, avoids drawing old frame when render + * border did change. + * If the parameters did not change, allow drawing the current state of the texture, which will + * not count as an up-to-date redraw. This will avoid flickering when doing camera navigation by + * showing a previously rendered frame until the new one is ready. */ + if (old_params.modified(params_)) { + texture_state_.is_usable = false; + } + + texture_state_.is_outdated = true; +} + +void GPUDisplay::mark_texture_updated() +{ + texture_state_.is_outdated = false; + texture_state_.is_usable = true; +} + +/* -------------------------------------------------------------------- + * Update procedure. + */ + +bool GPUDisplay::update_begin(int texture_width, int texture_height) +{ + DCHECK(!update_state_.is_active); + + if (update_state_.is_active) { + LOG(ERROR) << "Attempt to re-activate update process."; + return false; + } + + /* Get parameters within a mutex lock, to avoid reset() modifying them at the same time. + * The update itself is non-blocking however, for better performance and to avoid + * potential deadlocks due to locks held by the subclass. 
*/ + GPUDisplayParams params; + { + thread_scoped_lock lock(mutex_); + params = params_; + texture_state_.size = make_int2(texture_width, texture_height); + } + + if (!do_update_begin(params, texture_width, texture_height)) { + LOG(ERROR) << "GPUDisplay implementation could not begin update."; + return false; + } + + update_state_.is_active = true; + + return true; +} + +void GPUDisplay::update_end() +{ + DCHECK(update_state_.is_active); + + if (!update_state_.is_active) { + LOG(ERROR) << "Attempt to deactivate inactive update process."; + return; + } + + do_update_end(); + + update_state_.is_active = false; +} + +int2 GPUDisplay::get_texture_size() const +{ + return texture_state_.size; +} + +/* -------------------------------------------------------------------- + * Texture update from CPU buffer. + */ + +void GPUDisplay::copy_pixels_to_texture( + const half4 *rgba_pixels, int texture_x, int texture_y, int pixels_width, int pixels_height) +{ + DCHECK(update_state_.is_active); + + if (!update_state_.is_active) { + LOG(ERROR) << "Attempt to copy pixels data outside of GPUDisplay update."; + return; + } + + mark_texture_updated(); + do_copy_pixels_to_texture(rgba_pixels, texture_x, texture_y, pixels_width, pixels_height); +} + +/* -------------------------------------------------------------------- + * Texture buffer mapping. 
+ */ + +half4 *GPUDisplay::map_texture_buffer() +{ + DCHECK(!texture_buffer_state_.is_mapped); + DCHECK(update_state_.is_active); + + if (texture_buffer_state_.is_mapped) { + LOG(ERROR) << "Attempt to re-map an already mapped texture buffer."; + return nullptr; + } + + if (!update_state_.is_active) { + LOG(ERROR) << "Attempt to copy pixels data outside of GPUDisplay update."; + return nullptr; + } + + half4 *mapped_rgba_pixels = do_map_texture_buffer(); + + if (mapped_rgba_pixels) { + texture_buffer_state_.is_mapped = true; + } + + return mapped_rgba_pixels; +} + +void GPUDisplay::unmap_texture_buffer() +{ + DCHECK(texture_buffer_state_.is_mapped); + + if (!texture_buffer_state_.is_mapped) { + LOG(ERROR) << "Attempt to unmap non-mapped texture buffer."; + return; + } + + texture_buffer_state_.is_mapped = false; + + mark_texture_updated(); + do_unmap_texture_buffer(); +} + +/* -------------------------------------------------------------------- + * Graphics interoperability. + */ + +DeviceGraphicsInteropDestination GPUDisplay::graphics_interop_get() +{ + DCHECK(!texture_buffer_state_.is_mapped); + DCHECK(update_state_.is_active); + + if (texture_buffer_state_.is_mapped) { + LOG(ERROR) + << "Attempt to use graphics interoperability mode while the texture buffer is mapped."; + return DeviceGraphicsInteropDestination(); + } + + if (!update_state_.is_active) { + LOG(ERROR) << "Attempt to use graphics interoperability outside of GPUDisplay update."; + return DeviceGraphicsInteropDestination(); + } + + /* Assume that interop will write new values to the texture. */ + mark_texture_updated(); + + return do_graphics_interop_get(); +} + +void GPUDisplay::graphics_interop_activate() +{ +} + +void GPUDisplay::graphics_interop_deactivate() +{ +} + +/* -------------------------------------------------------------------- + * Drawing. + */ + +bool GPUDisplay::draw() +{ + /* Get parameters within a mutex lock, to avoid reset() modifying them at the same time. 
+ * The drawing itself is non-blocking however, for better performance and to avoid + * potential deadlocks due to locks held by the subclass. */ + GPUDisplayParams params; + bool is_usable; + bool is_outdated; + + { + thread_scoped_lock lock(mutex_); + params = params_; + is_usable = texture_state_.is_usable; + is_outdated = texture_state_.is_outdated; + } + + if (is_usable) { + do_draw(params); + } + + return !is_outdated; +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/render/gpu_display.h b/intern/cycles/render/gpu_display.h new file mode 100644 index 00000000000..a01348d28d5 --- /dev/null +++ b/intern/cycles/render/gpu_display.h @@ -0,0 +1,247 @@ +/* + * Copyright 2021 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include "device/device_graphics_interop.h" +#include "util/util_half.h" +#include "util/util_thread.h" +#include "util/util_types.h" + +CCL_NAMESPACE_BEGIN + +class BufferParams; + +/* GPUDisplay class takes care of drawing render result in a viewport. The render result is stored + * in a GPU-side texture, which is updated from a path tracer and drawn by an application. + * + * The base GPUDisplay does some special texture state tracking, which allows render Session to + * make decisions on whether reset for an updated state is possible or not. This state should only + * be tracked in a base class and a particular implementation should not worry about it. 
+ * + * The subclasses should only implement the pure virtual methods, which allows them to not worry + * about parent method calls, which helps them to be as small and reliable as possible. */ + +class GPUDisplayParams { + public: + /* Offset of the display within a viewport. + * For example, set to a lower-bottom corner of border render in Blender's viewport. */ + int2 offset = make_int2(0, 0); + + /* Full viewport size. + * + * NOTE: Is not affected by the resolution divider. */ + int2 full_size = make_int2(0, 0); + + /* Effective viewport size. + * In the case of border render, size of the border rectangle. + * + * NOTE: Is not affected by the resolution divider. */ + int2 size = make_int2(0, 0); + + bool modified(const GPUDisplayParams &other) const + { + return !(offset == other.offset && full_size == other.full_size && size == other.size); + } +}; + +class GPUDisplay { + public: + GPUDisplay() = default; + virtual ~GPUDisplay() = default; + + /* Reset the display for the new state of render session. Is called whenever session is reset, + * which happens on changes like viewport navigation or viewport dimension change. + * + * This call will configure parameters for a changed buffer and reset the texture state. */ + void reset(const BufferParams &buffer_params); + + const GPUDisplayParams &get_params() const + { + return params_; + } + + /* -------------------------------------------------------------------- + * Update procedure. + * + * These calls indicate a desire of the caller to update content of the displayed texture. */ + + /* Returns true when update is ready. Update should be finished with update_end(). + * + * If false is returned then no update is possible, and no update_end() call is needed. + * + * The texture width and height denote an actual resolution of the underlying render result. 
*/ + bool update_begin(int texture_width, int texture_height); + + void update_end(); + + /* Get currently configured texture size of the display (as configured by `update_begin()`). */ + int2 get_texture_size() const; + + /* -------------------------------------------------------------------- + * Texture update from CPU buffer. + * + * NOTE: The GPUDisplay should be marked for an update being in process with `update_begin()`. + * + * Most portable implementation, which must be supported by all platforms. Might not be the most + * efficient one. + */ + + /* Copy buffer of rendered pixels of a given size into a given position of the texture. + * + * This function does not acquire a lock. The reason for this is to allow use of this function + * for partial updates from different devices. In this case the caller will acquire the lock + * once, update all the slices and release + * the lock once. This will ensure that draw() will never use partially updated texture. */ + void copy_pixels_to_texture( + const half4 *rgba_pixels, int texture_x, int texture_y, int pixels_width, int pixels_height); + + /* -------------------------------------------------------------------- + * Texture buffer mapping. + * + * This functionality is used to update GPU-side texture content without need to maintain CPU + * side buffer on the caller. + * + * NOTE: The GPUDisplay should be marked for an update being in process with `update_begin()`. + * + * NOTE: Texture buffer can not be mapped while graphics interoperability is active. This means + * that `map_texture_buffer()` is not allowed between `graphics_interop_begin()` and + * `graphics_interop_end()` calls. + */ + + /* Map pixels memory from texture to a buffer available for write from CPU. Width and height will + * define a requested size of the texture to write to. + * Upon success a non-null pointer is returned and the texture buffer is to be unmapped. 
+ * If an error happens during mapping, or if mapping is not supported by this GPU display a + * null pointer is returned and the buffer is NOT to be unmapped. + * + * NOTE: Usually the implementation will rely on a GPU context of some sort, and the GPU context + * is often can not be bound to two threads simultaneously, and can not be released from a + * different thread. This means that the mapping API should be used from the single thread only, + */ + half4 *map_texture_buffer(); + void unmap_texture_buffer(); + + /* -------------------------------------------------------------------- + * Graphics interoperability. + * + * A special code path which allows to update texture content directly from the GPU compute + * device. Complementary part of DeviceGraphicsInterop. + * + * NOTE: Graphics interoperability can not be used while the texture buffer is mapped. This means + * that `graphics_interop_get()` is not allowed between `map_texture_buffer()` and + * `unmap_texture_buffer()` calls. */ + + /* Get GPUDisplay graphics interoperability information which acts as a destination for the + * device API. */ + DeviceGraphicsInteropDestination graphics_interop_get(); + + /* (De)activate GPU display for graphics interoperability outside of regular display update + * routines. */ + virtual void graphics_interop_activate(); + virtual void graphics_interop_deactivate(); + + /* -------------------------------------------------------------------- + * Drawing. + */ + + /* Clear the texture by filling it with all zeroes. + * + * This call might happen in parallel with draw, but can never happen in parallel with the + * update. + * + * The actual zero-ing can be deferred to a later moment. What is important is that after clear + * and before pixels update the drawing texture will be fully empty, and that partial update + * after clear will write new pixel values for an updating area, leaving everything else zeroed. 
+ * + * If the GPU display supports graphics interoperability then the zeroing the display is to be + * delegated to the device via the `DeviceGraphicsInteropDestination`. */ + virtual void clear() = 0; + + /* Draw the current state of the texture. + * + * Returns true if this call did draw an updated state of the texture. */ + bool draw(); + + protected: + /* Implementation-specific calls which subclasses are to implement. + * These `do_foo()` method corresponds to their `foo()` calls, but they are purely virtual to + * simplify their particular implementation. */ + virtual bool do_update_begin(const GPUDisplayParams ¶ms, + int texture_width, + int texture_height) = 0; + virtual void do_update_end() = 0; + + virtual void do_copy_pixels_to_texture(const half4 *rgba_pixels, + int texture_x, + int texture_y, + int pixels_width, + int pixels_height) = 0; + + virtual half4 *do_map_texture_buffer() = 0; + virtual void do_unmap_texture_buffer() = 0; + + /* Note that this might be called in parallel to do_update_begin() and do_update_end(), + * the subclass is responsible for appropriate mutex locks to avoid multiple threads + * editing and drawing the texture at the same time. */ + virtual void do_draw(const GPUDisplayParams ¶ms) = 0; + + virtual DeviceGraphicsInteropDestination do_graphics_interop_get() = 0; + + private: + thread_mutex mutex_; + GPUDisplayParams params_; + + /* Mark texture as its content has been updated. + * Used from places which knows that the texture content has been brought up-to-date, so that the + * drawing knows whether it can be performed, and whether drawing happened with an up-to-date + * texture state. */ + void mark_texture_updated(); + + /* State of the update process. */ + struct { + /* True when update is in process, indicated by `update_begin()` / `update_end()`. */ + bool is_active = false; + } update_state_; + + /* State of the texture, which is needed for an integration with render session and interactive + * updates and navigation. 
*/ + struct { + /* Denotes whether possibly existing state of GPU side texture is still usable. + * It will not be usable in cases like render border did change (in this case we don't want + * previous texture to be rendered at all). + * + * However, if only navigation or object in scene did change, then the outdated state of the + * texture is still usable for draw, preventing display viewport flickering on navigation and + * object modifications. */ + bool is_usable = false; + + /* Texture is considered outdated after `reset()` until the next call of + * `copy_pixels_to_texture()`. */ + bool is_outdated = true; + + /* Texture size in pixels. */ + int2 size = make_int2(0, 0); + } texture_state_; + + /* State of the texture buffer. Is tracked to perform sanity checks. */ + struct { + /* True when the texture buffer is mapped with `map_texture_buffer()`. */ + bool is_mapped = false; + } texture_buffer_state_; +}; + +CCL_NAMESPACE_END diff --git a/intern/cycles/render/graph.h b/intern/cycles/render/graph.h index 5102b182593..3584754fad1 100644 --- a/intern/cycles/render/graph.h +++ b/intern/cycles/render/graph.h @@ -224,10 +224,6 @@ class ShaderNode : public Node { { return false; } - virtual bool has_raytrace() - { - return false; - } vector<ShaderInput *> inputs; vector<ShaderOutput *> outputs; @@ -242,22 +238,13 @@ class ShaderNode : public Node { * that those functions are for selective compilation only? */ - /* Nodes are split into several groups, group of level 0 contains - * nodes which are most commonly used, further levels are extension - * of previous one and includes less commonly used nodes. - */ - virtual int get_group() - { - return NODE_GROUP_LEVEL_0; - } - /* Node feature are used to disable huge nodes inside the group, * so it's possible to disable huge nodes inside of the required * nodes group. */ virtual int get_feature() { - return bump == SHADER_BUMP_NONE ? 0 : NODE_FEATURE_BUMP; + return bump == SHADER_BUMP_NONE ? 
0 : KERNEL_FEATURE_NODE_BUMP; } /* Get closure ID to which the node compiles into. */ diff --git a/intern/cycles/render/integrator.cpp b/intern/cycles/render/integrator.cpp index d8749cec9fa..d74d14242bb 100644 --- a/intern/cycles/render/integrator.cpp +++ b/intern/cycles/render/integrator.cpp @@ -53,6 +53,8 @@ NODE_DEFINE(Integrator) SOCKET_INT(transparent_max_bounce, "Transparent Max Bounce", 7); SOCKET_INT(ao_bounces, "AO Bounces", 0); + SOCKET_FLOAT(ao_factor, "AO Factor", 0.0f); + SOCKET_FLOAT(ao_distance, "AO Distance", FLT_MAX); SOCKET_INT(volume_max_steps, "Volume Max Steps", 1024); SOCKET_FLOAT(volume_step_rate, "Volume Step Rate", 1.0f); @@ -66,33 +68,39 @@ NODE_DEFINE(Integrator) SOCKET_BOOLEAN(motion_blur, "Motion Blur", false); SOCKET_INT(aa_samples, "AA Samples", 0); - SOCKET_INT(diffuse_samples, "Diffuse Samples", 1); - SOCKET_INT(glossy_samples, "Glossy Samples", 1); - SOCKET_INT(transmission_samples, "Transmission Samples", 1); - SOCKET_INT(ao_samples, "AO Samples", 1); - SOCKET_INT(mesh_light_samples, "Mesh Light Samples", 1); - SOCKET_INT(subsurface_samples, "Subsurface Samples", 1); - SOCKET_INT(volume_samples, "Volume Samples", 1); SOCKET_INT(start_sample, "Start Sample", 0); + SOCKET_BOOLEAN(use_adaptive_sampling, "Use Adaptive Sampling", false); SOCKET_FLOAT(adaptive_threshold, "Adaptive Threshold", 0.0f); SOCKET_INT(adaptive_min_samples, "Adaptive Min Samples", 0); - SOCKET_BOOLEAN(sample_all_lights_direct, "Sample All Lights Direct", true); - SOCKET_BOOLEAN(sample_all_lights_indirect, "Sample All Lights Indirect", true); SOCKET_FLOAT(light_sampling_threshold, "Light Sampling Threshold", 0.05f); - static NodeEnum method_enum; - method_enum.insert("path", PATH); - method_enum.insert("branched_path", BRANCHED_PATH); - SOCKET_ENUM(method, "Method", method_enum, PATH); - static NodeEnum sampling_pattern_enum; sampling_pattern_enum.insert("sobol", SAMPLING_PATTERN_SOBOL); - sampling_pattern_enum.insert("cmj", SAMPLING_PATTERN_CMJ); 
sampling_pattern_enum.insert("pmj", SAMPLING_PATTERN_PMJ); SOCKET_ENUM(sampling_pattern, "Sampling Pattern", sampling_pattern_enum, SAMPLING_PATTERN_SOBOL); + static NodeEnum denoiser_type_enum; + denoiser_type_enum.insert("optix", DENOISER_OPTIX); + denoiser_type_enum.insert("openimagedenoise", DENOISER_OPENIMAGEDENOISE); + + static NodeEnum denoiser_prefilter_enum; + denoiser_prefilter_enum.insert("none", DENOISER_PREFILTER_NONE); + denoiser_prefilter_enum.insert("fast", DENOISER_PREFILTER_FAST); + denoiser_prefilter_enum.insert("accurate", DENOISER_PREFILTER_ACCURATE); + + /* Default to accurate denoising with OpenImageDenoise. For interactive viewport + * it's best use OptiX and disable the normal pass since it does not always have + * the desired effect for that denoiser. */ + SOCKET_BOOLEAN(use_denoise, "Use Denoiser", false); + SOCKET_ENUM(denoiser_type, "Denoiser Type", denoiser_type_enum, DENOISER_OPENIMAGEDENOISE); + SOCKET_INT(denoise_start_sample, "Start Sample to Denoise", 0); + SOCKET_BOOLEAN(use_denoise_pass_albedo, "Use Albedo Pass for Denoiser", true); + SOCKET_BOOLEAN(use_denoise_pass_normal, "Use Normal Pass for Denoiser", true); + SOCKET_ENUM( + denoiser_prefilter, "Denoiser Type", denoiser_prefilter_enum, DENOISER_PREFILTER_ACCURATE); + return type; } @@ -115,13 +123,20 @@ void Integrator::device_update(Device *device, DeviceScene *dscene, Scene *scene } }); - const bool need_update_lut = ao_samples_is_modified() || diffuse_samples_is_modified() || - glossy_samples_is_modified() || max_bounce_is_modified() || - max_transmission_bounce_is_modified() || - mesh_light_samples_is_modified() || method_is_modified() || - sampling_pattern_is_modified() || - subsurface_samples_is_modified() || - transmission_samples_is_modified() || volume_samples_is_modified(); + KernelIntegrator *kintegrator = &dscene->data.integrator; + + /* Adaptive sampling requires PMJ samples. 
+ * + * This also makes detection of sampling pattern a bit more involved: can not rely on the changed + * state of socket, since its value might be different from the effective value used here. So + * instead compare with previous value in the KernelIntegrator. Only do it if the device was + * updated once (in which case the `sample_pattern_lut` will be allocated to a non-zero size). */ + const SamplingPattern new_sampling_pattern = (use_adaptive_sampling) ? SAMPLING_PATTERN_PMJ : + sampling_pattern; + + const bool need_update_lut = max_bounce_is_modified() || max_transmission_bounce_is_modified() || + dscene->sample_pattern_lut.size() == 0 || + kintegrator->sampling_pattern != new_sampling_pattern; if (need_update_lut) { dscene->sample_pattern_lut.tag_realloc(); @@ -129,8 +144,6 @@ void Integrator::device_update(Device *device, DeviceScene *dscene, Scene *scene device_free(device, dscene); - KernelIntegrator *kintegrator = &dscene->data.integrator; - /* integrator parameters */ kintegrator->min_bounce = min_bounce + 1; kintegrator->max_bounce = max_bounce + 1; @@ -143,12 +156,9 @@ void Integrator::device_update(Device *device, DeviceScene *dscene, Scene *scene kintegrator->transparent_min_bounce = transparent_min_bounce + 1; kintegrator->transparent_max_bounce = transparent_max_bounce + 1; - if (ao_bounces == 0) { - kintegrator->ao_bounces = INT_MAX; - } - else { - kintegrator->ao_bounces = ao_bounces - 1; - } + kintegrator->ao_bounces = ao_bounces; + kintegrator->ao_bounces_distance = ao_distance; + kintegrator->ao_bounces_factor = ao_factor; /* Transparent Shadows * We only need to enable transparent shadows, if we actually have @@ -171,10 +181,7 @@ void Integrator::device_update(Device *device, DeviceScene *dscene, Scene *scene kintegrator->caustics_refractive = caustics_refractive; kintegrator->filter_glossy = (filter_glossy == 0.0f) ? 
FLT_MAX : 1.0f / filter_glossy; - kintegrator->seed = hash_uint2(seed, 0); - - kintegrator->use_ambient_occlusion = ((Pass::contains(scene->passes, PASS_AO)) || - dscene->data.background.ao_factor != 0.0f); + kintegrator->seed = seed; kintegrator->sample_clamp_direct = (sample_clamp_direct == 0.0f) ? FLT_MAX : sample_clamp_direct * 3.0f; @@ -182,51 +189,7 @@ void Integrator::device_update(Device *device, DeviceScene *dscene, Scene *scene FLT_MAX : sample_clamp_indirect * 3.0f; - kintegrator->branched = (method == BRANCHED_PATH) && device->info.has_branched_path; - kintegrator->volume_decoupled = device->info.has_volume_decoupled; - kintegrator->diffuse_samples = diffuse_samples; - kintegrator->glossy_samples = glossy_samples; - kintegrator->transmission_samples = transmission_samples; - kintegrator->ao_samples = ao_samples; - kintegrator->mesh_light_samples = mesh_light_samples; - kintegrator->subsurface_samples = subsurface_samples; - kintegrator->volume_samples = volume_samples; - kintegrator->start_sample = start_sample; - - if (kintegrator->branched) { - kintegrator->sample_all_lights_direct = sample_all_lights_direct; - kintegrator->sample_all_lights_indirect = sample_all_lights_indirect; - } - else { - kintegrator->sample_all_lights_direct = false; - kintegrator->sample_all_lights_indirect = false; - } - - kintegrator->sampling_pattern = sampling_pattern; - kintegrator->aa_samples = aa_samples; - if (aa_samples > 0 && adaptive_min_samples == 0) { - kintegrator->adaptive_min_samples = max(4, (int)sqrtf(aa_samples)); - VLOG(1) << "Cycles adaptive sampling: automatic min samples = " - << kintegrator->adaptive_min_samples; - } - else { - kintegrator->adaptive_min_samples = max(4, adaptive_min_samples); - } - - kintegrator->adaptive_step = 4; - kintegrator->adaptive_stop_per_sample = device->info.has_adaptive_stop_per_sample; - - /* Adaptive step must be a power of two for bitwise operations to work. 
*/ - assert((kintegrator->adaptive_step & (kintegrator->adaptive_step - 1)) == 0); - - if (aa_samples > 0 && adaptive_threshold == 0.0f) { - kintegrator->adaptive_threshold = max(0.001f, 1.0f / (float)aa_samples); - VLOG(1) << "Cycles adaptive sampling: automatic threshold = " - << kintegrator->adaptive_threshold; - } - else { - kintegrator->adaptive_threshold = adaptive_threshold; - } + kintegrator->sampling_pattern = new_sampling_pattern; if (light_sampling_threshold > 0.0f) { kintegrator->light_inv_rr_threshold = 1.0f / light_sampling_threshold; @@ -236,29 +199,15 @@ void Integrator::device_update(Device *device, DeviceScene *dscene, Scene *scene } /* sobol directions table */ - int max_samples = 1; - - if (kintegrator->branched) { - foreach (Light *light, scene->lights) - max_samples = max(max_samples, light->get_samples()); - - max_samples = max(max_samples, - max(diffuse_samples, max(glossy_samples, transmission_samples))); - max_samples = max(max_samples, max(ao_samples, max(mesh_light_samples, subsurface_samples))); - max_samples = max(max_samples, volume_samples); - } - - uint total_bounces = max_bounce + transparent_max_bounce + 3 + VOLUME_BOUNDS_MAX + - max(BSSRDF_MAX_HITS, BSSRDF_MAX_BOUNCES); - - max_samples *= total_bounces; + int max_samples = max_bounce + transparent_max_bounce + 3 + VOLUME_BOUNDS_MAX + + max(BSSRDF_MAX_HITS, BSSRDF_MAX_BOUNCES); int dimensions = PRNG_BASE_NUM + max_samples * PRNG_BOUNCE_NUM; dimensions = min(dimensions, SOBOL_MAX_DIMENSIONS); if (need_update_lut) { - if (sampling_pattern == SAMPLING_PATTERN_SOBOL) { - uint *directions = dscene->sample_pattern_lut.alloc(SOBOL_BITS * dimensions); + if (kintegrator->sampling_pattern == SAMPLING_PATTERN_SOBOL) { + uint *directions = (uint *)dscene->sample_pattern_lut.alloc(SOBOL_BITS * dimensions); sobol_generate_direction_vectors((uint(*)[SOBOL_BITS])directions, dimensions); @@ -276,10 +225,13 @@ void Integrator::device_update(Device *device, DeviceScene *dscene, Scene *scene 
function_bind(&progressive_multi_jitter_02_generate_2D, sequence, sequence_size, j)); } pool.wait_work(); + dscene->sample_pattern_lut.copy_to_device(); } } + kintegrator->has_shadow_catcher = scene->has_shadow_catcher(); + dscene->sample_pattern_lut.clear_modified(); clear_modified(); } @@ -295,17 +247,12 @@ void Integrator::tag_update(Scene *scene, uint32_t flag) tag_modified(); } - if (flag & (AO_PASS_MODIFIED | BACKGROUND_AO_MODIFIED)) { + if (flag & AO_PASS_MODIFIED) { /* tag only the ao_bounces socket as modified so we avoid updating sample_pattern_lut * unnecessarily */ tag_ao_bounces_modified(); } - if ((flag & LIGHT_SAMPLES_MODIFIED) && (method == BRANCHED_PATH)) { - /* the number of light samples may affect the size of the sample_pattern_lut */ - tag_sampling_pattern_modified(); - } - if (filter_glossy_is_modified()) { foreach (Shader *shader, scene->shaders) { if (shader->has_integrator_dependency) { @@ -321,4 +268,65 @@ void Integrator::tag_update(Scene *scene, uint32_t flag) } } +AdaptiveSampling Integrator::get_adaptive_sampling() const +{ + AdaptiveSampling adaptive_sampling; + + adaptive_sampling.use = use_adaptive_sampling; + + if (!adaptive_sampling.use) { + return adaptive_sampling; + } + + if (aa_samples > 0 && adaptive_threshold == 0.0f) { + adaptive_sampling.threshold = max(0.001f, 1.0f / (float)aa_samples); + VLOG(1) << "Cycles adaptive sampling: automatic threshold = " << adaptive_sampling.threshold; + } + else { + adaptive_sampling.threshold = adaptive_threshold; + } + + if (adaptive_sampling.threshold > 0 && adaptive_min_samples == 0) { + /* Threshold 0.1 -> 32, 0.01 -> 64, 0.001 -> 128. + * This is highly scene dependent, we make a guess that seemed to work well + * in various test scenes. 
*/ + const int min_samples = (int)ceilf(16.0f / powf(adaptive_sampling.threshold, 0.3f)); + adaptive_sampling.min_samples = max(4, min_samples); + VLOG(1) << "Cycles adaptive sampling: automatic min samples = " + << adaptive_sampling.min_samples; + } + else { + adaptive_sampling.min_samples = max(4, adaptive_min_samples); + } + + /* Arbitrary factor that makes the threshold more similar to what is was before, + * and gives arguably more intuitive values. */ + adaptive_sampling.threshold *= 5.0f; + + adaptive_sampling.adaptive_step = 16; + + DCHECK(is_power_of_two(adaptive_sampling.adaptive_step)) + << "Adaptive step must be a power of two for bitwise operations to work"; + + return adaptive_sampling; +} + +DenoiseParams Integrator::get_denoise_params() const +{ + DenoiseParams denoise_params; + + denoise_params.use = use_denoise; + + denoise_params.type = denoiser_type; + + denoise_params.start_sample = denoise_start_sample; + + denoise_params.use_pass_albedo = use_denoise_pass_albedo; + denoise_params.use_pass_normal = use_denoise_pass_normal; + + denoise_params.prefilter = denoiser_prefilter; + + return denoise_params; +} + CCL_NAMESPACE_END diff --git a/intern/cycles/render/integrator.h b/intern/cycles/render/integrator.h index 4eeeda92d41..32e108d62ca 100644 --- a/intern/cycles/render/integrator.h +++ b/intern/cycles/render/integrator.h @@ -19,7 +19,9 @@ #include "kernel/kernel_types.h" +#include "device/device_denoise.h" /* For the paramaters and type enum. 
*/ #include "graph/node.h" +#include "integrator/adaptive_sampling.h" CCL_NAMESPACE_BEGIN @@ -43,6 +45,8 @@ class Integrator : public Node { NODE_SOCKET_API(int, transparent_max_bounce) NODE_SOCKET_API(int, ao_bounces) + NODE_SOCKET_API(float, ao_factor) + NODE_SOCKET_API(float, ao_distance) NODE_SOCKET_API(int, volume_max_steps) NODE_SOCKET_API(float, volume_step_rate) @@ -62,37 +66,26 @@ class Integrator : public Node { static const int MAX_SAMPLES = (1 << 24); NODE_SOCKET_API(int, aa_samples) - NODE_SOCKET_API(int, diffuse_samples) - NODE_SOCKET_API(int, glossy_samples) - NODE_SOCKET_API(int, transmission_samples) - NODE_SOCKET_API(int, ao_samples) - NODE_SOCKET_API(int, mesh_light_samples) - NODE_SOCKET_API(int, subsurface_samples) - NODE_SOCKET_API(int, volume_samples) NODE_SOCKET_API(int, start_sample) - NODE_SOCKET_API(bool, sample_all_lights_direct) - NODE_SOCKET_API(bool, sample_all_lights_indirect) NODE_SOCKET_API(float, light_sampling_threshold) + NODE_SOCKET_API(bool, use_adaptive_sampling) NODE_SOCKET_API(int, adaptive_min_samples) NODE_SOCKET_API(float, adaptive_threshold) - enum Method { - BRANCHED_PATH = 0, - PATH = 1, - - NUM_METHODS, - }; - - NODE_SOCKET_API(Method, method) - NODE_SOCKET_API(SamplingPattern, sampling_pattern) + NODE_SOCKET_API(bool, use_denoise); + NODE_SOCKET_API(DenoiserType, denoiser_type); + NODE_SOCKET_API(int, denoise_start_sample); + NODE_SOCKET_API(bool, use_denoise_pass_albedo); + NODE_SOCKET_API(bool, use_denoise_pass_normal); + NODE_SOCKET_API(DenoiserPrefilter, denoiser_prefilter); + enum : uint32_t { AO_PASS_MODIFIED = (1 << 0), - BACKGROUND_AO_MODIFIED = (1 << 1), - LIGHT_SAMPLES_MODIFIED = (1 << 2), + OBJECT_MANAGER = (1 << 1), /* tag everything in the manager for an update */ UPDATE_ALL = ~0u, @@ -107,6 +100,9 @@ class Integrator : public Node { void device_free(Device *device, DeviceScene *dscene, bool force_free = false); void tag_update(Scene *scene, uint32_t flag); + + AdaptiveSampling get_adaptive_sampling() 
const; + DenoiseParams get_denoise_params() const; }; CCL_NAMESPACE_END diff --git a/intern/cycles/render/jitter.cpp b/intern/cycles/render/jitter.cpp index fc47b0e8f0a..e31f8abd446 100644 --- a/intern/cycles/render/jitter.cpp +++ b/intern/cycles/render/jitter.cpp @@ -242,12 +242,6 @@ class PMJ02_Generator : public PMJ_Generator { static void shuffle(float2 points[], int size, int rng_seed) { - /* Offset samples by 1.0 for faster scrambling in kernel_random.h */ - for (int i = 0; i < size; ++i) { - points[i].x += 1.0f; - points[i].y += 1.0f; - } - if (rng_seed == 0) { return; } diff --git a/intern/cycles/render/light.cpp b/intern/cycles/render/light.cpp index 15aa4e047b5..ae1150fc07b 100644 --- a/intern/cycles/render/light.cpp +++ b/intern/cycles/render/light.cpp @@ -14,12 +14,13 @@ * limitations under the License. */ -#include "render/light.h" #include "device/device.h" + #include "render/background.h" #include "render/film.h" #include "render/graph.h" #include "render/integrator.h" +#include "render/light.h" #include "render/mesh.h" #include "render/nodes.h" #include "render/object.h" @@ -27,6 +28,8 @@ #include "render/shader.h" #include "render/stats.h" +#include "integrator/shader_eval.h" + #include "util/util_foreach.h" #include "util/util_hash.h" #include "util/util_logging.h" @@ -43,63 +46,49 @@ static void shade_background_pixels(Device *device, vector<float3> &pixels, Progress &progress) { - /* create input */ - device_vector<uint4> d_input(device, "background_input", MEM_READ_ONLY); - device_vector<float4> d_output(device, "background_output", MEM_READ_WRITE); - - uint4 *d_input_data = d_input.alloc(width * height); - - for (int y = 0; y < height; y++) { - for (int x = 0; x < width; x++) { - float u = (x + 0.5f) / width; - float v = (y + 0.5f) / height; - - uint4 in = make_uint4(__float_as_int(u), __float_as_int(v), 0, 0); - d_input_data[x + y * width] = in; - } - } - - /* compute on device */ - d_output.alloc(width * height); - d_output.zero_to_device(); 
- d_input.copy_to_device(); - + /* Needs to be up to data for attribute access. */ device->const_copy_to("__data", &dscene->data, sizeof(dscene->data)); - DeviceTask main_task(DeviceTask::SHADER); - main_task.shader_input = d_input.device_pointer; - main_task.shader_output = d_output.device_pointer; - main_task.shader_eval_type = SHADER_EVAL_BACKGROUND; - main_task.shader_x = 0; - main_task.shader_w = width * height; - main_task.num_samples = 1; - main_task.get_cancel = function_bind(&Progress::get_cancel, &progress); - - /* disabled splitting for now, there's an issue with multi-GPU mem_copy_from */ - list<DeviceTask> split_tasks; - main_task.split(split_tasks, 1, 128 * 128); - - foreach (DeviceTask &task, split_tasks) { - device->task_add(task); - device->task_wait(); - d_output.copy_from_device(task.shader_x, 1, task.shader_w); - } - - d_input.free(); - - float4 *d_output_data = d_output.data(); - - pixels.resize(width * height); - - for (int y = 0; y < height; y++) { - for (int x = 0; x < width; x++) { - pixels[y * width + x].x = d_output_data[y * width + x].x; - pixels[y * width + x].y = d_output_data[y * width + x].y; - pixels[y * width + x].z = d_output_data[y * width + x].z; - } - } + const int size = width * height; + pixels.resize(size); + + /* Evaluate shader on device. */ + ShaderEval shader_eval(device, progress); + shader_eval.eval( + SHADER_EVAL_BACKGROUND, + size, + [&](device_vector<KernelShaderEvalInput> &d_input) { + /* Fill coordinates for shading. */ + KernelShaderEvalInput *d_input_data = d_input.data(); + + for (int y = 0; y < height; y++) { + for (int x = 0; x < width; x++) { + float u = (x + 0.5f) / width; + float v = (y + 0.5f) / height; + + KernelShaderEvalInput in; + in.object = OBJECT_NONE; + in.prim = PRIM_NONE; + in.u = u; + in.v = v; + d_input_data[x + y * width] = in; + } + } - d_output.free(); + return size; + }, + [&](device_vector<float4> &d_output) { + /* Copy output to pixel buffer. 
*/ + float4 *d_output_data = d_output.data(); + + for (int y = 0; y < height; y++) { + for (int x = 0; x < width; x++) { + pixels[y * width + x].x = d_output_data[y * width + x].x; + pixels[y * width + x].y = d_output_data[y * width + x].y; + pixels[y * width + x].z = d_output_data[y * width + x].z; + } + } + }); } /* Light */ @@ -140,15 +129,16 @@ NODE_DEFINE(Light) SOCKET_BOOLEAN(cast_shadow, "Cast Shadow", true); SOCKET_BOOLEAN(use_mis, "Use Mis", false); + SOCKET_BOOLEAN(use_camera, "Use Camera", true); SOCKET_BOOLEAN(use_diffuse, "Use Diffuse", true); SOCKET_BOOLEAN(use_glossy, "Use Glossy", true); SOCKET_BOOLEAN(use_transmission, "Use Transmission", true); SOCKET_BOOLEAN(use_scatter, "Use Scatter", true); - SOCKET_INT(samples, "Samples", 1); SOCKET_INT(max_bounces, "Max Bounces", 1024); SOCKET_UINT(random_id, "Random ID", 0); + SOCKET_BOOLEAN(is_shadow_catcher, "Shadow Catcher", true); SOCKET_BOOLEAN(is_portal, "Is Portal", false); SOCKET_BOOLEAN(is_enabled, "Is Enabled", true); @@ -166,10 +156,6 @@ void Light::tag_update(Scene *scene) { if (is_modified()) { scene->light_manager->tag_update(scene, LightManager::LIGHT_MODIFIED); - - if (samples_is_modified()) { - scene->integrator->tag_update(scene, Integrator::LIGHT_SAMPLES_MODIFIED); - } } } @@ -193,7 +179,6 @@ LightManager::LightManager() { update_flags = UPDATE_ALL; need_update_background = true; - use_light_visibility = false; last_background_enabled = false; last_background_resolution = 0; } @@ -357,21 +342,23 @@ void LightManager::device_update_distribution(Device *, int object_id = j; int shader_flag = 0; + if (!(object->get_visibility() & PATH_RAY_CAMERA)) { + shader_flag |= SHADER_EXCLUDE_CAMERA; + } if (!(object->get_visibility() & PATH_RAY_DIFFUSE)) { shader_flag |= SHADER_EXCLUDE_DIFFUSE; - use_light_visibility = true; } if (!(object->get_visibility() & PATH_RAY_GLOSSY)) { shader_flag |= SHADER_EXCLUDE_GLOSSY; - use_light_visibility = true; } if (!(object->get_visibility() & PATH_RAY_TRANSMIT)) { 
shader_flag |= SHADER_EXCLUDE_TRANSMIT; - use_light_visibility = true; } if (!(object->get_visibility() & PATH_RAY_VOLUME_SCATTER)) { shader_flag |= SHADER_EXCLUDE_SCATTER; - use_light_visibility = true; + } + if (!(object->get_is_shadow_catcher())) { + shader_flag |= SHADER_EXCLUDE_SHADOW_CATCHER; } size_t mesh_num_triangles = mesh->num_triangles(); @@ -496,10 +483,10 @@ void LightManager::device_update_distribution(Device *, kfilm->pass_shadow_scale = 1.0f; if (kintegrator->pdf_triangles != 0.0f) - kfilm->pass_shadow_scale *= 0.5f; + kfilm->pass_shadow_scale *= 0.5f; if (num_background_lights < num_lights) - kfilm->pass_shadow_scale *= (float)(num_lights - num_background_lights) / (float)num_lights; + kfilm->pass_shadow_scale *= (float)(num_lights - num_background_lights) / (float)num_lights; /* CDF */ dscene->light_distribution.copy_to_device(); @@ -766,25 +753,26 @@ void LightManager::device_update_points(Device *, DeviceScene *dscene, Scene *sc if (!light->cast_shadow) shader_id &= ~SHADER_CAST_SHADOW; + if (!light->use_camera) { + shader_id |= SHADER_EXCLUDE_CAMERA; + } if (!light->use_diffuse) { shader_id |= SHADER_EXCLUDE_DIFFUSE; - use_light_visibility = true; } if (!light->use_glossy) { shader_id |= SHADER_EXCLUDE_GLOSSY; - use_light_visibility = true; } if (!light->use_transmission) { shader_id |= SHADER_EXCLUDE_TRANSMIT; - use_light_visibility = true; } if (!light->use_scatter) { shader_id |= SHADER_EXCLUDE_SCATTER; - use_light_visibility = true; + } + if (!light->is_shadow_catcher) { + shader_id |= SHADER_EXCLUDE_SHADOW_CATCHER; } klights[light_index].type = light->light_type; - klights[light_index].samples = light->samples; klights[light_index].strength[0] = light->strength.x; klights[light_index].strength[1] = light->strength.y; klights[light_index].strength[2] = light->strength.z; @@ -836,19 +824,15 @@ void LightManager::device_update_points(Device *, DeviceScene *dscene, Scene *sc if (!(visibility & PATH_RAY_DIFFUSE)) { shader_id |= 
SHADER_EXCLUDE_DIFFUSE; - use_light_visibility = true; } if (!(visibility & PATH_RAY_GLOSSY)) { shader_id |= SHADER_EXCLUDE_GLOSSY; - use_light_visibility = true; } if (!(visibility & PATH_RAY_TRANSMIT)) { shader_id |= SHADER_EXCLUDE_TRANSMIT; - use_light_visibility = true; } if (!(visibility & PATH_RAY_VOLUME_SCATTER)) { shader_id |= SHADER_EXCLUDE_SCATTER; - use_light_visibility = true; } } else if (light->light_type == LIGHT_AREA) { @@ -998,8 +982,6 @@ void LightManager::device_update(Device *device, device_free(device, dscene, need_update_background); - use_light_visibility = false; - device_update_points(device, dscene, scene); if (progress.get_cancel()) return; @@ -1018,8 +1000,6 @@ void LightManager::device_update(Device *device, if (progress.get_cancel()) return; - scene->film->set_use_light_visibility(use_light_visibility); - update_flags = UPDATE_NONE; need_update_background = false; } diff --git a/intern/cycles/render/light.h b/intern/cycles/render/light.h index fbd709125ff..7f86237c8b3 100644 --- a/intern/cycles/render/light.h +++ b/intern/cycles/render/light.h @@ -69,16 +69,17 @@ class Light : public Node { NODE_SOCKET_API(bool, cast_shadow) NODE_SOCKET_API(bool, use_mis) + NODE_SOCKET_API(bool, use_camera) NODE_SOCKET_API(bool, use_diffuse) NODE_SOCKET_API(bool, use_glossy) NODE_SOCKET_API(bool, use_transmission) NODE_SOCKET_API(bool, use_scatter) + NODE_SOCKET_API(bool, is_shadow_catcher) NODE_SOCKET_API(bool, is_portal) NODE_SOCKET_API(bool, is_enabled) NODE_SOCKET_API(Shader *, shader) - NODE_SOCKET_API(int, samples) NODE_SOCKET_API(int, max_bounces) NODE_SOCKET_API(uint, random_id) @@ -108,8 +109,6 @@ class LightManager { UPDATE_NONE = 0u, }; - bool use_light_visibility; - /* Need to update background (including multiple importance map) */ bool need_update_background; diff --git a/intern/cycles/render/mesh_displace.cpp b/intern/cycles/render/mesh_displace.cpp index b39d81023d9..c00c4c24211 100644 --- a/intern/cycles/render/mesh_displace.cpp +++ 
b/intern/cycles/render/mesh_displace.cpp @@ -16,6 +16,8 @@ #include "device/device.h" +#include "integrator/shader_eval.h" + #include "render/mesh.h" #include "render/object.h" #include "render/scene.h" @@ -43,40 +45,28 @@ static float3 compute_face_normal(const Mesh::Triangle &t, float3 *verts) return norm / normlen; } -bool GeometryManager::displace( - Device *device, DeviceScene *dscene, Scene *scene, Mesh *mesh, Progress &progress) +/* Fill in coordinates for mesh displacement shader evaluation on device. */ +static int fill_shader_input(const Scene *scene, + const Mesh *mesh, + const int object_index, + device_vector<KernelShaderEvalInput> &d_input) { - /* verify if we have a displacement shader */ - if (!mesh->has_true_displacement()) { - return false; - } - - string msg = string_printf("Computing Displacement %s", mesh->name.c_str()); - progress.set_status("Updating Mesh", msg); + int d_input_size = 0; + KernelShaderEvalInput *d_input_data = d_input.data(); - /* find object index. 
todo: is arbitrary */ - size_t object_index = OBJECT_NONE; + const array<int> &mesh_shaders = mesh->get_shader(); + const array<Node *> &mesh_used_shaders = mesh->get_used_shaders(); + const array<float3> &mesh_verts = mesh->get_verts(); - for (size_t i = 0; i < scene->objects.size(); i++) { - if (scene->objects[i]->get_geometry() == mesh) { - object_index = i; - break; - } - } - - /* setup input for device task */ - const size_t num_verts = mesh->verts.size(); + const int num_verts = mesh_verts.size(); vector<bool> done(num_verts, false); - device_vector<uint4> d_input(device, "displace_input", MEM_READ_ONLY); - uint4 *d_input_data = d_input.alloc(num_verts); - size_t d_input_size = 0; - size_t num_triangles = mesh->num_triangles(); - for (size_t i = 0; i < num_triangles; i++) { + int num_triangles = mesh->num_triangles(); + for (int i = 0; i < num_triangles; i++) { Mesh::Triangle t = mesh->get_triangle(i); - int shader_index = mesh->shader[i]; - Shader *shader = (shader_index < mesh->used_shaders.size()) ? - static_cast<Shader *>(mesh->used_shaders[shader_index]) : + int shader_index = mesh_shaders[i]; + Shader *shader = (shader_index < mesh_used_shaders.size()) ? 
+ static_cast<Shader *>(mesh_used_shaders[shader_index]) : scene->default_surface; if (!shader->has_displacement || shader->get_displacement_method() == DISPLACE_BUMP) { @@ -110,57 +100,41 @@ bool GeometryManager::displace( } /* back */ - uint4 in = make_uint4(object, prim, __float_as_int(u), __float_as_int(v)); + KernelShaderEvalInput in; + in.object = object; + in.prim = prim; + in.u = u; + in.v = v; d_input_data[d_input_size++] = in; } } - if (d_input_size == 0) - return false; - - /* run device task */ - device_vector<float4> d_output(device, "displace_output", MEM_READ_WRITE); - d_output.alloc(d_input_size); - d_output.zero_to_device(); - d_input.copy_to_device(); - - /* needs to be up to data for attribute access */ - device->const_copy_to("__data", &dscene->data, sizeof(dscene->data)); - - DeviceTask task(DeviceTask::SHADER); - task.shader_input = d_input.device_pointer; - task.shader_output = d_output.device_pointer; - task.shader_eval_type = SHADER_EVAL_DISPLACE; - task.shader_x = 0; - task.shader_w = d_output.size(); - task.num_samples = 1; - task.get_cancel = function_bind(&Progress::get_cancel, &progress); - - device->task_add(task); - device->task_wait(); - - if (progress.get_cancel()) { - d_input.free(); - d_output.free(); - return false; - } + return d_input_size; +} - d_output.copy_from_device(0, 1, d_output.size()); - d_input.free(); +/* Read back mesh displacement shader output. 
*/ +static void read_shader_output(const Scene *scene, + Mesh *mesh, + const device_vector<float4> &d_output) +{ + const array<int> &mesh_shaders = mesh->get_shader(); + const array<Node *> &mesh_used_shaders = mesh->get_used_shaders(); + array<float3> &mesh_verts = mesh->get_verts(); - /* read result */ - done.clear(); - done.resize(num_verts, false); - int k = 0; + const int num_verts = mesh_verts.size(); + const int num_motion_steps = mesh->get_motion_steps(); + vector<bool> done(num_verts, false); - float4 *offset = d_output.data(); + const float4 *d_output_data = d_output.data(); + int d_output_index = 0; Attribute *attr_mP = mesh->attributes.find(ATTR_STD_MOTION_VERTEX_POSITION); - for (size_t i = 0; i < num_triangles; i++) { + int num_triangles = mesh->num_triangles(); + for (int i = 0; i < num_triangles; i++) { Mesh::Triangle t = mesh->get_triangle(i); - int shader_index = mesh->shader[i]; - Shader *shader = (shader_index < mesh->used_shaders.size()) ? - static_cast<Shader *>(mesh->used_shaders[shader_index]) : + int shader_index = mesh_shaders[i]; + Shader *shader = (shader_index < mesh_used_shaders.size()) ? + static_cast<Shader *>(mesh_used_shaders[shader_index]) : scene->default_surface; if (!shader->has_displacement || shader->get_displacement_method() == DISPLACE_BUMP) { @@ -170,12 +144,12 @@ bool GeometryManager::displace( for (int j = 0; j < 3; j++) { if (!done[t.v[j]]) { done[t.v[j]] = true; - float3 off = float4_to_float3(offset[k++]); + float3 off = float4_to_float3(d_output_data[d_output_index++]); /* Avoid illegal vertex coordinates. 
*/ off = ensure_finite3(off); - mesh->verts[t.v[j]] += off; + mesh_verts[t.v[j]] += off; if (attr_mP != NULL) { - for (int step = 0; step < mesh->motion_steps - 1; step++) { + for (int step = 0; step < num_motion_steps - 1; step++) { float3 *mP = attr_mP->data_float3() + step * num_verts; mP[t.v[j]] += off; } @@ -183,8 +157,47 @@ bool GeometryManager::displace( } } } +} - d_output.free(); +bool GeometryManager::displace( + Device *device, DeviceScene *dscene, Scene *scene, Mesh *mesh, Progress &progress) +{ + /* verify if we have a displacement shader */ + if (!mesh->has_true_displacement()) { + return false; + } + + const size_t num_verts = mesh->verts.size(); + const size_t num_triangles = mesh->num_triangles(); + + if (num_triangles == 0) { + return false; + } + + string msg = string_printf("Computing Displacement %s", mesh->name.c_str()); + progress.set_status("Updating Mesh", msg); + + /* find object index. todo: is arbitrary */ + size_t object_index = OBJECT_NONE; + + for (size_t i = 0; i < scene->objects.size(); i++) { + if (scene->objects[i]->get_geometry() == mesh) { + object_index = i; + break; + } + } + + /* Needs to be up to data for attribute access. */ + device->const_copy_to("__data", &dscene->data, sizeof(dscene->data)); + + /* Evaluate shader on device. 
*/ + ShaderEval shader_eval(device, progress); + if (!shader_eval.eval(SHADER_EVAL_DISPLACE, + num_verts, + function_bind(&fill_shader_input, scene, mesh, object_index, _1), + function_bind(&read_shader_output, scene, mesh, _1))) { + return false; + } /* stitch */ unordered_set<int> stitch_keys; @@ -297,8 +310,7 @@ bool GeometryManager::displace( } /* normalize vertex normals */ - done.clear(); - done.resize(num_verts, false); + vector<bool> done(num_verts, false); for (size_t i = 0; i < num_triangles; i++) { if (tri_has_true_disp[i]) { @@ -368,8 +380,7 @@ bool GeometryManager::displace( } /* normalize vertex normals */ - done.clear(); - done.resize(num_verts, false); + vector<bool> done(num_verts, false); for (size_t i = 0; i < num_triangles; i++) { if (tri_has_true_disp[i]) { diff --git a/intern/cycles/render/nodes.cpp b/intern/cycles/render/nodes.cpp index 795166bcf4c..5303d55242e 100644 --- a/intern/cycles/render/nodes.cpp +++ b/intern/cycles/render/nodes.cpp @@ -2736,18 +2736,21 @@ NODE_DEFINE(PrincipledBsdfNode) distribution, "Distribution", distribution_enum, CLOSURE_BSDF_MICROFACET_MULTI_GGX_GLASS_ID); static NodeEnum subsurface_method_enum; - subsurface_method_enum.insert("burley", CLOSURE_BSSRDF_PRINCIPLED_ID); - subsurface_method_enum.insert("random_walk", CLOSURE_BSSRDF_PRINCIPLED_RANDOM_WALK_ID); + subsurface_method_enum.insert("random_walk_fixed_radius", + CLOSURE_BSSRDF_RANDOM_WALK_FIXED_RADIUS_ID); + subsurface_method_enum.insert("random_walk", CLOSURE_BSSRDF_RANDOM_WALK_ID); SOCKET_ENUM(subsurface_method, "Subsurface Method", subsurface_method_enum, - CLOSURE_BSSRDF_PRINCIPLED_ID); + CLOSURE_BSSRDF_RANDOM_WALK_ID); SOCKET_IN_COLOR(base_color, "Base Color", make_float3(0.8f, 0.8f, 0.8f)); SOCKET_IN_COLOR(subsurface_color, "Subsurface Color", make_float3(0.8f, 0.8f, 0.8f)); SOCKET_IN_FLOAT(metallic, "Metallic", 0.0f); SOCKET_IN_FLOAT(subsurface, "Subsurface", 0.0f); SOCKET_IN_VECTOR(subsurface_radius, "Subsurface Radius", make_float3(0.1f, 0.1f, 
0.1f)); + SOCKET_IN_FLOAT(subsurface_ior, "Subsurface IOR", 1.4f); + SOCKET_IN_FLOAT(subsurface_anisotropy, "Subsurface Anisotropy", 0.0f); SOCKET_IN_FLOAT(specular, "Specular", 0.0f); SOCKET_IN_FLOAT(roughness, "Roughness", 0.5f); SOCKET_IN_FLOAT(specular_tint, "Specular Tint", 0.0f); @@ -2857,6 +2860,8 @@ void PrincipledBsdfNode::compile(SVMCompiler &compiler, ShaderInput *p_metallic, ShaderInput *p_subsurface, ShaderInput *p_subsurface_radius, + ShaderInput *p_subsurface_ior, + ShaderInput *p_subsurface_anisotropy, ShaderInput *p_specular, ShaderInput *p_roughness, ShaderInput *p_specular_tint, @@ -2896,6 +2901,8 @@ void PrincipledBsdfNode::compile(SVMCompiler &compiler, int transmission_roughness_offset = compiler.stack_assign(p_transmission_roughness); int anisotropic_rotation_offset = compiler.stack_assign(p_anisotropic_rotation); int subsurface_radius_offset = compiler.stack_assign(p_subsurface_radius); + int subsurface_ior_offset = compiler.stack_assign(p_subsurface_ior); + int subsurface_anisotropy_offset = compiler.stack_assign(p_subsurface_anisotropy); compiler.add_node(NODE_CLOSURE_BSDF, compiler.encode_uchar4(closure, @@ -2929,8 +2936,10 @@ void PrincipledBsdfNode::compile(SVMCompiler &compiler, __float_as_int(bc_default.y), __float_as_int(bc_default.z)); - compiler.add_node( - clearcoat_normal_offset, subsurface_radius_offset, SVM_STACK_INVALID, SVM_STACK_INVALID); + compiler.add_node(clearcoat_normal_offset, + subsurface_radius_offset, + subsurface_ior_offset, + subsurface_anisotropy_offset); float3 ss_default = get_float3(subsurface_color_in->socket_type); @@ -2953,6 +2962,8 @@ void PrincipledBsdfNode::compile(SVMCompiler &compiler) input("Metallic"), input("Subsurface"), input("Subsurface Radius"), + input("Subsurface IOR"), + input("Subsurface Anisotropy"), input("Specular"), input("Roughness"), input("Specular Tint"), @@ -3048,16 +3059,16 @@ NODE_DEFINE(SubsurfaceScatteringNode) SOCKET_IN_NORMAL(normal, "Normal", zero_float3(), 
SocketType::LINK_NORMAL); SOCKET_IN_FLOAT(surface_mix_weight, "SurfaceMixWeight", 0.0f, SocketType::SVM_INTERNAL); - static NodeEnum falloff_enum; - falloff_enum.insert("cubic", CLOSURE_BSSRDF_CUBIC_ID); - falloff_enum.insert("gaussian", CLOSURE_BSSRDF_GAUSSIAN_ID); - falloff_enum.insert("burley", CLOSURE_BSSRDF_BURLEY_ID); - falloff_enum.insert("random_walk", CLOSURE_BSSRDF_RANDOM_WALK_ID); - SOCKET_ENUM(falloff, "Falloff", falloff_enum, CLOSURE_BSSRDF_BURLEY_ID); + static NodeEnum method_enum; + method_enum.insert("random_walk_fixed_radius", CLOSURE_BSSRDF_RANDOM_WALK_FIXED_RADIUS_ID); + method_enum.insert("random_walk", CLOSURE_BSSRDF_RANDOM_WALK_ID); + SOCKET_ENUM(method, "Method", method_enum, CLOSURE_BSSRDF_RANDOM_WALK_ID); + SOCKET_IN_FLOAT(scale, "Scale", 0.01f); SOCKET_IN_VECTOR(radius, "Radius", make_float3(0.1f, 0.1f, 0.1f)); - SOCKET_IN_FLOAT(sharpness, "Sharpness", 0.0f); - SOCKET_IN_FLOAT(texture_blur, "Texture Blur", 1.0f); + + SOCKET_IN_FLOAT(subsurface_ior, "IOR", 1.4f); + SOCKET_IN_FLOAT(subsurface_anisotropy, "Anisotropy", 0.0f); SOCKET_OUT_CLOSURE(BSSRDF, "BSSRDF"); @@ -3066,20 +3077,19 @@ NODE_DEFINE(SubsurfaceScatteringNode) SubsurfaceScatteringNode::SubsurfaceScatteringNode() : BsdfNode(get_node_type()) { - closure = falloff; + closure = method; } void SubsurfaceScatteringNode::compile(SVMCompiler &compiler) { - closure = falloff; - BsdfNode::compile( - compiler, input("Scale"), input("Texture Blur"), input("Radius"), input("Sharpness")); + closure = method; + BsdfNode::compile(compiler, input("Scale"), input("IOR"), input("Radius"), input("Anisotropy")); } void SubsurfaceScatteringNode::compile(OSLCompiler &compiler) { - closure = falloff; - compiler.parameter(this, "falloff"); + closure = method; + compiler.parameter(this, "method"); compiler.add(this, "node_subsurface_scattering"); } @@ -3786,20 +3796,6 @@ void GeometryNode::compile(OSLCompiler &compiler) compiler.add(this, "node_geometry"); } -int GeometryNode::get_group() -{ - 
ShaderOutput *out; - int result = ShaderNode::get_group(); - - /* Backfacing uses NODE_LIGHT_PATH */ - out = output("Backfacing"); - if (!out->links.empty()) { - result = max(result, NODE_GROUP_LEVEL_1); - } - - return result; -} - /* TextureCoordinate */ NODE_DEFINE(TextureCoordinateNode) @@ -5926,33 +5922,33 @@ NODE_DEFINE(OutputAOVNode) OutputAOVNode::OutputAOVNode() : ShaderNode(get_node_type()) { special_type = SHADER_SPECIAL_TYPE_OUTPUT_AOV; - slot = -1; + offset = -1; } void OutputAOVNode::simplify_settings(Scene *scene) { - slot = scene->film->get_aov_offset(scene, name.string(), is_color); - if (slot == -1) { - slot = scene->film->get_aov_offset(scene, name.string(), is_color); + offset = scene->film->get_aov_offset(scene, name.string(), is_color); + if (offset == -1) { + offset = scene->film->get_aov_offset(scene, name.string(), is_color); } - if (slot == -1 || is_color) { + if (offset == -1 || is_color) { input("Value")->disconnect(); } - if (slot == -1 || !is_color) { + if (offset == -1 || !is_color) { input("Color")->disconnect(); } } void OutputAOVNode::compile(SVMCompiler &compiler) { - assert(slot >= 0); + assert(offset >= 0); if (is_color) { - compiler.add_node(NODE_AOV_COLOR, compiler.stack_assign(input("Color")), slot); + compiler.add_node(NODE_AOV_COLOR, compiler.stack_assign(input("Color")), offset); } else { - compiler.add_node(NODE_AOV_VALUE, compiler.stack_assign(input("Value")), slot); + compiler.add_node(NODE_AOV_VALUE, compiler.stack_assign(input("Value")), offset); } } diff --git a/intern/cycles/render/nodes.h b/intern/cycles/render/nodes.h index 3013e9b1866..22bdb06b059 100644 --- a/intern/cycles/render/nodes.h +++ b/intern/cycles/render/nodes.h @@ -143,10 +143,6 @@ class EnvironmentTextureNode : public ImageSlotTextureNode { { return true; } - virtual int get_group() - { - return NODE_GROUP_LEVEL_2; - } virtual bool equals(const ShaderNode &other) { @@ -170,11 +166,6 @@ class SkyTextureNode : public TextureNode { public: 
SHADER_NODE_CLASS(SkyTextureNode) - virtual int get_group() - { - return NODE_GROUP_LEVEL_2; - } - NODE_SOCKET_API(NodeSkyType, sky_type) NODE_SOCKET_API(float3, sun_direction) NODE_SOCKET_API(float, turbidity) @@ -224,18 +215,13 @@ class OutputAOVNode : public ShaderNode { NODE_SOCKET_API(ustring, name) - virtual int get_group() - { - return NODE_GROUP_LEVEL_4; - } - /* Don't allow output node de-duplication. */ virtual bool equals(const ShaderNode & /*other*/) { return false; } - int slot; + int offset; bool is_color; }; @@ -243,11 +229,6 @@ class GradientTextureNode : public TextureNode { public: SHADER_NODE_CLASS(GradientTextureNode) - virtual int get_group() - { - return NODE_GROUP_LEVEL_2; - } - NODE_SOCKET_API(NodeGradientType, gradient_type) NODE_SOCKET_API(float3, vector) }; @@ -269,19 +250,14 @@ class VoronoiTextureNode : public TextureNode { public: SHADER_NODE_CLASS(VoronoiTextureNode) - virtual int get_group() - { - return NODE_GROUP_LEVEL_2; - } - virtual int get_feature() { int result = ShaderNode::get_feature(); if (dimensions == 4) { - result |= NODE_FEATURE_VORONOI_EXTRA; + result |= KERNEL_FEATURE_NODE_VORONOI_EXTRA; } else if (dimensions >= 2 && feature == NODE_VORONOI_SMOOTH_F1) { - result |= NODE_FEATURE_VORONOI_EXTRA; + result |= KERNEL_FEATURE_NODE_VORONOI_EXTRA; } return result; } @@ -301,11 +277,6 @@ class MusgraveTextureNode : public TextureNode { public: SHADER_NODE_CLASS(MusgraveTextureNode) - virtual int get_group() - { - return NODE_GROUP_LEVEL_2; - } - NODE_SOCKET_API(int, dimensions) NODE_SOCKET_API(NodeMusgraveType, musgrave_type) NODE_SOCKET_API(float, w) @@ -322,11 +293,6 @@ class WaveTextureNode : public TextureNode { public: SHADER_NODE_CLASS(WaveTextureNode) - virtual int get_group() - { - return NODE_GROUP_LEVEL_2; - } - NODE_SOCKET_API(NodeWaveType, wave_type) NODE_SOCKET_API(NodeWaveBandsDirection, bands_direction) NODE_SOCKET_API(NodeWaveRingsDirection, rings_direction) @@ -345,11 +311,6 @@ class MagicTextureNode : public 
TextureNode { public: SHADER_NODE_CLASS(MagicTextureNode) - virtual int get_group() - { - return NODE_GROUP_LEVEL_2; - } - NODE_SOCKET_API(int, depth) NODE_SOCKET_API(float3, vector) NODE_SOCKET_API(float, scale) @@ -364,11 +325,6 @@ class CheckerTextureNode : public TextureNode { NODE_SOCKET_API(float3, color1) NODE_SOCKET_API(float3, color2) NODE_SOCKET_API(float, scale) - - virtual int get_group() - { - return NODE_GROUP_LEVEL_2; - } }; class BrickTextureNode : public TextureNode { @@ -390,20 +346,11 @@ class BrickTextureNode : public TextureNode { NODE_SOCKET_API(float, brick_width) NODE_SOCKET_API(float, row_height) NODE_SOCKET_API(float3, vector) - - virtual int get_group() - { - return NODE_GROUP_LEVEL_2; - } }; class PointDensityTextureNode : public ShaderNode { public: SHADER_NODE_NO_CLONE_CLASS(PointDensityTextureNode) - virtual int get_group() - { - return NODE_GROUP_LEVEL_4; - } ~PointDensityTextureNode(); ShaderNode *clone(ShaderGraph *graph) const; @@ -443,10 +390,6 @@ class IESLightNode : public TextureNode { ~IESLightNode(); ShaderNode *clone(ShaderGraph *graph) const; - virtual int get_group() - { - return NODE_GROUP_LEVEL_2; - } NODE_SOCKET_API(ustring, filename) NODE_SOCKET_API(ustring, ies) @@ -464,10 +407,6 @@ class IESLightNode : public TextureNode { class WhiteNoiseTextureNode : public ShaderNode { public: SHADER_NODE_CLASS(WhiteNoiseTextureNode) - virtual int get_group() - { - return NODE_GROUP_LEVEL_2; - } NODE_SOCKET_API(int, dimensions) NODE_SOCKET_API(float3, vector) @@ -477,10 +416,6 @@ class WhiteNoiseTextureNode : public ShaderNode { class MappingNode : public ShaderNode { public: SHADER_NODE_CLASS(MappingNode) - virtual int get_group() - { - return NODE_GROUP_LEVEL_2; - } void constant_fold(const ConstantFolder &folder); NODE_SOCKET_API(float3, vector) @@ -546,6 +481,11 @@ class BsdfBaseNode : public ShaderNode { return false; } + virtual int get_feature() + { + return ShaderNode::get_feature() | KERNEL_FEATURE_NODE_BSDF; + } + 
protected: ClosureType closure; }; @@ -606,6 +546,8 @@ class PrincipledBsdfNode : public BsdfBaseNode { ShaderInput *metallic, ShaderInput *subsurface, ShaderInput *subsurface_radius, + ShaderInput *subsurface_ior, + ShaderInput *subsurface_anisotropy, ShaderInput *specular, ShaderInput *roughness, ShaderInput *specular_tint, @@ -622,6 +564,8 @@ class PrincipledBsdfNode : public BsdfBaseNode { NODE_SOCKET_API(float3, base_color) NODE_SOCKET_API(float3, subsurface_color) NODE_SOCKET_API(float3, subsurface_radius) + NODE_SOCKET_API(float, subsurface_ior) + NODE_SOCKET_API(float, subsurface_anisotropy) NODE_SOCKET_API(float, metallic) NODE_SOCKET_API(float, subsurface) NODE_SOCKET_API(float, specular) @@ -758,14 +702,14 @@ class SubsurfaceScatteringNode : public BsdfNode { bool has_bssrdf_bump(); ClosureType get_closure_type() { - return falloff; + return method; } NODE_SOCKET_API(float, scale) NODE_SOCKET_API(float3, radius) - NODE_SOCKET_API(float, sharpness) - NODE_SOCKET_API(float, texture_blur) - NODE_SOCKET_API(ClosureType, falloff) + NODE_SOCKET_API(float, subsurface_ior) + NODE_SOCKET_API(float, subsurface_anisotropy) + NODE_SOCKET_API(ClosureType, method) }; class EmissionNode : public ShaderNode { @@ -782,6 +726,11 @@ class EmissionNode : public ShaderNode { return true; } + virtual int get_feature() + { + return ShaderNode::get_feature() | KERNEL_FEATURE_NODE_EMISSION; + } + NODE_SOCKET_API(float3, color) NODE_SOCKET_API(float, strength) NODE_SOCKET_API(float, surface_mix_weight) @@ -792,6 +741,11 @@ class BackgroundNode : public ShaderNode { SHADER_NODE_CLASS(BackgroundNode) void constant_fold(const ConstantFolder &folder); + virtual int get_feature() + { + return ShaderNode::get_feature() | KERNEL_FEATURE_NODE_EMISSION; + } + NODE_SOCKET_API(float3, color) NODE_SOCKET_API(float, strength) NODE_SOCKET_API(float, surface_mix_weight) @@ -800,10 +754,6 @@ class BackgroundNode : public ShaderNode { class HoldoutNode : public ShaderNode { public: 
SHADER_NODE_CLASS(HoldoutNode) - virtual int get_group() - { - return NODE_GROUP_LEVEL_1; - } virtual ClosureType get_closure_type() { return CLOSURE_HOLDOUT_ID; @@ -821,13 +771,9 @@ class AmbientOcclusionNode : public ShaderNode { { return true; } - virtual int get_group() - { - return NODE_GROUP_LEVEL_3; - } - virtual bool has_raytrace() + virtual int get_feature() { - return true; + return KERNEL_FEATURE_NODE_RAYTRACE; } NODE_SOCKET_API(float3, color) @@ -845,13 +791,9 @@ class VolumeNode : public ShaderNode { SHADER_NODE_BASE_CLASS(VolumeNode) void compile(SVMCompiler &compiler, ShaderInput *param1, ShaderInput *param2); - virtual int get_group() - { - return NODE_GROUP_LEVEL_1; - } virtual int get_feature() { - return ShaderNode::get_feature() | NODE_FEATURE_VOLUME; + return ShaderNode::get_feature() | KERNEL_FEATURE_NODE_VOLUME; } virtual ClosureType get_closure_type() { @@ -1013,10 +955,6 @@ class UVMapNode : public ShaderNode { { return true; } - virtual int get_group() - { - return NODE_GROUP_LEVEL_1; - } NODE_SOCKET_API(ustring, attribute) NODE_SOCKET_API(bool, from_dupli) @@ -1025,10 +963,6 @@ class UVMapNode : public ShaderNode { class LightPathNode : public ShaderNode { public: SHADER_NODE_CLASS(LightPathNode) - virtual int get_group() - { - return NODE_GROUP_LEVEL_1; - } }; class LightFalloffNode : public ShaderNode { @@ -1038,10 +972,6 @@ class LightFalloffNode : public ShaderNode { { return true; } - virtual int get_group() - { - return NODE_GROUP_LEVEL_2; - } NODE_SOCKET_API(float, strength) NODE_SOCKET_API(float, smooth) @@ -1050,10 +980,6 @@ class LightFalloffNode : public ShaderNode { class ObjectInfoNode : public ShaderNode { public: SHADER_NODE_CLASS(ObjectInfoNode) - virtual int get_group() - { - return NODE_GROUP_LEVEL_1; - } }; class ParticleInfoNode : public ShaderNode { @@ -1064,10 +990,6 @@ class ParticleInfoNode : public ShaderNode { { return true; } - virtual int get_group() - { - return NODE_GROUP_LEVEL_1; - } }; class HairInfoNode : 
public ShaderNode { @@ -1083,13 +1005,9 @@ class HairInfoNode : public ShaderNode { { return true; } - virtual int get_group() - { - return NODE_GROUP_LEVEL_1; - } virtual int get_feature() { - return ShaderNode::get_feature() | NODE_FEATURE_HAIR; + return ShaderNode::get_feature() | KERNEL_FEATURE_NODE_HAIR; } }; @@ -1168,10 +1086,6 @@ class InvertNode : public ShaderNode { public: SHADER_NODE_CLASS(InvertNode) void constant_fold(const ConstantFolder &folder); - virtual int get_group() - { - return NODE_GROUP_LEVEL_3; - } NODE_SOCKET_API(float, fac) NODE_SOCKET_API(float3, color) @@ -1182,11 +1096,6 @@ class MixNode : public ShaderNode { SHADER_NODE_CLASS(MixNode) void constant_fold(const ConstantFolder &folder); - virtual int get_group() - { - return NODE_GROUP_LEVEL_3; - } - NODE_SOCKET_API(NodeMix, mix_type) NODE_SOCKET_API(bool, use_clamp) NODE_SOCKET_API(float3, color1) @@ -1198,10 +1107,6 @@ class CombineRGBNode : public ShaderNode { public: SHADER_NODE_CLASS(CombineRGBNode) void constant_fold(const ConstantFolder &folder); - virtual int get_group() - { - return NODE_GROUP_LEVEL_3; - } NODE_SOCKET_API(float, r) NODE_SOCKET_API(float, g) @@ -1212,10 +1117,6 @@ class CombineHSVNode : public ShaderNode { public: SHADER_NODE_CLASS(CombineHSVNode) void constant_fold(const ConstantFolder &folder); - virtual int get_group() - { - return NODE_GROUP_LEVEL_3; - } NODE_SOCKET_API(float, h) NODE_SOCKET_API(float, s) @@ -1226,10 +1127,6 @@ class CombineXYZNode : public ShaderNode { public: SHADER_NODE_CLASS(CombineXYZNode) void constant_fold(const ConstantFolder &folder); - virtual int get_group() - { - return NODE_GROUP_LEVEL_3; - } NODE_SOCKET_API(float, x) NODE_SOCKET_API(float, y) @@ -1240,10 +1137,6 @@ class GammaNode : public ShaderNode { public: SHADER_NODE_CLASS(GammaNode) void constant_fold(const ConstantFolder &folder); - virtual int get_group() - { - return NODE_GROUP_LEVEL_1; - } NODE_SOCKET_API(float3, color) NODE_SOCKET_API(float, gamma) @@ -1253,10 +1146,6 
@@ class BrightContrastNode : public ShaderNode { public: SHADER_NODE_CLASS(BrightContrastNode) void constant_fold(const ConstantFolder &folder); - virtual int get_group() - { - return NODE_GROUP_LEVEL_1; - } NODE_SOCKET_API(float3, color) NODE_SOCKET_API(float, bright) @@ -1267,10 +1156,6 @@ class SeparateRGBNode : public ShaderNode { public: SHADER_NODE_CLASS(SeparateRGBNode) void constant_fold(const ConstantFolder &folder); - virtual int get_group() - { - return NODE_GROUP_LEVEL_3; - } NODE_SOCKET_API(float3, color) }; @@ -1279,10 +1164,6 @@ class SeparateHSVNode : public ShaderNode { public: SHADER_NODE_CLASS(SeparateHSVNode) void constant_fold(const ConstantFolder &folder); - virtual int get_group() - { - return NODE_GROUP_LEVEL_3; - } NODE_SOCKET_API(float3, color) }; @@ -1291,10 +1172,6 @@ class SeparateXYZNode : public ShaderNode { public: SHADER_NODE_CLASS(SeparateXYZNode) void constant_fold(const ConstantFolder &folder); - virtual int get_group() - { - return NODE_GROUP_LEVEL_3; - } NODE_SOCKET_API(float3, vector) }; @@ -1333,10 +1210,6 @@ class CameraNode : public ShaderNode { { return true; } - virtual int get_group() - { - return NODE_GROUP_LEVEL_2; - } }; class FresnelNode : public ShaderNode { @@ -1346,10 +1219,6 @@ class FresnelNode : public ShaderNode { { return true; } - virtual int get_group() - { - return NODE_GROUP_LEVEL_1; - } NODE_SOCKET_API(float3, normal) NODE_SOCKET_API(float, IOR) @@ -1362,10 +1231,6 @@ class LayerWeightNode : public ShaderNode { { return true; } - virtual int get_group() - { - return NODE_GROUP_LEVEL_1; - } NODE_SOCKET_API(float3, normal) NODE_SOCKET_API(float, blend) @@ -1378,10 +1243,6 @@ class WireframeNode : public ShaderNode { { return true; } - virtual int get_group() - { - return NODE_GROUP_LEVEL_3; - } NODE_SOCKET_API(float, size) NODE_SOCKET_API(bool, use_pixel_size) @@ -1390,10 +1251,6 @@ class WireframeNode : public ShaderNode { class WavelengthNode : public ShaderNode { public: 
SHADER_NODE_CLASS(WavelengthNode) - virtual int get_group() - { - return NODE_GROUP_LEVEL_3; - } NODE_SOCKET_API(float, wavelength) }; @@ -1402,10 +1259,6 @@ class BlackbodyNode : public ShaderNode { public: SHADER_NODE_CLASS(BlackbodyNode) void constant_fold(const ConstantFolder &folder); - virtual int get_group() - { - return NODE_GROUP_LEVEL_3; - } NODE_SOCKET_API(float, temperature) }; @@ -1413,10 +1266,6 @@ class BlackbodyNode : public ShaderNode { class MapRangeNode : public ShaderNode { public: SHADER_NODE_CLASS(MapRangeNode) - virtual int get_group() - { - return NODE_GROUP_LEVEL_3; - } void expand(ShaderGraph *graph); NODE_SOCKET_API(float, value) @@ -1433,10 +1282,6 @@ class ClampNode : public ShaderNode { public: SHADER_NODE_CLASS(ClampNode) void constant_fold(const ConstantFolder &folder); - virtual int get_group() - { - return NODE_GROUP_LEVEL_3; - } NODE_SOCKET_API(float, value) NODE_SOCKET_API(float, min) NODE_SOCKET_API(float, max) @@ -1446,10 +1291,6 @@ class ClampNode : public ShaderNode { class MathNode : public ShaderNode { public: SHADER_NODE_CLASS(MathNode) - virtual int get_group() - { - return NODE_GROUP_LEVEL_1; - } void expand(ShaderGraph *graph); void constant_fold(const ConstantFolder &folder); @@ -1463,10 +1304,6 @@ class MathNode : public ShaderNode { class NormalNode : public ShaderNode { public: SHADER_NODE_CLASS(NormalNode) - virtual int get_group() - { - return NODE_GROUP_LEVEL_2; - } NODE_SOCKET_API(float3, direction) NODE_SOCKET_API(float3, normal) @@ -1475,10 +1312,6 @@ class NormalNode : public ShaderNode { class VectorMathNode : public ShaderNode { public: SHADER_NODE_CLASS(VectorMathNode) - virtual int get_group() - { - return NODE_GROUP_LEVEL_1; - } void constant_fold(const ConstantFolder &folder); NODE_SOCKET_API(float3, vector1) @@ -1492,10 +1325,6 @@ class VectorRotateNode : public ShaderNode { public: SHADER_NODE_CLASS(VectorRotateNode) - virtual int get_group() - { - return NODE_GROUP_LEVEL_3; - } 
NODE_SOCKET_API(NodeVectorRotateType, rotate_type) NODE_SOCKET_API(bool, invert) NODE_SOCKET_API(float3, vector) @@ -1509,11 +1338,6 @@ class VectorTransformNode : public ShaderNode { public: SHADER_NODE_CLASS(VectorTransformNode) - virtual int get_group() - { - return NODE_GROUP_LEVEL_3; - } - NODE_SOCKET_API(NodeVectorTransformType, transform_type) NODE_SOCKET_API(NodeVectorTransformConvertSpace, convert_from) NODE_SOCKET_API(NodeVectorTransformConvertSpace, convert_to) @@ -1530,7 +1354,7 @@ class BumpNode : public ShaderNode { } virtual int get_feature() { - return NODE_FEATURE_BUMP; + return KERNEL_FEATURE_NODE_BUMP; } NODE_SOCKET_API(bool, invert) @@ -1549,11 +1373,6 @@ class CurvesNode : public ShaderNode { explicit CurvesNode(const NodeType *node_type); SHADER_NODE_BASE_CLASS(CurvesNode) - virtual int get_group() - { - return NODE_GROUP_LEVEL_3; - } - NODE_SOCKET_API_ARRAY(array<float3>, curves) NODE_SOCKET_API(float, min_x) NODE_SOCKET_API(float, max_x) @@ -1583,10 +1402,6 @@ class RGBRampNode : public ShaderNode { public: SHADER_NODE_CLASS(RGBRampNode) void constant_fold(const ConstantFolder &folder); - virtual int get_group() - { - return NODE_GROUP_LEVEL_1; - } NODE_SOCKET_API_ARRAY(array<float3>, ramp) NODE_SOCKET_API_ARRAY(array<float>, ramp_alpha) @@ -1656,10 +1471,6 @@ class NormalMapNode : public ShaderNode { { return true; } - virtual int get_group() - { - return NODE_GROUP_LEVEL_3; - } NODE_SOCKET_API(NodeNormalMapSpace, space) NODE_SOCKET_API(ustring, attribute) @@ -1680,10 +1491,6 @@ class TangentNode : public ShaderNode { { return true; } - virtual int get_group() - { - return NODE_GROUP_LEVEL_3; - } NODE_SOCKET_API(NodeTangentDirectionType, direction_type) NODE_SOCKET_API(NodeTangentAxis, axis) @@ -1698,13 +1505,9 @@ class BevelNode : public ShaderNode { { return true; } - virtual int get_group() - { - return NODE_GROUP_LEVEL_3; - } - virtual bool has_raytrace() + virtual int get_feature() { - return true; + return 
KERNEL_FEATURE_NODE_RAYTRACE; } NODE_SOCKET_API(float, radius) @@ -1718,7 +1521,7 @@ class DisplacementNode : public ShaderNode { void constant_fold(const ConstantFolder &folder); virtual int get_feature() { - return NODE_FEATURE_BUMP; + return KERNEL_FEATURE_NODE_BUMP; } NODE_SOCKET_API(NodeNormalMapSpace, space) @@ -1739,7 +1542,7 @@ class VectorDisplacementNode : public ShaderNode { void constant_fold(const ConstantFolder &folder); virtual int get_feature() { - return NODE_FEATURE_BUMP; + return KERNEL_FEATURE_NODE_BUMP; } NODE_SOCKET_API(NodeNormalMapSpace, space) diff --git a/intern/cycles/render/object.cpp b/intern/cycles/render/object.cpp index c88d94fe4c2..4637f8fe989 100644 --- a/intern/cycles/render/object.cpp +++ b/intern/cycles/render/object.cpp @@ -216,6 +216,10 @@ void Object::tag_update(Scene *scene) if (use_holdout_is_modified()) { flag |= ObjectManager::HOLDOUT_MODIFIED; } + + if (is_shadow_catcher_is_modified()) { + scene->tag_shadow_catcher_modified(); + } } if (geometry) { @@ -273,14 +277,7 @@ bool Object::is_traceable() const uint Object::visibility_for_tracing() const { - uint trace_visibility = visibility; - if (is_shadow_catcher) { - trace_visibility &= ~PATH_RAY_SHADOW_NON_CATCHER; - } - else { - trace_visibility &= ~PATH_RAY_SHADOW_CATCHER; - } - return trace_visibility; + return SHADOW_CATCHER_OBJECT_VISIBILITY(is_shadow_catcher, visibility & PATH_RAY_ALL_VISIBILITY); } float Object::compute_volume_step_size() const @@ -680,7 +677,7 @@ void ObjectManager::device_update(Device *device, /* prepare for static BVH building */ /* todo: do before to support getting object level coords? 
*/ - if (scene->params.bvh_type == SceneParams::BVH_STATIC) { + if (scene->params.bvh_type == BVH_TYPE_STATIC) { scoped_callback_timer timer([scene](double time) { if (scene->update_stats) { scene->update_stats->object.times.add_entry( @@ -932,6 +929,11 @@ void ObjectManager::tag_update(Scene *scene, uint32_t flag) } scene->light_manager->tag_update(scene, LightManager::OBJECT_MANAGER); + + /* Integrator's shadow catcher settings depends on object visibility settings. */ + if (flag & (OBJECT_ADDED | OBJECT_REMOVED | OBJECT_MODIFIED)) { + scene->integrator->tag_update(scene, Integrator::OBJECT_MANAGER); + } } bool ObjectManager::need_update() const diff --git a/intern/cycles/render/osl.cpp b/intern/cycles/render/osl.cpp index 7dc79f48145..d28b222c10e 100644 --- a/intern/cycles/render/osl.cpp +++ b/intern/cycles/render/osl.cpp @@ -113,7 +113,7 @@ void OSLShaderManager::device_update_specific(Device *device, scene->image_manager->set_osl_texture_system((void *)ts); /* create shaders */ - OSLGlobals *og = (OSLGlobals *)device->osl_memory(); + OSLGlobals *og = (OSLGlobals *)device->get_cpu_osl_memory(); Shader *background_shader = scene->background->get_shader(scene); foreach (Shader *shader, scene->shaders) { @@ -174,7 +174,7 @@ void OSLShaderManager::device_update_specific(Device *device, void OSLShaderManager::device_free(Device *device, DeviceScene *dscene, Scene *scene) { - OSLGlobals *og = (OSLGlobals *)device->osl_memory(); + OSLGlobals *og = (OSLGlobals *)device->get_cpu_osl_memory(); device_free_common(device, dscene, scene); @@ -257,25 +257,36 @@ void OSLShaderManager::shading_system_init() /* our own ray types */ static const char *raytypes[] = { - "camera", /* PATH_RAY_CAMERA */ - "reflection", /* PATH_RAY_REFLECT */ - "refraction", /* PATH_RAY_TRANSMIT */ - "diffuse", /* PATH_RAY_DIFFUSE */ - "glossy", /* PATH_RAY_GLOSSY */ - "singular", /* PATH_RAY_SINGULAR */ - "transparent", /* PATH_RAY_TRANSPARENT */ - - "shadow", /* PATH_RAY_SHADOW_OPAQUE_NON_CATCHER 
*/ - "shadow", /* PATH_RAY_SHADOW_OPAQUE_CATCHER */ - "shadow", /* PATH_RAY_SHADOW_TRANSPARENT_NON_CATCHER */ - "shadow", /* PATH_RAY_SHADOW_TRANSPARENT_CATCHER */ - - "__unused__", "volume_scatter", /* PATH_RAY_VOLUME_SCATTER */ - "__unused__", - - "__unused__", "diffuse_ancestor", /* PATH_RAY_DIFFUSE_ANCESTOR */ - "__unused__", "__unused__", "__unused__", "__unused__", - "__unused__", "__unused__", "__unused__", + "camera", /* PATH_RAY_CAMERA */ + "reflection", /* PATH_RAY_REFLECT */ + "refraction", /* PATH_RAY_TRANSMIT */ + "diffuse", /* PATH_RAY_DIFFUSE */ + "glossy", /* PATH_RAY_GLOSSY */ + "singular", /* PATH_RAY_SINGULAR */ + "transparent", /* PATH_RAY_TRANSPARENT */ + "volume_scatter", /* PATH_RAY_VOLUME_SCATTER */ + + "shadow", /* PATH_RAY_SHADOW_OPAQUE */ + "shadow", /* PATH_RAY_SHADOW_TRANSPARENT */ + + "__unused__", /* PATH_RAY_NODE_UNALIGNED */ + "__unused__", /* PATH_RAY_MIS_SKIP */ + + "diffuse_ancestor", /* PATH_RAY_DIFFUSE_ANCESTOR */ + + "__unused__", /* PATH_RAY_SINGLE_PASS_DONE */ + "__unused__", /* PATH_RAY_TRANSPARENT_BACKGROUND */ + "__unused__", /* PATH_RAY_TERMINATE_IMMEDIATE */ + "__unused__", /* PATH_RAY_TERMINATE_AFTER_TRANSPARENT */ + "__unused__", /* PATH_RAY_EMISSION */ + "__unused__", /* PATH_RAY_SUBSURFACE */ + "__unused__", /* PATH_RAY_DENOISING_FEATURES */ + "__unused__", /* PATH_RAY_REFLECT_PASS */ + "__unused__", /* PATH_RAY_TRANSMISSION_PASS */ + "__unused__", /* PATH_RAY_VOLUME_PASS */ + "__unused__", /* PATH_RAY_SHADOW_FOR_LIGHT */ + "__unused__", /* PATH_RAY_SHADOW_CATCHER_HIT */ + "__unused__", /* PATH_RAY_SHADOW_CATCHER_PASS */ }; const int nraytypes = sizeof(raytypes) / sizeof(raytypes[0]); @@ -758,7 +769,8 @@ void OSLCompiler::add(ShaderNode *node, const char *name, bool isfilepath) current_shader->has_surface_bssrdf = true; current_shader->has_bssrdf_bump = true; /* can't detect yet */ } - current_shader->has_bump = true; /* can't detect yet */ + current_shader->has_bump = true; /* can't detect yet */ + 
current_shader->has_surface_raytrace = true; /* can't detect yet */ } if (node->has_spatial_varying()) { @@ -1054,6 +1066,8 @@ void OSLCompiler::generate_nodes(const ShaderNodeSet &nodes) current_shader->has_surface_emission = true; if (node->has_surface_transparent()) current_shader->has_surface_transparent = true; + if (node->get_feature() & KERNEL_FEATURE_NODE_RAYTRACE) + current_shader->has_surface_raytrace = true; if (node->has_spatial_varying()) current_shader->has_surface_spatial_varying = true; if (node->has_surface_bssrdf()) { diff --git a/intern/cycles/render/pass.cpp b/intern/cycles/render/pass.cpp new file mode 100644 index 00000000000..27ad7c0db97 --- /dev/null +++ b/intern/cycles/render/pass.cpp @@ -0,0 +1,427 @@ +/* + * Copyright 2011-2021 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "render/pass.h" + +#include "util/util_algorithm.h" +#include "util/util_logging.h" + +CCL_NAMESPACE_BEGIN + +const char *pass_type_as_string(const PassType type) +{ + const int type_int = static_cast<int>(type); + + const NodeEnum *type_enum = Pass::get_type_enum(); + + if (!type_enum->exists(type_int)) { + LOG(DFATAL) << "Unhandled pass type " << static_cast<int>(type) << ", not supposed to happen."; + return "UNKNOWN"; + } + + return (*type_enum)[type_int].c_str(); +} + +const char *pass_mode_as_string(PassMode mode) +{ + switch (mode) { + case PassMode::NOISY: + return "NOISY"; + case PassMode::DENOISED: + return "DENOISED"; + } + + LOG(DFATAL) << "Unhandled pass mode " << static_cast<int>(mode) << ", should never happen."; + return "UNKNOWN"; +} + +std::ostream &operator<<(std::ostream &os, PassMode mode) +{ + os << pass_mode_as_string(mode); + return os; +} + +const NodeEnum *Pass::get_type_enum() +{ + static NodeEnum pass_type_enum; + + if (pass_type_enum.empty()) { + + /* Light Passes. 
*/ + pass_type_enum.insert("combined", PASS_COMBINED); + pass_type_enum.insert("emission", PASS_EMISSION); + pass_type_enum.insert("background", PASS_BACKGROUND); + pass_type_enum.insert("ao", PASS_AO); + pass_type_enum.insert("shadow", PASS_SHADOW); + pass_type_enum.insert("diffuse", PASS_DIFFUSE); + pass_type_enum.insert("diffuse_direct", PASS_DIFFUSE_DIRECT); + pass_type_enum.insert("diffuse_indirect", PASS_DIFFUSE_INDIRECT); + pass_type_enum.insert("glossy", PASS_GLOSSY); + pass_type_enum.insert("glossy_direct", PASS_GLOSSY_DIRECT); + pass_type_enum.insert("glossy_indirect", PASS_GLOSSY_INDIRECT); + pass_type_enum.insert("transmission", PASS_TRANSMISSION); + pass_type_enum.insert("transmission_direct", PASS_TRANSMISSION_DIRECT); + pass_type_enum.insert("transmission_indirect", PASS_TRANSMISSION_INDIRECT); + pass_type_enum.insert("volume", PASS_VOLUME); + pass_type_enum.insert("volume_direct", PASS_VOLUME_DIRECT); + pass_type_enum.insert("volume_indirect", PASS_VOLUME_INDIRECT); + + /* Data passes. 
*/ + pass_type_enum.insert("depth", PASS_DEPTH); + pass_type_enum.insert("position", PASS_POSITION); + pass_type_enum.insert("normal", PASS_NORMAL); + pass_type_enum.insert("roughness", PASS_ROUGHNESS); + pass_type_enum.insert("uv", PASS_UV); + pass_type_enum.insert("object_id", PASS_OBJECT_ID); + pass_type_enum.insert("material_id", PASS_MATERIAL_ID); + pass_type_enum.insert("motion", PASS_MOTION); + pass_type_enum.insert("motion_weight", PASS_MOTION_WEIGHT); + pass_type_enum.insert("render_time", PASS_RENDER_TIME); + pass_type_enum.insert("cryptomatte", PASS_CRYPTOMATTE); + pass_type_enum.insert("aov_color", PASS_AOV_COLOR); + pass_type_enum.insert("aov_value", PASS_AOV_VALUE); + pass_type_enum.insert("adaptive_aux_buffer", PASS_ADAPTIVE_AUX_BUFFER); + pass_type_enum.insert("sample_count", PASS_SAMPLE_COUNT); + pass_type_enum.insert("diffuse_color", PASS_DIFFUSE_COLOR); + pass_type_enum.insert("glossy_color", PASS_GLOSSY_COLOR); + pass_type_enum.insert("transmission_color", PASS_TRANSMISSION_COLOR); + pass_type_enum.insert("mist", PASS_MIST); + pass_type_enum.insert("denoising_normal", PASS_DENOISING_NORMAL); + pass_type_enum.insert("denoising_albedo", PASS_DENOISING_ALBEDO); + + pass_type_enum.insert("shadow_catcher", PASS_SHADOW_CATCHER); + pass_type_enum.insert("shadow_catcher_sample_count", PASS_SHADOW_CATCHER_SAMPLE_COUNT); + pass_type_enum.insert("shadow_catcher_matte", PASS_SHADOW_CATCHER_MATTE); + + pass_type_enum.insert("bake_primitive", PASS_BAKE_PRIMITIVE); + pass_type_enum.insert("bake_differential", PASS_BAKE_DIFFERENTIAL); + } + + return &pass_type_enum; +} + +const NodeEnum *Pass::get_mode_enum() +{ + static NodeEnum pass_mode_enum; + + if (pass_mode_enum.empty()) { + pass_mode_enum.insert("noisy", static_cast<int>(PassMode::NOISY)); + pass_mode_enum.insert("denoised", static_cast<int>(PassMode::DENOISED)); + } + + return &pass_mode_enum; +} + +NODE_DEFINE(Pass) +{ + NodeType *type = NodeType::add("pass", create); + + const NodeEnum *pass_type_enum 
= get_type_enum(); + const NodeEnum *pass_mode_enum = get_mode_enum(); + + SOCKET_ENUM(type, "Type", *pass_type_enum, PASS_COMBINED); + SOCKET_ENUM(mode, "Mode", *pass_mode_enum, static_cast<int>(PassMode::DENOISED)); + SOCKET_STRING(name, "Name", ustring()); + SOCKET_BOOLEAN(include_albedo, "Include Albedo", false); + + return type; +} + +Pass::Pass() : Node(get_node_type()), is_auto_(false) +{ +} + +PassInfo Pass::get_info() const +{ + return get_info(type, include_albedo); +} + +bool Pass::is_written() const +{ + return get_info().is_written; +} + +PassInfo Pass::get_info(const PassType type, const bool include_albedo) +{ + PassInfo pass_info; + + pass_info.use_filter = true; + pass_info.use_exposure = false; + pass_info.divide_type = PASS_NONE; + pass_info.use_compositing = false; + pass_info.use_denoising_albedo = true; + + switch (type) { + case PASS_NONE: + pass_info.num_components = 0; + break; + case PASS_COMBINED: + pass_info.num_components = 4; + pass_info.use_exposure = true; + pass_info.support_denoise = true; + break; + case PASS_DEPTH: + pass_info.num_components = 1; + pass_info.use_filter = false; + break; + case PASS_MIST: + pass_info.num_components = 1; + break; + case PASS_POSITION: + pass_info.num_components = 3; + break; + case PASS_NORMAL: + pass_info.num_components = 3; + break; + case PASS_ROUGHNESS: + pass_info.num_components = 1; + break; + case PASS_UV: + pass_info.num_components = 3; + break; + case PASS_MOTION: + pass_info.num_components = 4; + pass_info.divide_type = PASS_MOTION_WEIGHT; + break; + case PASS_MOTION_WEIGHT: + pass_info.num_components = 1; + break; + case PASS_OBJECT_ID: + case PASS_MATERIAL_ID: + pass_info.num_components = 1; + pass_info.use_filter = false; + break; + + case PASS_EMISSION: + case PASS_BACKGROUND: + pass_info.num_components = 3; + pass_info.use_exposure = true; + break; + case PASS_AO: + pass_info.num_components = 3; + break; + case PASS_SHADOW: + pass_info.num_components = 3; + pass_info.use_exposure = 
false; + break; + case PASS_RENDER_TIME: + /* This pass is handled entirely on the host side. */ + pass_info.num_components = 0; + break; + + case PASS_DIFFUSE_COLOR: + case PASS_GLOSSY_COLOR: + case PASS_TRANSMISSION_COLOR: + pass_info.num_components = 3; + break; + case PASS_DIFFUSE: + pass_info.num_components = 3; + pass_info.use_exposure = true; + pass_info.direct_type = PASS_DIFFUSE_DIRECT; + pass_info.indirect_type = PASS_DIFFUSE_INDIRECT; + pass_info.divide_type = (!include_albedo) ? PASS_DIFFUSE_COLOR : PASS_NONE; + pass_info.use_compositing = true; + pass_info.is_written = false; + break; + case PASS_DIFFUSE_DIRECT: + case PASS_DIFFUSE_INDIRECT: + pass_info.num_components = 3; + pass_info.use_exposure = true; + pass_info.divide_type = (!include_albedo) ? PASS_DIFFUSE_COLOR : PASS_NONE; + pass_info.use_compositing = true; + break; + case PASS_GLOSSY: + pass_info.num_components = 3; + pass_info.use_exposure = true; + pass_info.direct_type = PASS_GLOSSY_DIRECT; + pass_info.indirect_type = PASS_GLOSSY_INDIRECT; + pass_info.divide_type = (!include_albedo) ? PASS_GLOSSY_COLOR : PASS_NONE; + pass_info.use_compositing = true; + pass_info.is_written = false; + break; + case PASS_GLOSSY_DIRECT: + case PASS_GLOSSY_INDIRECT: + pass_info.num_components = 3; + pass_info.use_exposure = true; + pass_info.divide_type = (!include_albedo) ? PASS_GLOSSY_COLOR : PASS_NONE; + pass_info.use_compositing = true; + break; + case PASS_TRANSMISSION: + pass_info.num_components = 3; + pass_info.use_exposure = true; + pass_info.direct_type = PASS_TRANSMISSION_DIRECT; + pass_info.indirect_type = PASS_TRANSMISSION_INDIRECT; + pass_info.divide_type = (!include_albedo) ? PASS_TRANSMISSION_COLOR : PASS_NONE; + pass_info.use_compositing = true; + pass_info.is_written = false; + break; + case PASS_TRANSMISSION_DIRECT: + case PASS_TRANSMISSION_INDIRECT: + pass_info.num_components = 3; + pass_info.use_exposure = true; + pass_info.divide_type = (!include_albedo) ? 
PASS_TRANSMISSION_COLOR : PASS_NONE; + pass_info.use_compositing = true; + break; + case PASS_VOLUME: + pass_info.num_components = 3; + pass_info.use_exposure = true; + pass_info.direct_type = PASS_VOLUME_DIRECT; + pass_info.indirect_type = PASS_VOLUME_INDIRECT; + pass_info.use_compositing = true; + pass_info.is_written = false; + break; + case PASS_VOLUME_DIRECT: + case PASS_VOLUME_INDIRECT: + pass_info.num_components = 3; + pass_info.use_exposure = true; + break; + + case PASS_CRYPTOMATTE: + pass_info.num_components = 4; + break; + + case PASS_DENOISING_NORMAL: + pass_info.num_components = 3; + break; + case PASS_DENOISING_ALBEDO: + pass_info.num_components = 3; + break; + + case PASS_SHADOW_CATCHER: + pass_info.num_components = 3; + pass_info.use_exposure = true; + pass_info.use_compositing = true; + pass_info.use_denoising_albedo = false; + pass_info.support_denoise = true; + break; + case PASS_SHADOW_CATCHER_SAMPLE_COUNT: + pass_info.num_components = 1; + break; + case PASS_SHADOW_CATCHER_MATTE: + pass_info.num_components = 4; + pass_info.use_exposure = true; + pass_info.support_denoise = true; + /* Without shadow catcher approximation compositing is not needed. + * Since we don't know here whether approximation is used or not, leave the decision up to + * the caller which will know that. 
*/ + break; + + case PASS_ADAPTIVE_AUX_BUFFER: + pass_info.num_components = 4; + break; + case PASS_SAMPLE_COUNT: + pass_info.num_components = 1; + pass_info.use_exposure = false; + break; + + case PASS_AOV_COLOR: + pass_info.num_components = 3; + break; + case PASS_AOV_VALUE: + pass_info.num_components = 1; + break; + + case PASS_BAKE_PRIMITIVE: + case PASS_BAKE_DIFFERENTIAL: + pass_info.num_components = 4; + pass_info.use_exposure = false; + pass_info.use_filter = false; + break; + + case PASS_CATEGORY_LIGHT_END: + case PASS_CATEGORY_DATA_END: + case PASS_CATEGORY_BAKE_END: + case PASS_NUM: + LOG(DFATAL) << "Unexpected pass type is used " << type; + pass_info.num_components = 0; + break; + } + + return pass_info; +} + +bool Pass::contains(const vector<Pass *> &passes, PassType type) +{ + for (const Pass *pass : passes) { + if (pass->get_type() != type) { + continue; + } + + return true; + } + + return false; +} + +const Pass *Pass::find(const vector<Pass *> &passes, const string &name) +{ + for (const Pass *pass : passes) { + if (pass->get_name() == name) { + return pass; + } + } + + return nullptr; +} + +const Pass *Pass::find(const vector<Pass *> &passes, PassType type, PassMode mode) +{ + for (const Pass *pass : passes) { + if (pass->get_type() != type || pass->get_mode() != mode) { + continue; + } + + return pass; + } + + return nullptr; +} + +int Pass::get_offset(const vector<Pass *> &passes, const Pass *pass) +{ + int pass_offset = 0; + + for (const Pass *current_pass : passes) { + /* Note that pass name is allowed to be empty. This is why we check for type and mode. 
*/ + if (current_pass->get_type() == pass->get_type() && + current_pass->get_mode() == pass->get_mode() && + current_pass->get_name() == pass->get_name()) { + if (current_pass->is_written()) { + return pass_offset; + } + else { + return PASS_UNUSED; + } + } + if (current_pass->is_written()) { + pass_offset += current_pass->get_info().num_components; + } + } + + return PASS_UNUSED; +} + +std::ostream &operator<<(std::ostream &os, const Pass &pass) +{ + os << "type: " << pass_type_as_string(pass.get_type()); + os << ", name: \"" << pass.get_name() << "\""; + os << ", mode: " << pass.get_mode(); + os << ", is_written: " << string_from_bool(pass.is_written()); + + return os; +} + +CCL_NAMESPACE_END diff --git a/intern/cycles/render/pass.h b/intern/cycles/render/pass.h new file mode 100644 index 00000000000..82230c62cb0 --- /dev/null +++ b/intern/cycles/render/pass.h @@ -0,0 +1,106 @@ +/* + * Copyright 2011-2021 Blender Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include <ostream> // NOLINT + +#include "util/util_string.h" +#include "util/util_vector.h" + +#include "kernel/kernel_types.h" + +#include "graph/node.h" + +CCL_NAMESPACE_BEGIN + +const char *pass_type_as_string(const PassType type); + +enum class PassMode { + NOISY, + DENOISED, +}; +const char *pass_mode_as_string(PassMode mode); +std::ostream &operator<<(std::ostream &os, PassMode mode); + +struct PassInfo { + int num_components = -1; + bool use_filter = false; + bool use_exposure = false; + bool is_written = true; + PassType divide_type = PASS_NONE; + PassType direct_type = PASS_NONE; + PassType indirect_type = PASS_NONE; + + /* Pass access for read can not happen directly and needs some sort of compositing (for example, + * light passes due to divide_type, or shadow catcher pass. */ + bool use_compositing = false; + + /* Used to disable albedo pass for denoising. + * Light and shadow catcher passes should not have discontinuity in the denoised result based on + * the underlying albedo. */ + bool use_denoising_albedo = true; + + /* Pass supports denoising. */ + bool support_denoise = false; +}; + +class Pass : public Node { + public: + NODE_DECLARE + + NODE_SOCKET_API(PassType, type) + NODE_SOCKET_API(PassMode, mode) + NODE_SOCKET_API(ustring, name) + NODE_SOCKET_API(bool, include_albedo) + + Pass(); + + PassInfo get_info() const; + + /* The pass is written by the render pipeline (kernel or denoiser). If the pass is written it + * will have pixels allocated in a RenderBuffer. Passes which are not written do not have their + * pixels allocated to save memory. */ + bool is_written() const; + + protected: + /* The has been created automatically as a requirement to various rendering functionality (such + * as adaptive sampling). 
*/ + bool is_auto_; + + public: + static const NodeEnum *get_type_enum(); + static const NodeEnum *get_mode_enum(); + + static PassInfo get_info(PassType type, const bool include_albedo = false); + + static bool contains(const vector<Pass *> &passes, PassType type); + + /* Returns nullptr if there is no pass with the given name or type+mode. */ + static const Pass *find(const vector<Pass *> &passes, const string &name); + static const Pass *find(const vector<Pass *> &passes, + PassType type, + PassMode mode = PassMode::NOISY); + + /* Returns PASS_UNUSED if there is no corresponding pass. */ + static int get_offset(const vector<Pass *> &passes, const Pass *pass); + + friend class Film; +}; + +std::ostream &operator<<(std::ostream &os, const Pass &pass); + +CCL_NAMESPACE_END diff --git a/intern/cycles/render/scene.cpp b/intern/cycles/render/scene.cpp index c4e7d2c79d6..a4b030190dc 100644 --- a/intern/cycles/render/scene.cpp +++ b/intern/cycles/render/scene.cpp @@ -163,12 +163,15 @@ void Scene::free_memory(bool final) delete p; foreach (Light *l, lights) delete l; + foreach (Pass *p, passes) + delete p; geometry.clear(); objects.clear(); lights.clear(); particle_systems.clear(); procedurals.clear(); + passes.clear(); if (device) { camera->device_free(device, &dscene, this); @@ -253,7 +256,6 @@ void Scene::device_update(Device *device_, Progress &progress) * - Camera may be used for adaptive subdivision. * - Displacement shader must have all shader data available. * - Light manager needs lookup tables and final mesh data to compute emission CDF. 
- * - Film needs light manager to run for use_light_visibility * - Lookup tables are done a second time to handle film tables */ @@ -469,88 +471,110 @@ void Scene::enable_update_stats() } } -DeviceRequestedFeatures Scene::get_requested_device_features() +void Scene::update_kernel_features() { - DeviceRequestedFeatures requested_features; + if (!need_update()) { + return; + } - shader_manager->get_requested_features(this, &requested_features); + /* These features are not being tweaked as often as shaders, + * so could be done selective magic for the viewport as well. */ + uint kernel_features = shader_manager->get_kernel_features(this); - /* This features are not being tweaked as often as shaders, - * so could be done selective magic for the viewport as well. - */ bool use_motion = need_motion() == Scene::MotionType::MOTION_BLUR; - requested_features.use_hair = false; - requested_features.use_hair_thick = (params.hair_shape == CURVE_THICK); - requested_features.use_object_motion = false; - requested_features.use_camera_motion = use_motion && camera->use_motion(); + kernel_features |= KERNEL_FEATURE_PATH_TRACING; + if (params.hair_shape == CURVE_THICK) { + kernel_features |= KERNEL_FEATURE_HAIR_THICK; + } + if (use_motion && camera->use_motion()) { + kernel_features |= KERNEL_FEATURE_CAMERA_MOTION; + } foreach (Object *object, objects) { Geometry *geom = object->get_geometry(); if (use_motion) { - requested_features.use_object_motion |= object->use_motion() | geom->get_use_motion_blur(); - requested_features.use_camera_motion |= geom->get_use_motion_blur(); + if (object->use_motion() || geom->get_use_motion_blur()) { + kernel_features |= KERNEL_FEATURE_OBJECT_MOTION; + } + if (geom->get_use_motion_blur()) { + kernel_features |= KERNEL_FEATURE_CAMERA_MOTION; + } } if (object->get_is_shadow_catcher()) { - requested_features.use_shadow_tricks = true; + kernel_features |= KERNEL_FEATURE_SHADOW_CATCHER; } if (geom->is_mesh()) { Mesh *mesh = static_cast<Mesh *>(geom); 
#ifdef WITH_OPENSUBDIV if (mesh->get_subdivision_type() != Mesh::SUBDIVISION_NONE) { - requested_features.use_patch_evaluation = true; + kernel_features |= KERNEL_FEATURE_PATCH_EVALUATION; } #endif - requested_features.use_true_displacement |= mesh->has_true_displacement(); } else if (geom->is_hair()) { - requested_features.use_hair = true; + kernel_features |= KERNEL_FEATURE_HAIR; } } - requested_features.use_background_light = light_manager->has_background_light(this); - - requested_features.use_baking = bake_manager->get_baking(); - requested_features.use_integrator_branched = (integrator->get_method() == - Integrator::BRANCHED_PATH); - if (film->get_denoising_data_pass()) { - requested_features.use_denoising = true; - requested_features.use_shadow_tricks = true; + if (bake_manager->get_baking()) { + kernel_features |= KERNEL_FEATURE_BAKING; } - return requested_features; -} + kernel_features |= film->get_kernel_features(this); -bool Scene::update(Progress &progress, bool &kernel_switch_needed) -{ - /* update scene */ - if (need_update()) { - /* Update max_closures. */ - KernelIntegrator *kintegrator = &dscene.data.integrator; - if (params.background) { - kintegrator->max_closures = get_max_closure_count(); - } - else { - /* Currently viewport render is faster with higher max_closures, needs investigating. */ - kintegrator->max_closures = MAX_CLOSURE; - } - - /* Load render kernels, before device update where we upload data to the GPU. 
*/ - bool new_kernels_needed = load_kernels(progress, false); - - progress.set_status("Updating Scene"); - MEM_GUARDED_CALL(&progress, device_update, device, progress); + dscene.data.kernel_features = kernel_features; - DeviceKernelStatus kernel_switch_status = device->get_active_kernel_switch_state(); - kernel_switch_needed = kernel_switch_status == DEVICE_KERNEL_FEATURE_KERNEL_AVAILABLE || - kernel_switch_status == DEVICE_KERNEL_FEATURE_KERNEL_INVALID; - if (new_kernels_needed || kernel_switch_needed) { - progress.set_kernel_status("Compiling render kernels"); - device->wait_for_availability(loaded_kernel_features); - progress.set_kernel_status(""); - } + /* Currently viewport render is faster with higher max_closures, needs investigating. */ + const uint max_closures = (params.background) ? get_max_closure_count() : MAX_CLOSURE; + dscene.data.max_closures = max_closures; + dscene.data.max_shaders = shaders.size(); +} - return true; +bool Scene::update(Progress &progress) +{ + if (!need_update()) { + return false; } - return false; + + /* Load render kernels, before device update where we upload data to the GPU. */ + load_kernels(progress, false); + + /* Upload scene data to the GPU. 
*/ + progress.set_status("Updating Scene"); + MEM_GUARDED_CALL(&progress, device_update, device, progress); + + return true; +} + +static void log_kernel_features(const uint features) +{ + VLOG(2) << "Requested features:\n"; + VLOG(2) << "Use BSDF " << string_from_bool(features & KERNEL_FEATURE_NODE_BSDF) << "\n"; + VLOG(2) << "Use Principled BSDF " << string_from_bool(features & KERNEL_FEATURE_PRINCIPLED) + << "\n"; + VLOG(2) << "Use Emission " << string_from_bool(features & KERNEL_FEATURE_NODE_EMISSION) << "\n"; + VLOG(2) << "Use Volume " << string_from_bool(features & KERNEL_FEATURE_NODE_VOLUME) << "\n"; + VLOG(2) << "Use Hair " << string_from_bool(features & KERNEL_FEATURE_NODE_HAIR) << "\n"; + VLOG(2) << "Use Bump " << string_from_bool(features & KERNEL_FEATURE_NODE_BUMP) << "\n"; + VLOG(2) << "Use Voronoi " << string_from_bool(features & KERNEL_FEATURE_NODE_VORONOI_EXTRA) + << "\n"; + VLOG(2) << "Use Shader Raytrace " << string_from_bool(features & KERNEL_FEATURE_NODE_RAYTRACE) + << "\n"; + VLOG(2) << "Use Transparent " << string_from_bool(features & KERNEL_FEATURE_TRANSPARENT) << "\n"; + VLOG(2) << "Use Denoising " << string_from_bool(features & KERNEL_FEATURE_DENOISING) << "\n"; + VLOG(2) << "Use Path Tracing " << string_from_bool(features & KERNEL_FEATURE_PATH_TRACING) + << "\n"; + VLOG(2) << "Use Hair " << string_from_bool(features & KERNEL_FEATURE_HAIR) << "\n"; + VLOG(2) << "Use Object Motion " << string_from_bool(features & KERNEL_FEATURE_OBJECT_MOTION) + << "\n"; + VLOG(2) << "Use Camera Motion " << string_from_bool(features & KERNEL_FEATURE_CAMERA_MOTION) + << "\n"; + VLOG(2) << "Use Baking " << string_from_bool(features & KERNEL_FEATURE_BAKING) << "\n"; + VLOG(2) << "Use Subsurface " << string_from_bool(features & KERNEL_FEATURE_SUBSURFACE) << "\n"; + VLOG(2) << "Use Volume " << string_from_bool(features & KERNEL_FEATURE_VOLUME) << "\n"; + VLOG(2) << "Use Patch Evaluation " + << string_from_bool(features & KERNEL_FEATURE_PATCH_EVALUATION) << "\n"; + 
VLOG(2) << "Use Shadow Catcher " << string_from_bool(features & KERNEL_FEATURE_SHADOW_CATCHER) + << "\n"; } bool Scene::load_kernels(Progress &progress, bool lock_scene) @@ -560,15 +584,15 @@ bool Scene::load_kernels(Progress &progress, bool lock_scene) scene_lock = thread_scoped_lock(mutex); } - DeviceRequestedFeatures requested_features = get_requested_device_features(); + const uint kernel_features = dscene.data.kernel_features; - if (!kernels_loaded || loaded_kernel_features.modified(requested_features)) { + if (!kernels_loaded || loaded_kernel_features != kernel_features) { progress.set_status("Loading render kernels (may take a few minutes the first time)"); scoped_timer timer; - VLOG(2) << "Requested features:\n" << requested_features; - if (!device->load_kernels(requested_features)) { + log_kernel_features(kernel_features); + if (!device->load_kernels(kernel_features)) { string message = device->error_message(); if (message.empty()) message = "Failed loading render kernel, see console for errors"; @@ -580,7 +604,7 @@ bool Scene::load_kernels(Progress &progress, bool lock_scene) } kernels_loaded = true; - loaded_kernel_features = requested_features; + loaded_kernel_features = kernel_features; return true; } return false; @@ -618,6 +642,28 @@ int Scene::get_max_closure_count() return max_closure_global; } +bool Scene::has_shadow_catcher() +{ + if (shadow_catcher_modified_) { + has_shadow_catcher_ = false; + for (Object *object : objects) { + if (object->get_is_shadow_catcher()) { + has_shadow_catcher_ = true; + break; + } + } + + shadow_catcher_modified_ = false; + } + + return has_shadow_catcher_; +} + +void Scene::tag_shadow_catcher_modified() +{ + shadow_catcher_modified_ = true; +} + template<> Light *Scene::create_node<Light>() { Light *node = new Light(); @@ -694,6 +740,15 @@ template<> AlembicProcedural *Scene::create_node<AlembicProcedural>() #endif } +template<> Pass *Scene::create_node<Pass>() +{ + Pass *node = new Pass(); + node->set_owner(this); + 
passes.push_back(node); + film->tag_modified(); + return node; +} + template<typename T> void delete_node_from_array(vector<T> &nodes, T node) { for (size_t i = 0; i < nodes.size(); ++i) { @@ -779,6 +834,12 @@ template<> void Scene::delete_node_impl(AlembicProcedural *node) #endif } +template<> void Scene::delete_node_impl(Pass *node) +{ + delete_node_from_array(passes, node); + film->tag_modified(); +} + template<typename T> static void remove_nodes_in_set(const set<T *> &nodes_set, vector<T *> &nodes_array, @@ -842,4 +903,10 @@ template<> void Scene::delete_nodes(const set<Procedural *> &nodes, const NodeOw procedural_manager->tag_update(); } +template<> void Scene::delete_nodes(const set<Pass *> &nodes, const NodeOwner *owner) +{ + remove_nodes_in_set(nodes, passes, owner); + film->tag_modified(); +} + CCL_NAMESPACE_END diff --git a/intern/cycles/render/scene.h b/intern/cycles/render/scene.h index 7d8a6774381..cf4a3ba6b12 100644 --- a/intern/cycles/render/scene.h +++ b/intern/cycles/render/scene.h @@ -128,7 +128,7 @@ class DeviceScene { device_vector<float> lookup_table; /* integrator */ - device_vector<uint> sample_pattern_lut; + device_vector<float> sample_pattern_lut; /* ies lights */ device_vector<float> ies_lights; @@ -142,27 +142,6 @@ class DeviceScene { class SceneParams { public: - /* Type of BVH, in terms whether it is supported dynamic updates of meshes - * or whether modifying geometry requires full BVH rebuild. - */ - enum BVHType { - /* BVH supports dynamic updates of geometry. - * - * Faster for updating BVH tree when doing modifications in viewport, - * but slower for rendering. - */ - BVH_DYNAMIC = 0, - /* BVH tree is calculated for specific scene, updates in geometry - * requires full tree rebuild. - * - * Slower to update BVH tree when modifying objects in viewport, also - * slower to build final BVH tree but gives best possible render speed. - */ - BVH_STATIC = 1, - - BVH_NUM_TYPES, - }; - ShadingSystem shadingsystem; /* Requested BVH layout. 
@@ -186,7 +165,7 @@ class SceneParams { { shadingsystem = SHADINGSYSTEM_SVM; bvh_layout = BVH_LAYOUT_BVH2; - bvh_type = BVH_DYNAMIC; + bvh_type = BVH_TYPE_DYNAMIC; use_bvh_spatial_split = false; use_bvh_unaligned_nodes = true; num_bvh_time_steps = 0; @@ -196,7 +175,7 @@ class SceneParams { background = true; } - bool modified(const SceneParams ¶ms) + bool modified(const SceneParams ¶ms) const { return !(shadingsystem == params.shadingsystem && bvh_layout == params.bvh_layout && bvh_type == params.bvh_type && @@ -236,7 +215,7 @@ class Scene : public NodeOwner { vector<Shader *> shaders; vector<Light *> lights; vector<ParticleSystem *> particle_systems; - vector<Pass> passes; + vector<Pass *> passes; vector<Procedural *> procedurals; /* data managers */ @@ -291,7 +270,11 @@ class Scene : public NodeOwner { void enable_update_stats(); - bool update(Progress &progress, bool &kernel_switch_needed); + void update_kernel_features(); + bool update(Progress &progress); + + bool has_shadow_catcher(); + void tag_shadow_catcher_modified(); /* This function is used to create a node of a specified type instead of * calling 'new', and sets the scene as the owner of the node. @@ -348,13 +331,12 @@ class Scene : public NodeOwner { void free_memory(bool final); bool kernels_loaded; - DeviceRequestedFeatures loaded_kernel_features; + uint loaded_kernel_features; bool load_kernels(Progress &progress, bool lock_scene = true); - /* ** Split kernel routines ** */ - - DeviceRequestedFeatures get_requested_device_features(); + bool has_shadow_catcher_ = false; + bool shadow_catcher_modified_ = true; /* Maximum number of closure during session lifetime. 
*/ int max_closure_global; @@ -384,6 +366,8 @@ template<> Shader *Scene::create_node<Shader>(); template<> AlembicProcedural *Scene::create_node<AlembicProcedural>(); +template<> Pass *Scene::create_node<Pass>(); + template<> void Scene::delete_node_impl(Light *node); template<> void Scene::delete_node_impl(Mesh *node); @@ -404,6 +388,8 @@ template<> void Scene::delete_node_impl(Procedural *node); template<> void Scene::delete_node_impl(AlembicProcedural *node); +template<> void Scene::delete_node_impl(Pass *node); + template<> void Scene::delete_nodes(const set<Light *> &nodes, const NodeOwner *owner); template<> void Scene::delete_nodes(const set<Geometry *> &nodes, const NodeOwner *owner); @@ -416,6 +402,8 @@ template<> void Scene::delete_nodes(const set<Shader *> &nodes, const NodeOwner template<> void Scene::delete_nodes(const set<Procedural *> &nodes, const NodeOwner *owner); +template<> void Scene::delete_nodes(const set<Pass *> &nodes, const NodeOwner *owner); + CCL_NAMESPACE_END #endif /* __SCENE_H__ */ diff --git a/intern/cycles/render/session.cpp b/intern/cycles/render/session.cpp index 1b91c49f0ea..47eeffd97fe 100644 --- a/intern/cycles/render/session.cpp +++ b/intern/cycles/render/session.cpp @@ -17,10 +17,15 @@ #include <limits.h> #include <string.h> +#include "device/cpu/device.h" #include "device/device.h" +#include "integrator/pass_accessor_cpu.h" +#include "integrator/path_trace.h" +#include "render/background.h" #include "render/bake.h" #include "render/buffers.h" #include "render/camera.h" +#include "render/gpu_display.h" #include "render/graph.h" #include "render/integrator.h" #include "render/light.h" @@ -39,70 +44,63 @@ CCL_NAMESPACE_BEGIN -/* Note about preserve_tile_device option for tile manager: - * progressive refine and viewport rendering does requires tiles to - * always be allocated for the same device - */ -Session::Session(const SessionParams ¶ms_) - : params(params_), - tile_manager(params.progressive, - params.samples, - 
params.tile_size, - params.start_resolution, - params.background == false || params.progressive_refine, - params.background, - params.tile_order, - max(params.device.multi_devices.size(), 1), - params.pixel_size), - stats(), - profiler() +Session::Session(const SessionParams ¶ms_, const SceneParams &scene_params) + : params(params_), render_scheduler_(tile_manager_, params) { - device_use_gl_ = ((params.device.type != DEVICE_CPU) && !params.background); - TaskScheduler::init(params.threads); - session_thread_ = NULL; - scene = NULL; - - reset_time_ = 0.0; - last_update_time_ = 0.0; + session_thread_ = nullptr; delayed_reset_.do_reset = false; - delayed_reset_.samples = 0; - - display_outdated_ = false; - gpu_draw_ready_ = false; - gpu_need_display_buffer_update_ = false; pause_ = false; cancel_ = false; new_work_added_ = false; - buffers = NULL; - display = NULL; + device = Device::create(params.device, stats, profiler); - /* Validate denoising parameters. */ - set_denoising(params.denoising); + scene = new Scene(scene_params, device); - /* Create CPU/GPU devices. */ - device = Device::create(params.device, stats, profiler, params.background); - - if (!device->error_message().empty()) { - progress.set_error(device->error_message()); - return; - } + /* Configure path tracer. */ + path_trace_ = make_unique<PathTrace>( + device, scene->film, &scene->dscene, render_scheduler_, tile_manager_); + path_trace_->set_progress(&progress); + path_trace_->tile_buffer_update_cb = [&]() { + if (!update_render_tile_cb) { + return; + } + update_render_tile_cb(); + }; + path_trace_->tile_buffer_write_cb = [&]() { + if (!write_render_tile_cb) { + return; + } + write_render_tile_cb(); + }; + path_trace_->tile_buffer_read_cb = [&]() -> bool { + if (!read_render_tile_cb) { + return false; + } + read_render_tile_cb(); + return true; + }; + path_trace_->progress_update_cb = [&]() { update_status_time(); }; - /* Create buffers for interactive rendering. 
*/ - if (!(params.background && !params.write_render_cb)) { - buffers = new RenderBuffers(device); - display = new DisplayBuffer(device, params.display_buffer_linear); - } + tile_manager_.full_buffer_written_cb = [&](string_view filename) { + if (!full_buffer_written_cb) { + return; + } + full_buffer_written_cb(filename); + }; } Session::~Session() { cancel(); + /* TODO(sergey): Bring the passes in viewport back. + * It is unclear why there is such an exception needed though. */ +#if 0 if (buffers && params.write_render_cb) { /* Copy to display buffer and write out image if requested */ delete display; @@ -116,12 +114,14 @@ Session::~Session() uchar4 *pixels = display->rgba_byte.copy_from_device(0, w, h); params.write_render_cb((uchar *)pixels, w, h, 4); } +#endif - /* clean up */ - tile_manager.device_free(); + /* Make sure path tracer is destroyed before the device. This is needed because destruction might + * need to access device for device memory free. */ + /* TODO(sergey): Convert device to be unique_ptr, and rely on C++ to destruct objects in the + * pre-defined order. 
*/ + path_trace_.reset(); - delete buffers; - delete display; delete scene; delete device; @@ -135,15 +135,16 @@ void Session::start() } } -void Session::cancel() +void Session::cancel(bool quick) { + if (quick && path_trace_) { + path_trace_->cancel(); + } + if (session_thread_) { /* wait for session thread to end */ progress.set_cancel("Exiting"); - gpu_need_display_buffer_update_ = false; - gpu_need_display_buffer_update_cond_.notify_all(); - { thread_scoped_lock pause_lock(pause_mutex_); pause_ = false; @@ -157,570 +158,43 @@ void Session::cancel() bool Session::ready_to_reset() { - double dt = time_dt() - reset_time_; - - if (!display_outdated_) - return (dt > params.reset_timeout); - else - return (dt > params.cancel_timeout); + return path_trace_->ready_to_reset(); } -/* GPU Session */ - -void Session::reset_gpu(BufferParams &buffer_params, int samples) +void Session::run_main_render_loop() { - thread_scoped_lock pause_lock(pause_mutex_); - - /* block for buffer access and reset immediately. 
we can't do this - * in the thread, because we need to allocate an OpenGL buffer, and - * that only works in the main thread */ - thread_scoped_lock display_lock(display_mutex_); - thread_scoped_lock buffers_lock(buffers_mutex_); + path_trace_->clear_gpu_display(); - display_outdated_ = true; - reset_time_ = time_dt(); + while (true) { + RenderWork render_work = run_update_for_next_iteration(); - reset_(buffer_params, samples); - - gpu_need_display_buffer_update_ = false; - gpu_need_display_buffer_update_cond_.notify_all(); - - new_work_added_ = true; - - pause_cond_.notify_all(); -} - -bool Session::draw_gpu(BufferParams &buffer_params, DeviceDrawParams &draw_params) -{ - /* block for buffer access */ - thread_scoped_lock display_lock(display_mutex_); - - /* first check we already rendered something */ - if (gpu_draw_ready_) { - /* then verify the buffers have the expected size, so we don't - * draw previous results in a resized window */ - if (buffer_params.width == display->params.width && - buffer_params.height == display->params.height) { - /* for CUDA we need to do tone-mapping still, since we can - * only access GL buffers from the main thread. 
*/ - if (gpu_need_display_buffer_update_) { - thread_scoped_lock buffers_lock(buffers_mutex_); - copy_to_display_buffer(tile_manager.state.sample); - gpu_need_display_buffer_update_ = false; - gpu_need_display_buffer_update_cond_.notify_all(); + if (!render_work) { + if (VLOG_IS_ON(2)) { + double total_time, render_time; + progress.get_time(total_time, render_time); + VLOG(2) << "Rendering in main loop is done in " << render_time << " seconds."; + VLOG(2) << path_trace_->full_report(); } - display->draw(device, draw_params); - - if (display_outdated_ && (time_dt() - reset_time_) > params.text_timeout) - return false; - - return true; - } - } - - return false; -} - -void Session::run_gpu() -{ - bool tiles_written = false; - - reset_time_ = time_dt(); - last_update_time_ = time_dt(); - last_display_time_ = last_update_time_; - - progress.set_render_start_time(); - - while (!progress.get_cancel()) { - const bool no_tiles = !run_update_for_next_iteration(); - - if (no_tiles) { if (params.background) { - /* if no work left and in background mode, we can stop immediately */ + /* if no work left and in background mode, we can stop immediately. 
*/ progress.set_status("Finished"); break; } } - if (run_wait_for_work(no_tiles)) { - continue; - } - - if (progress.get_cancel()) { - break; - } - - if (!no_tiles) { - if (!device->error_message().empty()) - progress.set_error(device->error_message()); - - if (progress.get_cancel()) - break; - - /* buffers mutex is locked entirely while rendering each - * sample, and released/reacquired on each iteration to allow - * reset and draw in between */ - thread_scoped_lock buffers_lock(buffers_mutex_); - - /* update status and timing */ - update_status_time(); - - /* render */ - bool delayed_denoise = false; - const bool need_denoise = render_need_denoise(delayed_denoise); - render(need_denoise); - - device->task_wait(); - - if (!device->error_message().empty()) - progress.set_cancel(device->error_message()); - - /* update status and timing */ - update_status_time(); - - gpu_need_display_buffer_update_ = !delayed_denoise; - gpu_draw_ready_ = true; - progress.set_update(); - - /* wait for until display buffer is updated */ - if (!params.background) { - while (gpu_need_display_buffer_update_) { - if (progress.get_cancel()) - break; - - gpu_need_display_buffer_update_cond_.wait(buffers_lock); - } - } - - if (!device->error_message().empty()) - progress.set_error(device->error_message()); - - tiles_written = update_progressive_refine(progress.get_cancel()); - - if (progress.get_cancel()) - break; - } - } - - if (!tiles_written) - update_progressive_refine(true); -} - -/* CPU Session */ - -void Session::reset_cpu(BufferParams &buffer_params, int samples) -{ - thread_scoped_lock reset_lock(delayed_reset_.mutex); - thread_scoped_lock pause_lock(pause_mutex_); - - display_outdated_ = true; - reset_time_ = time_dt(); - - delayed_reset_.params = buffer_params; - delayed_reset_.samples = samples; - delayed_reset_.do_reset = true; - device->task_cancel(); - - pause_cond_.notify_all(); -} - -bool Session::draw_cpu(BufferParams &buffer_params, DeviceDrawParams &draw_params) -{ - 
thread_scoped_lock display_lock(display_mutex_); - - /* first check we already rendered something */ - if (display->draw_ready()) { - /* then verify the buffers have the expected size, so we don't - * draw previous results in a resized window */ - if (buffer_params.width == display->params.width && - buffer_params.height == display->params.height) { - display->draw(device, draw_params); - - if (display_outdated_ && (time_dt() - reset_time_) > params.text_timeout) - return false; - - return true; - } - } - - return false; -} - -bool Session::steal_tile(RenderTile &rtile, Device *tile_device, thread_scoped_lock &tile_lock) -{ - /* Devices that can get their tiles stolen don't steal tiles themselves. - * Additionally, if there are no stealable tiles in flight, give up here. */ - if (tile_device->info.type == DEVICE_CPU || stealable_tiles_ == 0) { - return false; - } - - /* Wait until no other thread is trying to steal a tile. */ - while (tile_stealing_state_ != NOT_STEALING && stealable_tiles_ > 0) { - /* Someone else is currently trying to get a tile. - * Wait on the condition variable and try later. */ - tile_steal_cond_.wait(tile_lock); - } - /* If another thread stole the last stealable tile in the meantime, give up. */ - if (stealable_tiles_ == 0) { - return false; - } - - /* There are stealable tiles in flight, so signal that one should be released. */ - tile_stealing_state_ = WAITING_FOR_TILE; - - /* Wait until a device notices the signal and releases its tile. */ - while (tile_stealing_state_ != GOT_TILE && stealable_tiles_ > 0) { - tile_steal_cond_.wait(tile_lock); - } - /* If the last stealable tile finished on its own, give up. */ - if (tile_stealing_state_ != GOT_TILE) { - tile_stealing_state_ = NOT_STEALING; - return false; - } - - /* Successfully stole a tile, now move it to the new device. 
*/ - rtile = stolen_tile_; - rtile.buffers->buffer.move_device(tile_device); - rtile.buffer = rtile.buffers->buffer.device_pointer; - rtile.stealing_state = RenderTile::NO_STEALING; - rtile.num_samples -= (rtile.sample - rtile.start_sample); - rtile.start_sample = rtile.sample; - - tile_stealing_state_ = NOT_STEALING; - - /* Poke any threads which might be waiting for NOT_STEALING above. */ - tile_steal_cond_.notify_one(); - - return true; -} - -bool Session::get_tile_stolen() -{ - /* If tile_stealing_state is WAITING_FOR_TILE, atomically set it to RELEASING_TILE - * and return true. */ - TileStealingState expected = WAITING_FOR_TILE; - return tile_stealing_state_.compare_exchange_weak(expected, RELEASING_TILE); -} - -bool Session::acquire_tile(RenderTile &rtile, Device *tile_device, uint tile_types) -{ - if (progress.get_cancel()) { - if (params.progressive_refine == false) { - /* for progressive refine current sample should be finished for all tiles */ - return false; - } - } - - thread_scoped_lock tile_lock(tile_mutex_); - - /* get next tile from manager */ - Tile *tile; - int device_num = device->device_number(tile_device); - - while (!tile_manager.next_tile(tile, device_num, tile_types)) { - /* Can only steal tiles on devices that support rendering - * This is because denoising tiles cannot be stolen (see below) - */ - if ((tile_types & (RenderTile::PATH_TRACE | RenderTile::BAKE)) && - steal_tile(rtile, tile_device, tile_lock)) { - return true; - } - - /* Wait for denoising tiles to become available */ - if ((tile_types & RenderTile::DENOISE) && !progress.get_cancel() && tile_manager.has_tiles()) { - denoising_cond_.wait(tile_lock); - continue; - } - - return false; - } - - /* fill render tile */ - rtile.x = tile_manager.state.buffer.full_x + tile->x; - rtile.y = tile_manager.state.buffer.full_y + tile->y; - rtile.w = tile->w; - rtile.h = tile->h; - rtile.start_sample = tile_manager.state.sample; - rtile.num_samples = tile_manager.state.num_samples; - 
rtile.resolution = tile_manager.state.resolution_divider; - rtile.tile_index = tile->index; - rtile.stealing_state = RenderTile::NO_STEALING; - - if (tile->state == Tile::DENOISE) { - rtile.task = RenderTile::DENOISE; - } - else { - if (tile_device->info.type == DEVICE_CPU) { - stealable_tiles_++; - rtile.stealing_state = RenderTile::CAN_BE_STOLEN; - } - - if (read_bake_tile_cb) { - rtile.task = RenderTile::BAKE; - } - else { - rtile.task = RenderTile::PATH_TRACE; - } - } - - tile_lock.unlock(); - - /* in case of a permanent buffer, return it, otherwise we will allocate - * a new temporary buffer */ - if (buffers) { - tile_manager.state.buffer.get_offset_stride(rtile.offset, rtile.stride); - - rtile.buffer = buffers->buffer.device_pointer; - rtile.buffers = buffers; - - device->map_tile(tile_device, rtile); - - /* Reset copy state, since buffer contents change after the tile was acquired */ - buffers->map_neighbor_copied = false; - - /* This hack ensures that the copy in 'MultiDevice::map_neighbor_tiles' accounts - * for the buffer resolution divider. */ - buffers->buffer.data_width = (buffers->params.width * buffers->params.get_passes_size()) / - tile_manager.state.resolution_divider; - buffers->buffer.data_height = buffers->params.height / tile_manager.state.resolution_divider; - - return true; - } - - if (tile->buffers == NULL) { - /* fill buffer parameters */ - BufferParams buffer_params = tile_manager.params; - buffer_params.full_x = rtile.x; - buffer_params.full_y = rtile.y; - buffer_params.width = rtile.w; - buffer_params.height = rtile.h; - - /* allocate buffers */ - tile->buffers = new RenderBuffers(tile_device); - tile->buffers->reset(buffer_params); - } - else if (tile->buffers->buffer.device != tile_device) { - /* Move buffer to current tile device again in case it was stolen before. - * Not needed for denoising since that already handles mapping of tiles and - * neighbors to its own device. 
*/ - if (rtile.task != RenderTile::DENOISE) { - tile->buffers->buffer.move_device(tile_device); - } - } - - tile->buffers->map_neighbor_copied = false; - - tile->buffers->params.get_offset_stride(rtile.offset, rtile.stride); - - rtile.buffer = tile->buffers->buffer.device_pointer; - rtile.buffers = tile->buffers; - rtile.sample = tile_manager.state.sample; - - if (read_bake_tile_cb) { - /* This will read any passes needed as input for baking. */ - if (tile_manager.state.sample == tile_manager.range_start_sample) { - { - thread_scoped_lock tile_lock(tile_mutex_); - read_bake_tile_cb(rtile); - } - rtile.buffers->buffer.copy_to_device(); - } - } - else { - /* This will tag tile as IN PROGRESS in blender-side render pipeline, - * which is needed to highlight currently rendering tile before first - * sample was processed for it. */ - update_tile_sample(rtile); - } - - return true; -} - -void Session::update_tile_sample(RenderTile &rtile) -{ - thread_scoped_lock tile_lock(tile_mutex_); - - if (update_render_tile_cb) { - if (params.progressive_refine == false) { - /* todo: optimize this by making it thread safe and removing lock */ - - update_render_tile_cb(rtile, true); - } - } - - update_status_time(); -} - -void Session::release_tile(RenderTile &rtile, const bool need_denoise) -{ - thread_scoped_lock tile_lock(tile_mutex_); - - if (rtile.stealing_state != RenderTile::NO_STEALING) { - stealable_tiles_--; - if (rtile.stealing_state == RenderTile::WAS_STOLEN) { - /* If the tile is being stolen, don't release it here - the new device will pick up where - * the old one left off. */ - - assert(tile_stealing_state_ == RELEASING_TILE); - assert(rtile.sample < rtile.start_sample + rtile.num_samples); - - tile_stealing_state_ = GOT_TILE; - stolen_tile_ = rtile; - tile_steal_cond_.notify_all(); - return; - } - else if (stealable_tiles_ == 0) { - /* If this was the last stealable tile, wake up any threads still waiting for one. 
*/ - tile_steal_cond_.notify_all(); - } - } - - progress.add_finished_tile(rtile.task == RenderTile::DENOISE); - - bool delete_tile; - - if (tile_manager.finish_tile(rtile.tile_index, need_denoise, delete_tile)) { - /* Finished tile pixels write. */ - if (write_render_tile_cb && params.progressive_refine == false) { - write_render_tile_cb(rtile); - } - - if (delete_tile) { - delete rtile.buffers; - tile_manager.state.tiles[rtile.tile_index].buffers = NULL; - } - } - else { - /* In progress tile pixels update. */ - if (update_render_tile_cb && params.progressive_refine == false) { - update_render_tile_cb(rtile, false); - } - } - - update_status_time(); - - /* Notify denoising thread that a tile was finished. */ - denoising_cond_.notify_all(); -} - -void Session::map_neighbor_tiles(RenderTileNeighbors &neighbors, Device *tile_device) -{ - thread_scoped_lock tile_lock(tile_mutex_); - - const int4 image_region = make_int4( - tile_manager.state.buffer.full_x, - tile_manager.state.buffer.full_y, - tile_manager.state.buffer.full_x + tile_manager.state.buffer.width, - tile_manager.state.buffer.full_y + tile_manager.state.buffer.height); - - RenderTile ¢er_tile = neighbors.tiles[RenderTileNeighbors::CENTER]; - - if (!tile_manager.schedule_denoising) { - /* Fix up tile slices with overlap. */ - if (tile_manager.slice_overlap != 0) { - int y = max(center_tile.y - tile_manager.slice_overlap, image_region.y); - center_tile.h = min(center_tile.y + center_tile.h + tile_manager.slice_overlap, - image_region.w) - - y; - center_tile.y = y; - } - - /* Tiles are not being denoised individually, which means the entire image is processed. 
*/ - neighbors.set_bounds_from_center(); - } - else { - int center_idx = center_tile.tile_index; - assert(tile_manager.state.tiles[center_idx].state == Tile::DENOISE); - - for (int dy = -1, i = 0; dy <= 1; dy++) { - for (int dx = -1; dx <= 1; dx++, i++) { - RenderTile &rtile = neighbors.tiles[i]; - int nindex = tile_manager.get_neighbor_index(center_idx, i); - if (nindex >= 0) { - Tile *tile = &tile_manager.state.tiles[nindex]; - - rtile.x = image_region.x + tile->x; - rtile.y = image_region.y + tile->y; - rtile.w = tile->w; - rtile.h = tile->h; - - if (buffers) { - tile_manager.state.buffer.get_offset_stride(rtile.offset, rtile.stride); - - rtile.buffer = buffers->buffer.device_pointer; - rtile.buffers = buffers; - } - else { - assert(tile->buffers); - tile->buffers->params.get_offset_stride(rtile.offset, rtile.stride); - - rtile.buffer = tile->buffers->buffer.device_pointer; - rtile.buffers = tile->buffers; - } - } - else { - int px = center_tile.x + dx * params.tile_size.x; - int py = center_tile.y + dy * params.tile_size.y; - - rtile.x = clamp(px, image_region.x, image_region.z); - rtile.y = clamp(py, image_region.y, image_region.w); - rtile.w = rtile.h = 0; - - rtile.buffer = (device_ptr)NULL; - rtile.buffers = NULL; - } - } - } - } - - assert(center_tile.buffers); - device->map_neighbor_tiles(tile_device, neighbors); - - /* The denoised result is written back to the original tile. 
*/ - neighbors.target = center_tile; -} - -void Session::unmap_neighbor_tiles(RenderTileNeighbors &neighbors, Device *tile_device) -{ - thread_scoped_lock tile_lock(tile_mutex_); - device->unmap_neighbor_tiles(tile_device, neighbors); -} - -void Session::run_cpu() -{ - bool tiles_written = false; - - last_update_time_ = time_dt(); - last_display_time_ = last_update_time_; - - while (!progress.get_cancel()) { - const bool no_tiles = !run_update_for_next_iteration(); - bool need_copy_to_display_buffer = false; - - if (no_tiles) { - if (params.background) { - /* if no work left and in background mode, we can stop immediately */ - progress.set_status("Finished"); + const bool did_cancel = progress.get_cancel(); + if (did_cancel) { + render_scheduler_.render_work_reschedule_on_cancel(render_work); + if (!render_work) { break; } } - - if (run_wait_for_work(no_tiles)) { + else if (run_wait_for_work(render_work)) { continue; } - if (progress.get_cancel()) { - break; - } - - if (!no_tiles) { - if (!device->error_message().empty()) - progress.set_error(device->error_message()); - - if (progress.get_cancel()) - break; - + { /* buffers mutex is locked entirely while rendering each * sample, and released/reacquired on each iteration to allow * reset and draw in between */ @@ -730,49 +204,25 @@ void Session::run_cpu() update_status_time(); /* render */ - bool delayed_denoise = false; - const bool need_denoise = render_need_denoise(delayed_denoise); - render(need_denoise); + path_trace_->render(render_work); /* update status and timing */ update_status_time(); - if (!params.background) - need_copy_to_display_buffer = !delayed_denoise; - - if (!device->error_message().empty()) - progress.set_error(device->error_message()); - } - - device->task_wait(); - - { - thread_scoped_lock reset_lock(delayed_reset_.mutex); - thread_scoped_lock buffers_lock(buffers_mutex_); - thread_scoped_lock display_lock(display_mutex_); - - if (delayed_reset_.do_reset) { - /* reset rendering if request 
from main thread */ - delayed_reset_.do_reset = false; - reset_(delayed_reset_.params, delayed_reset_.samples); - } - else if (need_copy_to_display_buffer) { - /* Only copy to display_buffer if we do not reset, we don't - * want to show the result of an incomplete sample */ - copy_to_display_buffer(tile_manager.state.sample); + if (device->have_error()) { + const string &error_message = device->error_message(); + progress.set_error(error_message); + progress.set_cancel(error_message); + break; } - - if (!device->error_message().empty()) - progress.set_error(device->error_message()); - - tiles_written = update_progressive_refine(progress.get_cancel()); } progress.set_update(); - } - if (!tiles_written) - update_progressive_refine(true); + if (did_cancel) { + break; + } + } } void Session::run() @@ -789,10 +239,7 @@ void Session::run() /* reset number of rendered samples */ progress.reset_sample(); - if (device_use_gl_) - run_gpu(); - else - run_cpu(); + run_main_render_loop(); } profiler.stop(); @@ -804,31 +251,92 @@ void Session::run() progress.set_update(); } -bool Session::run_update_for_next_iteration() +RenderWork Session::run_update_for_next_iteration() { + RenderWork render_work; + thread_scoped_lock scene_lock(scene->mutex); thread_scoped_lock reset_lock(delayed_reset_.mutex); + bool have_tiles = true; + bool switched_to_new_tile = false; + if (delayed_reset_.do_reset) { thread_scoped_lock buffers_lock(buffers_mutex_); - reset_(delayed_reset_.params, delayed_reset_.samples); - delayed_reset_.do_reset = false; + do_delayed_reset(); + + /* After reset make sure the tile manager is at the first big tile. */ + have_tiles = tile_manager_.next(); + switched_to_new_tile = true; + } + + /* Update number of samples in the integrator. + * Ideally this would need to happen once in `Session::set_samples()`, but the issue there is + * the initial configuration when Session is created where the `set_samples()` is not used. 
*/ + scene->integrator->set_aa_samples(params.samples); + + /* Update denoiser settings. */ + { + const DenoiseParams denoise_params = scene->integrator->get_denoise_params(); + path_trace_->set_denoiser_params(denoise_params); + } + + /* Update adaptive sampling. */ + { + const AdaptiveSampling adaptive_sampling = scene->integrator->get_adaptive_sampling(); + path_trace_->set_adaptive_sampling(adaptive_sampling); } - const bool have_tiles = tile_manager.next(); + render_scheduler_.set_num_samples(params.samples); + render_scheduler_.set_time_limit(params.time_limit); + + while (have_tiles) { + render_work = render_scheduler_.get_render_work(); + if (render_work) { + break; + } - if (have_tiles) { + progress.add_finished_tile(false); + + have_tiles = tile_manager_.next(); + if (have_tiles) { + render_scheduler_.reset_for_next_tile(); + switched_to_new_tile = true; + } + } + + if (render_work) { scoped_timer update_timer; - if (update_scene()) { + + if (switched_to_new_tile) { + BufferParams tile_params = buffer_params_; + + const Tile &tile = tile_manager_.get_current_tile(); + tile_params.width = tile.width; + tile_params.height = tile.height; + tile_params.full_x = tile.x + buffer_params_.full_x; + tile_params.full_y = tile.y + buffer_params_.full_y; + tile_params.full_width = buffer_params_.full_width; + tile_params.full_height = buffer_params_.full_height; + tile_params.update_offset_stride(); + + path_trace_->reset(buffer_params_, tile_params); + } + + const int resolution = render_work.resolution_divider; + const int width = max(1, buffer_params_.full_width / resolution); + const int height = max(1, buffer_params_.full_height / resolution); + + if (update_scene(width, height)) { profiler.reset(scene->shaders.size(), scene->objects.size()); } progress.add_skip_time(update_timer, params.background); } - return have_tiles; + return render_work; } -bool Session::run_wait_for_work(bool no_tiles) +bool Session::run_wait_for_work(const RenderWork &render_work) { /* 
In an offline rendering there is no pause, and no tiles will mean the job is fully done. */ if (params.background) { @@ -837,19 +345,20 @@ bool Session::run_wait_for_work(bool no_tiles) thread_scoped_lock pause_lock(pause_mutex_); - if (!pause_ && !no_tiles) { + if (!pause_ && render_work) { /* Rendering is not paused and there is work to be done. No need to wait for anything. */ return false; } - update_status_time(pause_, no_tiles); + const bool no_work = !render_work; + update_status_time(pause_, no_work); /* Only leave the loop when rendering is not paused. But even if the current render is un-paused * but there is nothing to render keep waiting until new work is added. */ while (!cancel_) { scoped_timer pause_timer; - if (!pause_ && (!no_tiles || new_work_added_ || delayed_reset_.do_reset)) { + if (!pause_ && (render_work || new_work_added_ || delayed_reset_.do_reset)) { break; } @@ -860,52 +369,89 @@ bool Session::run_wait_for_work(bool no_tiles) progress.add_skip_time(pause_timer, params.background); } - update_status_time(pause_, no_tiles); + update_status_time(pause_, no_work); progress.set_update(); } new_work_added_ = false; - return no_tiles; + return no_work; } -bool Session::draw(BufferParams &buffer_params, DeviceDrawParams &draw_params) +void Session::draw() { - if (device_use_gl_) - return draw_gpu(buffer_params, draw_params); - else - return draw_cpu(buffer_params, draw_params); + path_trace_->draw(); } -void Session::reset_(BufferParams &buffer_params, int samples) +int2 Session::get_effective_tile_size() const { - if (buffers && buffer_params.modified(tile_manager.params)) { - gpu_draw_ready_ = false; - buffers->reset(buffer_params); - if (display) { - display->reset(buffer_params); - } + /* No support yet for baking with tiles. 
*/ + if (!params.use_auto_tile || scene->bake_manager->get_baking()) { + return make_int2(buffer_params_.width, buffer_params_.height); } - tile_manager.reset(buffer_params, samples); - stealable_tiles_ = 0; - tile_stealing_state_ = NOT_STEALING; - progress.reset_sample(); + /* TODO(sergey): Take available memory into account, and if there is enough memory do not tile + * and prefer optimal performance. */ + + return make_int2(params.tile_size, params.tile_size); +} + +void Session::do_delayed_reset() +{ + if (!delayed_reset_.do_reset) { + return; + } + delayed_reset_.do_reset = false; + + params = delayed_reset_.session_params; + buffer_params_ = delayed_reset_.buffer_params; + + /* Store parameters used for buffers access outside of scene graph. */ + buffer_params_.samples = params.samples; + buffer_params_.exposure = scene->film->get_exposure(); + buffer_params_.use_approximate_shadow_catcher = + scene->film->get_use_approximate_shadow_catcher(); + buffer_params_.use_transparent_background = scene->background->get_transparent(); - bool show_progress = params.background || tile_manager.get_num_effective_samples() != INT_MAX; - progress.set_total_pixel_samples(show_progress ? tile_manager.state.total_pixel_samples : 0); + /* Tile and work scheduling. */ + tile_manager_.reset_scheduling(buffer_params_, get_effective_tile_size()); + render_scheduler_.reset(buffer_params_, params.samples); - if (!params.background) + /* Passes. */ + /* When multiple tiles are used SAMPLE_COUNT pass is used to keep track of possible partial + * tile results. It is safe to use generic update function here which checks for changes since + * changes in tile settings re-creates session, which ensures film is fully updated on tile + * changes. */ + scene->film->update_passes(scene, tile_manager_.has_multiple_tiles()); + + /* Update for new state of scene and passes. */ + buffer_params_.update_passes(scene->passes); + tile_manager_.update(buffer_params_, scene); + + /* Progress. 
*/ + progress.reset_sample(); + progress.set_total_pixel_samples(buffer_params_.width * buffer_params_.height * params.samples); + + if (!params.background) { progress.set_start_time(); + } progress.set_render_start_time(); } -void Session::reset(BufferParams &buffer_params, int samples) +void Session::reset(const SessionParams &session_params, const BufferParams &buffer_params) { - if (device_use_gl_) - reset_gpu(buffer_params, samples); - else - reset_cpu(buffer_params, samples); + { + thread_scoped_lock reset_lock(delayed_reset_.mutex); + thread_scoped_lock pause_lock(pause_mutex_); + + delayed_reset_.do_reset = true; + delayed_reset_.session_params = session_params; + delayed_reset_.buffer_params = buffer_params; + + path_trace_->cancel(); + } + + pause_cond_.notify_all(); } void Session::set_samples(int samples) @@ -915,7 +461,22 @@ void Session::set_samples(int samples) } params.samples = samples; - tile_manager.set_samples(samples); + + { + thread_scoped_lock pause_lock(pause_mutex_); + new_work_added_ = true; + } + + pause_cond_.notify_all(); +} + +void Session::set_time_limit(double time_limit) +{ + if (time_limit == params.time_limit) { + return; + } + + params.time_limit = time_limit; { thread_scoped_lock pause_lock(pause_mutex_); @@ -948,38 +509,9 @@ void Session::set_pause(bool pause) } } -void Session::set_denoising(const DenoiseParams &denoising) +void Session::set_gpu_display(unique_ptr<GPUDisplay> gpu_display) { - bool need_denoise = denoising.need_denoising_task(); - - /* Lock buffers so no denoising operation is triggered while the settings are changed here. */ - thread_scoped_lock buffers_lock(buffers_mutex_); - params.denoising = denoising; - - if (!(params.device.denoisers & denoising.type)) { - if (need_denoise) { - progress.set_error("Denoiser type not supported by compute device"); - } - - params.denoising.use = false; - need_denoise = false; - } - - // TODO(pmours): Query the required overlap value for denoising from the device? 
- tile_manager.slice_overlap = need_denoise && !params.background ? 64 : 0; - - /* Schedule per tile denoising for final renders if we are either denoising or - * need prefiltered passes for the native denoiser. */ - tile_manager.schedule_denoising = need_denoise && !buffers; -} - -void Session::set_denoising_start_sample(int sample) -{ - if (sample != params.denoising.start_sample) { - params.denoising.start_sample = sample; - - pause_cond_.notify_all(); - } + path_trace_->set_gpu_display(move(gpu_display)); } void Session::wait() @@ -989,81 +521,67 @@ void Session::wait() delete session_thread_; } - session_thread_ = NULL; + session_thread_ = nullptr; } -bool Session::update_scene() +bool Session::update_scene(int width, int height) { - /* update camera if dimensions changed for progressive render. the camera + /* Update camera if dimensions changed for progressive render. the camera * knows nothing about progressive or cropped rendering, it just gets the - * image dimensions passed in */ + * image dimensions passed in. */ Camera *cam = scene->camera; - int width = tile_manager.state.buffer.full_width; - int height = tile_manager.state.buffer.full_height; - int resolution = tile_manager.state.resolution_divider; - - cam->set_screen_size_and_resolution(width, height, resolution); + cam->set_screen_size(width, height); - /* number of samples is needed by multi jittered - * sampling pattern and by baking */ - Integrator *integrator = scene->integrator; - BakeManager *bake_manager = scene->bake_manager; + /* First detect which kernel features are used and allocate working memory. + * This helps estimate how may device memory is available for the scene and + * how much we need to allocate on the host instead. 
*/ + scene->update_kernel_features(); - if (integrator->get_sampling_pattern() != SAMPLING_PATTERN_SOBOL || bake_manager->get_baking()) { - integrator->set_aa_samples(tile_manager.num_samples); - } + path_trace_->load_kernels(); + path_trace_->alloc_work_memory(); - bool kernel_switch_needed = false; - if (scene->update(progress, kernel_switch_needed)) { - if (kernel_switch_needed) { - reset(tile_manager.params, params.samples); - } + if (scene->update(progress)) { return true; } + return false; } +static string status_append(const string &status, const string &suffix) +{ + string prefix = status; + if (!prefix.empty()) { + prefix += ", "; + } + return prefix + suffix; +} + void Session::update_status_time(bool show_pause, bool show_done) { - int progressive_sample = tile_manager.state.sample; - int num_samples = tile_manager.get_num_effective_samples(); + string status, substatus; - int tile = progress.get_rendered_tiles(); - int num_tiles = tile_manager.state.num_tiles; + const int current_tile = progress.get_rendered_tiles(); + const int num_tiles = tile_manager_.get_num_tiles(); - /* update status */ - string status, substatus; + const int current_sample = progress.get_current_sample(); + const int num_samples = render_scheduler_.get_num_samples(); - if (!params.progressive) { - const bool is_cpu = params.device.type == DEVICE_CPU; - const bool rendering_finished = (tile == num_tiles); - const bool is_last_tile = (tile + 1) == num_tiles; - - substatus = string_printf("Rendered %d/%d Tiles", tile, num_tiles); - - if (!rendering_finished && (device->show_samples() || (is_cpu && is_last_tile))) { - /* Some devices automatically support showing the sample number: - * - CUDADevice - * - OpenCLDevice when using the megakernel (the split kernel renders multiple - * samples at the same time, so the current sample isn't really defined) - * - CPUDevice when using one thread - * For these devices, the current sample is always shown. 
- * - * The other option is when the last tile is currently being rendered by the CPU. - */ - substatus += string_printf(", Sample %d/%d", progress.get_current_sample(), num_samples); - } - if (params.denoising.use && params.denoising.type != DENOISER_OPENIMAGEDENOISE) { - substatus += string_printf(", Denoised %d tiles", progress.get_denoised_tiles()); - } - else if (params.denoising.store_passes && params.denoising.type == DENOISER_NLM) { - substatus += string_printf(", Prefiltered %d tiles", progress.get_denoised_tiles()); - } + /* TIle. */ + if (tile_manager_.has_multiple_tiles()) { + substatus = status_append(substatus, + string_printf("Rendered %d/%d Tiles", current_tile, num_tiles)); } - else if (tile_manager.num_samples == Integrator::MAX_SAMPLES) - substatus = string_printf("Path Tracing Sample %d", progressive_sample + 1); - else - substatus = string_printf("Path Tracing Sample %d/%d", progressive_sample + 1, num_samples); + + /* Sample. */ + if (num_samples == Integrator::MAX_SAMPLES) { + substatus = status_append(substatus, string_printf("Sample %d", current_sample)); + } + else { + substatus = status_append(substatus, + string_printf("Sample %d/%d", current_sample, num_samples)); + } + + /* TODO(sergey): Denoising status from the path trace. */ if (show_pause) { status = "Rendering Paused"; @@ -1080,210 +598,122 @@ void Session::update_status_time(bool show_pause, bool show_done) progress.set_status(status, substatus); } -bool Session::render_need_denoise(bool &delayed) +void Session::device_free() { - delayed = false; - - /* Not supported yet for baking. */ - if (read_bake_tile_cb) { - return false; - } - - /* Denoising enabled? */ - if (!params.denoising.need_denoising_task()) { - return false; - } - - if (params.background) { - /* Background render, only denoise when rendering the last sample. */ - return tile_manager.done(); - } - - /* Viewport render. */ - - /* It can happen that denoising was already enabled, but the scene still needs an update. 
*/ - if (scene->film->is_modified() || !scene->film->get_denoising_data_offset()) { - return false; - } + scene->device_free(); + path_trace_->device_free(); +} - /* Immediately denoise when we reach the start sample or last sample. */ - const int num_samples_finished = tile_manager.state.sample + 1; - if (num_samples_finished == params.denoising.start_sample || - num_samples_finished == params.samples) { - return true; +void Session::collect_statistics(RenderStats *render_stats) +{ + scene->collect_statistics(render_stats); + if (params.use_profiling && (params.device.type == DEVICE_CPU)) { + render_stats->collect_profiling(scene, profiler); } +} - /* Do not denoise until the sample at which denoising should start is reached. */ - if (num_samples_finished < params.denoising.start_sample) { - return false; - } +/* -------------------------------------------------------------------- + * Tile and tile pixels access. + */ - /* Avoid excessive denoising in viewport after reaching a certain amount of samples. */ - delayed = (tile_manager.state.sample >= 20 && - (time_dt() - last_display_time_) < params.progressive_update_timeout); - return !delayed; +bool Session::has_multiple_render_tiles() const +{ + return tile_manager_.has_multiple_tiles(); } -void Session::render(bool need_denoise) +int2 Session::get_render_tile_size() const { - if (buffers && tile_manager.state.sample == tile_manager.range_start_sample) { - /* Clear buffers. */ - buffers->zero(); - } - - if (tile_manager.state.buffer.width == 0 || tile_manager.state.buffer.height == 0) { - return; /* Avoid empty launches. */ - } + return path_trace_->get_render_tile_size(); +} - /* Add path trace task. 
*/ - DeviceTask task(DeviceTask::RENDER); - - task.acquire_tile = function_bind(&Session::acquire_tile, this, _2, _1, _3); - task.release_tile = function_bind(&Session::release_tile, this, _1, need_denoise); - task.map_neighbor_tiles = function_bind(&Session::map_neighbor_tiles, this, _1, _2); - task.unmap_neighbor_tiles = function_bind(&Session::unmap_neighbor_tiles, this, _1, _2); - task.get_cancel = function_bind(&Progress::get_cancel, &this->progress); - task.update_tile_sample = function_bind(&Session::update_tile_sample, this, _1); - task.update_progress_sample = function_bind(&Progress::add_samples, &this->progress, _1, _2); - task.get_tile_stolen = function_bind(&Session::get_tile_stolen, this); - task.need_finish_queue = params.progressive_refine; - task.integrator_branched = scene->integrator->get_method() == Integrator::BRANCHED_PATH; - - task.adaptive_sampling.use = (scene->integrator->get_sampling_pattern() == - SAMPLING_PATTERN_PMJ) && - scene->dscene.data.film.pass_adaptive_aux_buffer; - task.adaptive_sampling.min_samples = scene->dscene.data.integrator.adaptive_min_samples; - task.adaptive_sampling.adaptive_step = scene->dscene.data.integrator.adaptive_step; - - /* Acquire render tiles by default. */ - task.tile_types = RenderTile::PATH_TRACE; - - if (need_denoise) { - task.denoising = params.denoising; - - task.pass_stride = scene->film->get_pass_stride(); - task.target_pass_stride = task.pass_stride; - task.pass_denoising_data = scene->film->get_denoising_data_offset(); - task.pass_denoising_clean = scene->film->get_denoising_clean_offset(); - - task.denoising_from_render = true; - - if (tile_manager.schedule_denoising) { - /* Acquire denoising tiles during rendering. */ - task.tile_types |= RenderTile::DENOISE; - } - else { - assert(buffers); - - /* Schedule rendering and wait for it to finish. */ - device->task_add(task); - device->task_wait(); - - /* Then run denoising on the whole image at once. 
*/ - task.type = DeviceTask::DENOISE_BUFFER; - task.x = tile_manager.state.buffer.full_x; - task.y = tile_manager.state.buffer.full_y; - task.w = tile_manager.state.buffer.width; - task.h = tile_manager.state.buffer.height; - task.buffer = buffers->buffer.device_pointer; - task.sample = tile_manager.state.sample; - task.num_samples = tile_manager.state.num_samples; - tile_manager.state.buffer.get_offset_stride(task.offset, task.stride); - task.buffers = buffers; - } - } +int2 Session::get_render_tile_offset() const +{ + return path_trace_->get_render_tile_offset(); +} - device->task_add(task); +string_view Session::get_render_tile_layer() const +{ + const BufferParams &buffer_params = path_trace_->get_render_tile_params(); + return buffer_params.layer; } -void Session::copy_to_display_buffer(int sample) +string_view Session::get_render_tile_view() const { - /* add film conversion task */ - DeviceTask task(DeviceTask::FILM_CONVERT); - - task.x = tile_manager.state.buffer.full_x; - task.y = tile_manager.state.buffer.full_y; - task.w = tile_manager.state.buffer.width; - task.h = tile_manager.state.buffer.height; - task.rgba_byte = display->rgba_byte.device_pointer; - task.rgba_half = display->rgba_half.device_pointer; - task.buffer = buffers->buffer.device_pointer; - task.sample = sample; - tile_manager.state.buffer.get_offset_stride(task.offset, task.stride); - - if (task.w > 0 && task.h > 0) { - device->task_add(task); - device->task_wait(); - - /* set display to new size */ - display->draw_set(task.w, task.h); - - last_display_time_ = time_dt(); - } + const BufferParams &buffer_params = path_trace_->get_render_tile_params(); + return buffer_params.view; +} - display_outdated_ = false; +bool Session::copy_render_tile_from_device() +{ + return path_trace_->copy_render_tile_from_device(); } -bool Session::update_progressive_refine(bool cancel) +bool Session::get_render_tile_pixels(const string &pass_name, int num_components, float *pixels) { - int sample = 
tile_manager.state.sample + 1; - bool write = sample == tile_manager.num_samples || cancel; + /* NOTE: The code relies on a fact that session is fully update and no scene/buffer modification + * is happening while this function runs. */ - double current_time = time_dt(); + const BufferParams &buffer_params = path_trace_->get_render_tile_params(); - if (current_time - last_update_time_ < params.progressive_update_timeout) { - /* If last sample was processed, we need to write buffers anyway. */ - if (!write && sample != 1) - return false; + const BufferPass *pass = buffer_params.find_pass(pass_name); + if (pass == nullptr) { + return false; } - if (params.progressive_refine) { - foreach (Tile &tile, tile_manager.state.tiles) { - if (!tile.buffers) { - continue; - } - - RenderTile rtile; - rtile.x = tile_manager.state.buffer.full_x + tile.x; - rtile.y = tile_manager.state.buffer.full_y + tile.y; - rtile.w = tile.w; - rtile.h = tile.h; - rtile.sample = sample; - rtile.buffers = tile.buffers; - - if (write) { - if (write_render_tile_cb) - write_render_tile_cb(rtile); - } - else { - if (update_render_tile_cb) - update_render_tile_cb(rtile, true); - } + const bool has_denoised_result = path_trace_->has_denoised_result(); + if (pass->mode == PassMode::DENOISED && !has_denoised_result) { + pass = buffer_params.find_pass(pass->type); + if (pass == nullptr) { + /* Happens when denoised result pass is requested but is never written by the kernel. 
*/ + return false; } } - last_update_time_ = current_time; + pass = buffer_params.get_actual_display_pass(pass); + + const float exposure = buffer_params.exposure; + const int num_samples = path_trace_->get_num_render_tile_samples(); - return write; + PassAccessor::PassAccessInfo pass_access_info(*pass); + pass_access_info.use_approximate_shadow_catcher = buffer_params.use_approximate_shadow_catcher; + pass_access_info.use_approximate_shadow_catcher_background = + pass_access_info.use_approximate_shadow_catcher && !buffer_params.use_transparent_background; + + const PassAccessorCPU pass_accessor(pass_access_info, exposure, num_samples); + const PassAccessor::Destination destination(pixels, num_components); + + return path_trace_->get_render_tile_pixels(pass_accessor, destination); } -void Session::device_free() +bool Session::set_render_tile_pixels(const string &pass_name, + int num_components, + const float *pixels) { - scene->device_free(); + /* NOTE: The code relies on a fact that session is fully update and no scene/buffer modification + * is happening while this function runs. */ + + const BufferPass *pass = buffer_params_.find_pass(pass_name); + if (!pass) { + return false; + } + + const float exposure = scene->film->get_exposure(); + const int num_samples = render_scheduler_.get_num_rendered_samples(); - tile_manager.device_free(); + const PassAccessor::PassAccessInfo pass_access_info(*pass); + PassAccessorCPU pass_accessor(pass_access_info, exposure, num_samples); + PassAccessor::Source source(pixels, num_components); - /* used from background render only, so no need to - * re-create render/display buffers here - */ + return path_trace_->set_render_tile_pixels(pass_accessor, source); } -void Session::collect_statistics(RenderStats *render_stats) +/* -------------------------------------------------------------------- + * Full-frame on-disk storage. 
+ */ + +void Session::process_full_buffer_from_disk(string_view filename) { - scene->collect_statistics(render_stats); - if (params.use_profiling && (params.device.type == DEVICE_CPU)) { - render_stats->collect_profiling(scene, profiler); - } + path_trace_->process_full_buffer_from_disk(filename); } CCL_NAMESPACE_END diff --git a/intern/cycles/render/session.h b/intern/cycles/render/session.h index 05025c10f9c..5623604bfe8 100644 --- a/intern/cycles/render/session.h +++ b/intern/cycles/render/session.h @@ -18,6 +18,7 @@ #define __SESSION_H__ #include "device/device.h" +#include "integrator/render_scheduler.h" #include "render/buffers.h" #include "render/shader.h" #include "render/stats.h" @@ -26,6 +27,7 @@ #include "util/util_progress.h" #include "util/util_stats.h" #include "util/util_thread.h" +#include "util/util_unique_ptr.h" #include "util/util_vector.h" CCL_NAMESPACE_BEGIN @@ -33,41 +35,35 @@ CCL_NAMESPACE_BEGIN class BufferParams; class Device; class DeviceScene; -class DeviceRequestedFeatures; -class DisplayBuffer; +class PathTrace; class Progress; +class GPUDisplay; class RenderBuffers; class Scene; +class SceneParams; /* Session Parameters */ class SessionParams { public: DeviceInfo device; + + bool headless; bool background; - bool progressive_refine; - bool progressive; bool experimental; int samples; - int2 tile_size; - TileOrder tile_order; - int start_resolution; - int denoising_start_sample; int pixel_size; int threads; - bool adaptive_sampling; - - bool use_profiling; - bool display_buffer_linear; + /* Limit in seconds for how long path tracing is allowed to happen. + * Zero means no limit is applied. 
*/ + double time_limit; - DenoiseParams denoising; + bool use_profiling; - double cancel_timeout; - double reset_timeout; - double text_timeout; - double progressive_update_timeout; + bool use_auto_tile; + int tile_size; ShadingSystem shadingsystem; @@ -75,50 +71,32 @@ class SessionParams { SessionParams() { + headless = false; background = false; - progressive_refine = false; - progressive = false; experimental = false; samples = 1024; - tile_size = make_int2(64, 64); - start_resolution = INT_MAX; - denoising_start_sample = 0; pixel_size = 1; threads = 0; - adaptive_sampling = false; + time_limit = 0.0; use_profiling = false; - display_buffer_linear = false; - - cancel_timeout = 0.1; - reset_timeout = 0.1; - text_timeout = 1.0; - progressive_update_timeout = 1.0; + use_auto_tile = true; + tile_size = 2048; shadingsystem = SHADINGSYSTEM_SVM; - tile_order = TILE_CENTER; } - bool modified(const SessionParams ¶ms) + bool modified(const SessionParams ¶ms) const { /* Modified means we have to recreate the session, any parameter changes * that can be handled by an existing Session are omitted. 
*/ - return !(device == params.device && background == params.background && - progressive_refine == params.progressive_refine && - progressive == params.progressive && experimental == params.experimental && - tile_size == params.tile_size && start_resolution == params.start_resolution && + return !(device == params.device && headless == params.headless && + background == params.background && experimental == params.experimental && pixel_size == params.pixel_size && threads == params.threads && - adaptive_sampling == params.adaptive_sampling && - use_profiling == params.use_profiling && - display_buffer_linear == params.display_buffer_linear && - cancel_timeout == params.cancel_timeout && reset_timeout == params.reset_timeout && - text_timeout == params.text_timeout && - progressive_update_timeout == params.progressive_update_timeout && - tile_order == params.tile_order && shadingsystem == params.shadingsystem && - denoising.type == params.denoising.type && - (denoising.use == params.denoising.use || (device.denoisers & denoising.type))); + use_profiling == params.use_profiling && shadingsystem == params.shadingsystem && + use_auto_tile == params.use_auto_tile && tile_size == params.tile_size); } }; @@ -131,34 +109,41 @@ class Session { public: Device *device; Scene *scene; - RenderBuffers *buffers; - DisplayBuffer *display; Progress progress; SessionParams params; - TileManager tile_manager; Stats stats; Profiler profiler; - function<void(RenderTile &)> write_render_tile_cb; - function<void(RenderTile &, bool)> update_render_tile_cb; - function<void(RenderTile &)> read_bake_tile_cb; + function<void(void)> write_render_tile_cb; + function<void(void)> update_render_tile_cb; + function<void(void)> read_render_tile_cb; + + /* Callback is invoked by tile manager whenever on-dist tiles storage file is closed after + * writing. 
Allows an engine integration to keep track of those files without worry about + * transferring the information when it needs to re-create session during rendering. */ + function<void(string_view)> full_buffer_written_cb; - explicit Session(const SessionParams ¶ms); + explicit Session(const SessionParams ¶ms, const SceneParams &scene_params); ~Session(); void start(); - void cancel(); - bool draw(BufferParams ¶ms, DeviceDrawParams &draw_params); + + /* When quick cancel is requested path tracing is cancels as soon as possible, without waiting + * for the buffer to be uniformly sampled. */ + void cancel(bool quick = false); + + void draw(); void wait(); bool ready_to_reset(); - void reset(BufferParams ¶ms, int samples); + void reset(const SessionParams &session_params, const BufferParams &buffer_params); + void set_pause(bool pause); + void set_samples(int samples); - void set_denoising(const DenoiseParams &denoising); - void set_denoising_start_sample(int sample); + void set_time_limit(double time_limit); - bool update_scene(); + void set_gpu_display(unique_ptr<GPUDisplay> gpu_display); void device_free(); @@ -168,83 +153,95 @@ class Session { void collect_statistics(RenderStats *stats); - protected: - struct DelayedReset { - thread_mutex mutex; - bool do_reset; - BufferParams params; - int samples; - } delayed_reset_; + /* -------------------------------------------------------------------- + * Tile and tile pixels access. + */ - void run(); + bool has_multiple_render_tiles() const; - bool run_update_for_next_iteration(); - bool run_wait_for_work(bool no_tiles); + /* Get size and offset (relative to the buffer's full x/y) of the currently rendering tile. 
*/ + int2 get_render_tile_size() const; + int2 get_render_tile_offset() const; - void update_status_time(bool show_pause = false, bool show_done = false); + string_view get_render_tile_layer() const; + string_view get_render_tile_view() const; - void render(bool use_denoise); - void copy_to_display_buffer(int sample); + bool copy_render_tile_from_device(); - void reset_(BufferParams ¶ms, int samples); + bool get_render_tile_pixels(const string &pass_name, int num_components, float *pixels); + bool set_render_tile_pixels(const string &pass_name, int num_components, const float *pixels); - void run_cpu(); - bool draw_cpu(BufferParams ¶ms, DeviceDrawParams &draw_params); - void reset_cpu(BufferParams ¶ms, int samples); + /* -------------------------------------------------------------------- + * Full-frame on-disk storage. + */ - void run_gpu(); - bool draw_gpu(BufferParams ¶ms, DeviceDrawParams &draw_params); - void reset_gpu(BufferParams ¶ms, int samples); + /* Read given full-frame file from disk, perform needed processing and write it to the software + * via the write callback. */ + void process_full_buffer_from_disk(string_view filename); - bool render_need_denoise(bool &delayed); + protected: + struct DelayedReset { + thread_mutex mutex; + bool do_reset; + SessionParams session_params; + BufferParams buffer_params; + } delayed_reset_; - bool steal_tile(RenderTile &tile, Device *tile_device, thread_scoped_lock &tile_lock); - bool get_tile_stolen(); - bool acquire_tile(RenderTile &tile, Device *tile_device, uint tile_types); - void update_tile_sample(RenderTile &tile); - void release_tile(RenderTile &tile, const bool need_denoise); + void run(); - void map_neighbor_tiles(RenderTileNeighbors &neighbors, Device *tile_device); - void unmap_neighbor_tiles(RenderTileNeighbors &neighbors, Device *tile_device); + /* Update for the new iteration of the main loop in run implementation (run_cpu and run_gpu). 
+ * + * Will take care of the following things: + * - Delayed reset + * - Scene update + * - Tile manager advance + * - Render scheduler work request + * + * The updates are done in a proper order with proper locking around them, which guarantees + * that the device side of scene and render buffers are always in a consistent state. + * + * Returns render work which is to be rendered next. */ + RenderWork run_update_for_next_iteration(); + + /* Wait for rendering to be unpaused, or for new tiles for render to arrive. + * Returns true if new main render loop iteration is required after this function call. + * + * The `render_work` is the work which was scheduled by the render scheduler right before + * checking the pause. */ + bool run_wait_for_work(const RenderWork &render_work); + + void run_main_render_loop(); + + bool update_scene(int width, int height); - bool device_use_gl_; + void update_status_time(bool show_pause = false, bool show_done = false); - thread *session_thread_; + void do_delayed_reset(); - volatile bool display_outdated_; + int2 get_effective_tile_size() const; - volatile bool gpu_draw_ready_; - volatile bool gpu_need_display_buffer_update_; - thread_condition_variable gpu_need_display_buffer_update_cond_; + thread *session_thread_; - bool pause_; - bool cancel_; - bool new_work_added_; + bool pause_ = false; + bool cancel_ = false; + bool new_work_added_ = false; thread_condition_variable pause_cond_; thread_mutex pause_mutex_; thread_mutex tile_mutex_; thread_mutex buffers_mutex_; - thread_mutex display_mutex_; - thread_condition_variable denoising_cond_; - thread_condition_variable tile_steal_cond_; - - double reset_time_; - double last_update_time_; - double last_display_time_; - - RenderTile stolen_tile_; - typedef enum { - NOT_STEALING, /* There currently is no tile stealing in progress. */ - WAITING_FOR_TILE, /* A device is waiting for another device to release a tile. */ - RELEASING_TILE, /* A device has releasing a stealable tile. 
*/ - GOT_TILE /* A device has released a stealable tile, which is now stored in stolen_tile. */ - } TileStealingState; - std::atomic<TileStealingState> tile_stealing_state_; - int stealable_tiles_; - - /* progressive refine */ - bool update_progressive_refine(bool cancel); + + TileManager tile_manager_; + BufferParams buffer_params_; + + /* Render scheduler is used to get work to be rendered with the current big tile. */ + RenderScheduler render_scheduler_; + + /* Path tracer object. + * + * Is a single full-frame path tracer for interactive viewport rendering. + * A path tracer for the current big-tile for an offline rendering. */ + unique_ptr<PathTrace> path_trace_; }; CCL_NAMESPACE_END diff --git a/intern/cycles/render/shader.cpp b/intern/cycles/render/shader.cpp index 59b60904746..f6b23606e58 100644 --- a/intern/cycles/render/shader.cpp +++ b/intern/cycles/render/shader.cpp @@ -203,6 +203,7 @@ Shader::Shader() : Node(get_node_type()) has_surface = false; has_surface_transparent = false; has_surface_emission = false; + has_surface_raytrace = false; has_surface_bssrdf = false; has_volume = false; has_displacement = false; @@ -485,7 +486,7 @@ void ShaderManager::device_update(Device *device, device_update_specific(device, dscene, scene, progress); } -void ShaderManager::device_update_common(Device *device, +void ShaderManager::device_update_common(Device * /*device*/, DeviceScene *dscene, Scene *scene, Progress & /*progress*/) @@ -508,6 +509,8 @@ void ShaderManager::device_update_common(Device *device, flag |= SD_HAS_EMISSION; if (shader->has_surface_transparent && shader->get_use_transparent_shadow()) flag |= SD_HAS_TRANSPARENT_SHADOW; + if (shader->has_surface_raytrace) + flag |= SD_HAS_RAYTRACE; if (shader->has_volume) { flag |= SD_HAS_VOLUME; has_volumes = true; @@ -528,12 +531,10 @@ void ShaderManager::device_update_common(Device *device, flag |= SD_NEED_VOLUME_ATTRIBUTES; if (shader->has_bssrdf_bump) flag |= SD_HAS_BSSRDF_BUMP; - if 
(device->info.has_volume_decoupled) { - if (shader->get_volume_sampling_method() == VOLUME_SAMPLING_EQUIANGULAR) - flag |= SD_VOLUME_EQUIANGULAR; - if (shader->get_volume_sampling_method() == VOLUME_SAMPLING_MULTIPLE_IMPORTANCE) - flag |= SD_VOLUME_MIS; - } + if (shader->get_volume_sampling_method() == VOLUME_SAMPLING_EQUIANGULAR) + flag |= SD_VOLUME_EQUIANGULAR; + if (shader->get_volume_sampling_method() == VOLUME_SAMPLING_MULTIPLE_IMPORTANCE) + flag |= SD_VOLUME_MIS; if (shader->get_volume_interpolation_method() == VOLUME_INTERPOLATION_CUBIC) flag |= SD_VOLUME_CUBIC; if (shader->has_bump) @@ -682,39 +683,35 @@ void ShaderManager::add_default(Scene *scene) } } -void ShaderManager::get_requested_graph_features(ShaderGraph *graph, - DeviceRequestedFeatures *requested_features) +uint ShaderManager::get_graph_kernel_features(ShaderGraph *graph) { + uint kernel_features = 0; + foreach (ShaderNode *node, graph->nodes) { - requested_features->max_nodes_group = max(requested_features->max_nodes_group, - node->get_group()); - requested_features->nodes_features |= node->get_feature(); + kernel_features |= node->get_feature(); if (node->special_type == SHADER_SPECIAL_TYPE_CLOSURE) { BsdfBaseNode *bsdf_node = static_cast<BsdfBaseNode *>(node); if (CLOSURE_IS_VOLUME(bsdf_node->get_closure_type())) { - requested_features->nodes_features |= NODE_FEATURE_VOLUME; + kernel_features |= KERNEL_FEATURE_NODE_VOLUME; } else if (CLOSURE_IS_PRINCIPLED(bsdf_node->get_closure_type())) { - requested_features->use_principled = true; + kernel_features |= KERNEL_FEATURE_PRINCIPLED; } } if (node->has_surface_bssrdf()) { - requested_features->use_subsurface = true; + kernel_features |= KERNEL_FEATURE_SUBSURFACE; } if (node->has_surface_transparent()) { - requested_features->use_transparent = true; - } - if (node->has_raytrace()) { - requested_features->use_shader_raytrace = true; + kernel_features |= KERNEL_FEATURE_TRANSPARENT; } } + + return kernel_features; } -void 
ShaderManager::get_requested_features(Scene *scene, - DeviceRequestedFeatures *requested_features) +uint ShaderManager::get_kernel_features(Scene *scene) { - requested_features->max_nodes_group = NODE_GROUP_LEVEL_0; - requested_features->nodes_features = 0; + uint kernel_features = KERNEL_FEATURE_NODE_BSDF | KERNEL_FEATURE_NODE_EMISSION; for (int i = 0; i < scene->shaders.size(); i++) { Shader *shader = scene->shaders[i]; if (!shader->reference_count()) { @@ -722,21 +719,22 @@ void ShaderManager::get_requested_features(Scene *scene, } /* Gather requested features from all the nodes from the graph nodes. */ - get_requested_graph_features(shader->graph, requested_features); + kernel_features |= get_graph_kernel_features(shader->graph); ShaderNode *output_node = shader->graph->output(); if (output_node->input("Displacement")->link != NULL) { - requested_features->nodes_features |= NODE_FEATURE_BUMP; + kernel_features |= KERNEL_FEATURE_NODE_BUMP; if (shader->get_displacement_method() == DISPLACE_BOTH) { - requested_features->nodes_features |= NODE_FEATURE_BUMP_STATE; - requested_features->max_nodes_group = max(requested_features->max_nodes_group, - NODE_GROUP_LEVEL_1); + kernel_features |= KERNEL_FEATURE_NODE_BUMP_STATE; } } /* On top of volume nodes, also check if we need volume sampling because - * e.g. an Emission node would slip through the NODE_FEATURE_VOLUME check */ - if (shader->has_volume) - requested_features->use_volume |= true; + * e.g. 
an Emission node would slip through the KERNEL_FEATURE_NODE_VOLUME check */ + if (shader->has_volume) { + kernel_features |= KERNEL_FEATURE_VOLUME; + } } + + return kernel_features; } void ShaderManager::free_memory() diff --git a/intern/cycles/render/shader.h b/intern/cycles/render/shader.h index c65cac351a4..5f9adea3949 100644 --- a/intern/cycles/render/shader.h +++ b/intern/cycles/render/shader.h @@ -38,7 +38,6 @@ CCL_NAMESPACE_BEGIN class Device; class DeviceScene; -class DeviceRequestedFeatures; class Mesh; class Progress; class Scene; @@ -117,6 +116,7 @@ class Shader : public Node { bool has_surface; bool has_surface_emission; bool has_surface_transparent; + bool has_surface_raytrace; bool has_volume; bool has_displacement; bool has_surface_bssrdf; @@ -216,7 +216,7 @@ class ShaderManager { static void add_default(Scene *scene); /* Selective nodes compilation. */ - void get_requested_features(Scene *scene, DeviceRequestedFeatures *requested_features); + uint get_kernel_features(Scene *scene); static void free_memory(); @@ -244,8 +244,7 @@ class ShaderManager { size_t beckmann_table_offset; - void get_requested_graph_features(ShaderGraph *graph, - DeviceRequestedFeatures *requested_features); + uint get_graph_kernel_features(ShaderGraph *graph); thread_spin_lock attribute_lock_; diff --git a/intern/cycles/render/stats.cpp b/intern/cycles/render/stats.cpp index 2c6273842e2..73eb7e21ff9 100644 --- a/intern/cycles/render/stats.cpp +++ b/intern/cycles/render/stats.cpp @@ -264,53 +264,34 @@ void RenderStats::collect_profiling(Scene *scene, Profiler &prof) has_profiling = true; kernel = NamedNestedSampleStats("Total render time", prof.get_event(PROFILING_UNKNOWN)); - kernel.add_entry("Ray setup", prof.get_event(PROFILING_RAY_SETUP)); - kernel.add_entry("Result writing", prof.get_event(PROFILING_WRITE_RESULT)); - - NamedNestedSampleStats &integrator = kernel.add_entry("Path integration", - prof.get_event(PROFILING_PATH_INTEGRATE)); - integrator.add_entry("Scene 
intersection", prof.get_event(PROFILING_SCENE_INTERSECT)); - integrator.add_entry("Indirect emission", prof.get_event(PROFILING_INDIRECT_EMISSION)); - integrator.add_entry("Volumes", prof.get_event(PROFILING_VOLUME)); - - NamedNestedSampleStats &shading = integrator.add_entry("Shading", 0); - shading.add_entry("Shader Setup", prof.get_event(PROFILING_SHADER_SETUP)); - shading.add_entry("Shader Eval", prof.get_event(PROFILING_SHADER_EVAL)); - shading.add_entry("Shader Apply", prof.get_event(PROFILING_SHADER_APPLY)); - shading.add_entry("Ambient Occlusion", prof.get_event(PROFILING_AO)); - shading.add_entry("Subsurface", prof.get_event(PROFILING_SUBSURFACE)); - - integrator.add_entry("Connect Light", prof.get_event(PROFILING_CONNECT_LIGHT)); - integrator.add_entry("Surface Bounce", prof.get_event(PROFILING_SURFACE_BOUNCE)); - - NamedNestedSampleStats &intersection = kernel.add_entry("Intersection", 0); - intersection.add_entry("Full Intersection", prof.get_event(PROFILING_INTERSECT)); - intersection.add_entry("Local Intersection", prof.get_event(PROFILING_INTERSECT_LOCAL)); - intersection.add_entry("Shadow All Intersection", - prof.get_event(PROFILING_INTERSECT_SHADOW_ALL)); - intersection.add_entry("Volume Intersection", prof.get_event(PROFILING_INTERSECT_VOLUME)); - intersection.add_entry("Volume All Intersection", - prof.get_event(PROFILING_INTERSECT_VOLUME_ALL)); - - NamedNestedSampleStats &closure = kernel.add_entry("Closures", 0); - closure.add_entry("Surface Closure Evaluation", prof.get_event(PROFILING_CLOSURE_EVAL)); - closure.add_entry("Surface Closure Sampling", prof.get_event(PROFILING_CLOSURE_SAMPLE)); - closure.add_entry("Volume Closure Evaluation", prof.get_event(PROFILING_CLOSURE_VOLUME_EVAL)); - closure.add_entry("Volume Closure Sampling", prof.get_event(PROFILING_CLOSURE_VOLUME_SAMPLE)); - - NamedNestedSampleStats &denoising = kernel.add_entry("Denoising", - prof.get_event(PROFILING_DENOISING)); - denoising.add_entry("Construct Transform", - 
prof.get_event(PROFILING_DENOISING_CONSTRUCT_TRANSFORM)); - denoising.add_entry("Reconstruct", prof.get_event(PROFILING_DENOISING_RECONSTRUCT)); - - NamedNestedSampleStats &prefilter = denoising.add_entry("Prefiltering", 0); - prefilter.add_entry("Divide Shadow", prof.get_event(PROFILING_DENOISING_DIVIDE_SHADOW)); - prefilter.add_entry("Non-Local means", prof.get_event(PROFILING_DENOISING_NON_LOCAL_MEANS)); - prefilter.add_entry("Get Feature", prof.get_event(PROFILING_DENOISING_GET_FEATURE)); - prefilter.add_entry("Detect Outliers", prof.get_event(PROFILING_DENOISING_DETECT_OUTLIERS)); - prefilter.add_entry("Combine Halves", prof.get_event(PROFILING_DENOISING_COMBINE_HALVES)); + kernel.add_entry("Intersect Closest", prof.get_event(PROFILING_INTERSECT_CLOSEST)); + kernel.add_entry("Intersect Shadow", prof.get_event(PROFILING_INTERSECT_SHADOW)); + kernel.add_entry("Intersect Subsurface", prof.get_event(PROFILING_INTERSECT_SUBSURFACE)); + kernel.add_entry("Intersect Volume Stack", prof.get_event(PROFILING_INTERSECT_VOLUME_STACK)); + + NamedNestedSampleStats &surface = kernel.add_entry("Shade Surface", 0); + surface.add_entry("Setup", prof.get_event(PROFILING_SHADE_SURFACE_SETUP)); + surface.add_entry("Shader Evaluation", prof.get_event(PROFILING_SHADE_SURFACE_EVAL)); + surface.add_entry("Render Passes", prof.get_event(PROFILING_SHADE_SURFACE_PASSES)); + surface.add_entry("Direct Light", prof.get_event(PROFILING_SHADE_SURFACE_DIRECT_LIGHT)); + surface.add_entry("Indirect Light", prof.get_event(PROFILING_SHADE_SURFACE_INDIRECT_LIGHT)); + surface.add_entry("Ambient Occlusion", prof.get_event(PROFILING_SHADE_SURFACE_AO)); + + NamedNestedSampleStats &volume = kernel.add_entry("Shade Volume", 0); + volume.add_entry("Setup", prof.get_event(PROFILING_SHADE_VOLUME_SETUP)); + volume.add_entry("Integrate", prof.get_event(PROFILING_SHADE_VOLUME_INTEGRATE)); + volume.add_entry("Direct Light", prof.get_event(PROFILING_SHADE_VOLUME_DIRECT_LIGHT)); + volume.add_entry("Indirect 
Light", prof.get_event(PROFILING_SHADE_VOLUME_INDIRECT_LIGHT)); + + NamedNestedSampleStats &shadow = kernel.add_entry("Shade Shadow", 0); + shadow.add_entry("Setup", prof.get_event(PROFILING_SHADE_SHADOW_SETUP)); + shadow.add_entry("Surface", prof.get_event(PROFILING_SHADE_SHADOW_SURFACE)); + shadow.add_entry("Volume", prof.get_event(PROFILING_SHADE_SHADOW_VOLUME)); + + NamedNestedSampleStats &light = kernel.add_entry("Shade Light", 0); + light.add_entry("Setup", prof.get_event(PROFILING_SHADE_LIGHT_SETUP)); + light.add_entry("Shader Evaluation", prof.get_event(PROFILING_SHADE_LIGHT_EVAL)); shaders.entries.clear(); foreach (Shader *shader, scene->shaders) { diff --git a/intern/cycles/render/svm.cpp b/intern/cycles/render/svm.cpp index dcb3976e15c..2379eb775a0 100644 --- a/intern/cycles/render/svm.cpp +++ b/intern/cycles/render/svm.cpp @@ -446,6 +446,8 @@ void SVMCompiler::generate_node(ShaderNode *node, ShaderNodeSet &done) if (current_type == SHADER_TYPE_SURFACE) { if (node->has_spatial_varying()) current_shader->has_surface_spatial_varying = true; + if (node->get_feature() & KERNEL_FEATURE_NODE_RAYTRACE) + current_shader->has_surface_raytrace = true; } else if (current_type == SHADER_TYPE_VOLUME) { if (node->has_spatial_varying()) @@ -492,6 +494,13 @@ void SVMCompiler::generate_svm_nodes(const ShaderNodeSet &nodes, CompilerState * void SVMCompiler::generate_closure_node(ShaderNode *node, CompilerState *state) { + /* Skip generating closure that are not supported or needed for a particular + * type of shader. For example a BSDF in a volume shader. 
*/ + const int node_feature = node->get_feature(); + if ((state->node_feature_mask & node_feature) != node_feature) { + return; + } + /* execute dependencies for closure */ foreach (ShaderInput *in, node->inputs) { if (in->link != NULL) { @@ -555,7 +564,7 @@ void SVMCompiler::find_aov_nodes_and_dependencies(ShaderNodeSet &aov_nodes, foreach (ShaderNode *node, graph->nodes) { if (node->special_type == SHADER_SPECIAL_TYPE_OUTPUT_AOV) { OutputAOVNode *aov_node = static_cast<OutputAOVNode *>(node); - if (aov_node->slot >= 0) { + if (aov_node->offset >= 0) { aov_nodes.insert(aov_node); foreach (ShaderInput *in, node->inputs) { if (in->link != NULL) { @@ -785,17 +794,21 @@ void SVMCompiler::compile_type(Shader *shader, ShaderGraph *graph, ShaderType ty case SHADER_TYPE_SURFACE: /* generate surface shader */ generate = true; shader->has_surface = true; + state.node_feature_mask = KERNEL_FEATURE_NODE_MASK_SURFACE; break; case SHADER_TYPE_VOLUME: /* generate volume shader */ generate = true; shader->has_volume = true; + state.node_feature_mask = KERNEL_FEATURE_NODE_MASK_VOLUME; break; case SHADER_TYPE_DISPLACEMENT: /* generate displacement shader */ generate = true; shader->has_displacement = true; + state.node_feature_mask = KERNEL_FEATURE_NODE_MASK_DISPLACEMENT; break; case SHADER_TYPE_BUMP: /* generate bump shader */ generate = true; + state.node_feature_mask = KERNEL_FEATURE_NODE_MASK_BUMP; break; default: break; @@ -867,6 +880,7 @@ void SVMCompiler::compile(Shader *shader, array<int4> &svm_nodes, int index, Sum shader->has_surface = false; shader->has_surface_emission = false; shader->has_surface_transparent = false; + shader->has_surface_raytrace = false; shader->has_surface_bssrdf = false; shader->has_bump = has_bump; shader->has_bssrdf_bump = has_bump; @@ -964,6 +978,7 @@ SVMCompiler::CompilerState::CompilerState(ShaderGraph *graph) max_id = max(node->id, max_id); } nodes_done_flag.resize(max_id + 1, false); + node_feature_mask = 0; } CCL_NAMESPACE_END diff --git 
a/intern/cycles/render/svm.h b/intern/cycles/render/svm.h index d23ff3e2a47..0353c393ae4 100644 --- a/intern/cycles/render/svm.h +++ b/intern/cycles/render/svm.h @@ -192,6 +192,9 @@ class SVMCompiler { * all areas to use this flags array. */ vector<bool> nodes_done_flag; + + /* Node features that can be compiled. */ + uint node_feature_mask; }; void stack_clear_temporary(ShaderNode *node); diff --git a/intern/cycles/render/tile.cpp b/intern/cycles/render/tile.cpp index 375c9fd8e09..28910bffa7b 100644 --- a/intern/cycles/render/tile.cpp +++ b/intern/cycles/render/tile.cpp @@ -16,601 +16,559 @@ #include "render/tile.h" +#include <atomic> + +#include "graph/node.h" +#include "render/background.h" +#include "render/film.h" +#include "render/integrator.h" +#include "render/scene.h" #include "util/util_algorithm.h" #include "util/util_foreach.h" +#include "util/util_logging.h" +#include "util/util_path.h" +#include "util/util_string.h" +#include "util/util_system.h" #include "util/util_types.h" CCL_NAMESPACE_BEGIN -namespace { +/* -------------------------------------------------------------------- + * Internal functions. + */ -class TileComparator { - public: - TileComparator(TileOrder order_, int2 center_, Tile *tiles_) - : order(order_), center(center_), tiles(tiles_) - { - } +static const char *ATTR_PASSES_COUNT = "cycles.passes.count"; +static const char *ATTR_PASS_SOCKET_PREFIX_FORMAT = "cycles.passes.%d."; +static const char *ATTR_BUFFER_SOCKET_PREFIX = "cycles.buffer."; +static const char *ATTR_DENOISE_SOCKET_PREFIX = "cycles.denoise."; - bool operator()(int a, int b) - { - switch (order) { - case TILE_CENTER: { - float2 dist_a = make_float2(center.x - (tiles[a].x + tiles[a].w / 2), - center.y - (tiles[a].y + tiles[a].h / 2)); - float2 dist_b = make_float2(center.x - (tiles[b].x + tiles[b].w / 2), - center.y - (tiles[b].y + tiles[b].h / 2)); - return dot(dist_a, dist_a) < dot(dist_b, dist_b); - } - case TILE_LEFT_TO_RIGHT: - return (tiles[a].x == tiles[b].x) ? 
(tiles[a].y < tiles[b].y) : (tiles[a].x < tiles[b].x); - case TILE_RIGHT_TO_LEFT: - return (tiles[a].x == tiles[b].x) ? (tiles[a].y < tiles[b].y) : (tiles[a].x > tiles[b].x); - case TILE_TOP_TO_BOTTOM: - return (tiles[a].y == tiles[b].y) ? (tiles[a].x < tiles[b].x) : (tiles[a].y > tiles[b].y); - case TILE_BOTTOM_TO_TOP: - default: - return (tiles[a].y == tiles[b].y) ? (tiles[a].x < tiles[b].x) : (tiles[a].y < tiles[b].y); +/* Global counter of ToleManager object instances. */ +static std::atomic<uint64_t> g_instance_index = 0; + +/* Construct names of EXR channels which will ensure order of all channels to match exact offsets + * in render buffers corresponding to the given passes. + * + * Returns `std` datatypes so that it can be assigned directly to the OIIO's `ImageSpec`. */ +static std::vector<std::string> exr_channel_names_for_passes(const BufferParams &buffer_params) +{ + static const char *component_suffixes[] = {"R", "G", "B", "A"}; + + int pass_index = 0; + int num_channels = 0; + std::vector<std::string> channel_names; + for (const BufferPass &pass : buffer_params.passes) { + if (pass.offset == PASS_UNUSED) { + continue; } - } - protected: - TileOrder order; - int2 center; - Tile *tiles; -}; + const PassInfo pass_info = pass.get_info(); + num_channels += pass_info.num_components; -inline int2 hilbert_index_to_pos(int n, int d) -{ - int2 r, xy = make_int2(0, 0); - for (int s = 1; s < n; s *= 2) { - r.x = (d >> 1) & 1; - r.y = (d ^ r.x) & 1; - if (!r.y) { - if (r.x) { - xy = make_int2(s - 1, s - 1) - xy; - } - swap(xy.x, xy.y); + /* EXR canonically expects first part of channel names to be sorted alphabetically, which is + * not guaranteed to be the case with passes names. Assign a prefix based on the pass index + * with a fixed width to ensure ordering. This makes it possible to dump existing render + * buffers memory to disk and read it back without doing extra mapping. 
*/ + const string prefix = string_printf("%08d", pass_index); + + const string channel_name_prefix = prefix + string(pass.name) + "."; + + for (int i = 0; i < pass_info.num_components; ++i) { + channel_names.push_back(channel_name_prefix + component_suffixes[i]); } - xy += r * make_int2(s, s); - d >>= 2; + + ++pass_index; } - return xy; + + return channel_names; } -enum SpiralDirection { - DIRECTION_UP, - DIRECTION_LEFT, - DIRECTION_DOWN, - DIRECTION_RIGHT, -}; - -} /* namespace */ - -TileManager::TileManager(bool progressive_, - int num_samples_, - int2 tile_size_, - int start_resolution_, - bool preserve_tile_device_, - bool background_, - TileOrder tile_order_, - int num_devices_, - int pixel_size_) +inline string node_socket_attribute_name(const SocketType &socket, const string &attr_name_prefix) { - progressive = progressive_; - tile_size = tile_size_; - tile_order = tile_order_; - start_resolution = start_resolution_; - pixel_size = pixel_size_; - slice_overlap = 0; - num_samples = num_samples_; - num_devices = num_devices_; - preserve_tile_device = preserve_tile_device_; - background = background_; - schedule_denoising = false; - - range_start_sample = 0; - range_num_samples = -1; - - BufferParams buffer_params; - reset(buffer_params, 0); + return attr_name_prefix + string(socket.name); } -TileManager::~TileManager() +template<typename ValidateValueFunc, typename GetValueFunc> +static bool node_socket_generic_to_image_spec_atttributes( + ImageSpec *image_spec, + const Node *node, + const SocketType &socket, + const string &attr_name_prefix, + const ValidateValueFunc &validate_value_func, + const GetValueFunc &get_value_func) { + if (!validate_value_func(node, socket)) { + return false; + } + + image_spec->attribute(node_socket_attribute_name(socket, attr_name_prefix), + get_value_func(node, socket)); + + return true; } -void TileManager::device_free() +static bool node_socket_to_image_spec_atttributes(ImageSpec *image_spec, + const Node *node, + const 
SocketType &socket, + const string &attr_name_prefix) { - if (schedule_denoising || progressive) { - for (int i = 0; i < state.tiles.size(); i++) { - delete state.tiles[i].buffers; - state.tiles[i].buffers = NULL; + const string attr_name = node_socket_attribute_name(socket, attr_name_prefix); + + switch (socket.type) { + case SocketType::ENUM: { + const ustring value = node->get_string(socket); + + /* Validate that the node is consistent with the node type definition. */ + const NodeEnum &enum_values = *socket.enum_values; + if (!enum_values.exists(value)) { + LOG(DFATAL) << "Node enum contains invalid value " << value; + return false; + } + + image_spec->attribute(attr_name, value); + + return true; } - } - state.tiles.clear(); + case SocketType::STRING: + image_spec->attribute(attr_name, node->get_string(socket)); + return true; + + case SocketType::INT: + image_spec->attribute(attr_name, node->get_int(socket)); + return true; + + case SocketType::FLOAT: + image_spec->attribute(attr_name, node->get_float(socket)); + return true; + + case SocketType::BOOLEAN: + image_spec->attribute(attr_name, node->get_bool(socket)); + return true; + + default: + LOG(DFATAL) << "Unhandled socket type " << socket.type << ", should never happen."; + return false; + } } -static int get_divider(int w, int h, int start_resolution) +static bool node_socket_from_image_spec_atttributes(Node *node, + const SocketType &socket, + const ImageSpec &image_spec, + const string &attr_name_prefix) { - int divider = 1; - if (start_resolution != INT_MAX) { - while (w * h > start_resolution * start_resolution) { - w = max(1, w / 2); - h = max(1, h / 2); + const string attr_name = node_socket_attribute_name(socket, attr_name_prefix); + + switch (socket.type) { + case SocketType::ENUM: { + /* TODO(sergey): Avoid construction of `ustring` by using `string_view` in the Node API. 
*/ + const ustring value(image_spec.get_string_attribute(attr_name, "")); + + /* Validate that the node is consistent with the node type definition. */ + const NodeEnum &enum_values = *socket.enum_values; + if (!enum_values.exists(value)) { + LOG(ERROR) << "Invalid enumerator value " << value; + return false; + } - divider <<= 1; + node->set(socket, enum_values[value]); + + return true; } + + case SocketType::STRING: + /* TODO(sergey): Avoid construction of `ustring` by using `string_view` in the Node API. */ + node->set(socket, ustring(image_spec.get_string_attribute(attr_name, ""))); + return true; + + case SocketType::INT: + node->set(socket, image_spec.get_int_attribute(attr_name, 0)); + return true; + + case SocketType::FLOAT: + node->set(socket, image_spec.get_float_attribute(attr_name, 0)); + return true; + + case SocketType::BOOLEAN: + node->set(socket, static_cast<bool>(image_spec.get_int_attribute(attr_name, 0))); + return true; + + default: + LOG(DFATAL) << "Unhandled socket type " << socket.type << ", should never happen."; + return false; } - return divider; } -void TileManager::reset(BufferParams ¶ms_, int num_samples_) +static bool node_to_image_spec_atttributes(ImageSpec *image_spec, + const Node *node, + const string &attr_name_prefix) { - params = params_; - - set_samples(num_samples_); - - state.buffer = BufferParams(); - state.sample = range_start_sample - 1; - state.num_tiles = 0; - state.num_samples = 0; - state.resolution_divider = get_divider(params.width, params.height, start_resolution); - state.render_tiles.clear(); - state.denoising_tiles.clear(); - device_free(); + for (const SocketType &socket : node->type->inputs) { + if (!node_socket_to_image_spec_atttributes(image_spec, node, socket, attr_name_prefix)) { + return false; + } + } + + return true; } -void TileManager::set_samples(int num_samples_) +static bool node_from_image_spec_atttributes(Node *node, + const ImageSpec &image_spec, + const string &attr_name_prefix) { - num_samples = 
num_samples_; + for (const SocketType &socket : node->type->inputs) { + if (!node_socket_from_image_spec_atttributes(node, socket, image_spec, attr_name_prefix)) { + return false; + } + } + + return true; +} - /* No real progress indication is possible when using unlimited samples. */ - if (num_samples == INT_MAX) { - state.total_pixel_samples = 0; +static bool buffer_params_to_image_spec_atttributes(ImageSpec *image_spec, + const BufferParams &buffer_params) +{ + if (!node_to_image_spec_atttributes(image_spec, &buffer_params, ATTR_BUFFER_SOCKET_PREFIX)) { + return false; } - else { - uint64_t pixel_samples = 0; - /* While rendering in the viewport, the initial preview resolution is increased to the native - * resolution before the actual rendering begins. Therefore, additional pixel samples will be - * rendered. */ - int divider = max(get_divider(params.width, params.height, start_resolution) / 2, pixel_size); - while (divider > pixel_size) { - int image_w = max(1, params.width / divider); - int image_h = max(1, params.height / divider); - pixel_samples += image_w * image_h; - divider >>= 1; - } - int image_w = max(1, params.width / divider); - int image_h = max(1, params.height / divider); - state.total_pixel_samples = pixel_samples + - (uint64_t)get_num_effective_samples() * image_w * image_h; - if (schedule_denoising) { - state.total_pixel_samples += params.width * params.height; + /* Passes storage is not covered by the node socket. so "expand" the loop manually. 
*/ + + const int num_passes = buffer_params.passes.size(); + image_spec->attribute(ATTR_PASSES_COUNT, num_passes); + + for (int pass_index = 0; pass_index < num_passes; ++pass_index) { + const string attr_name_prefix = string_printf(ATTR_PASS_SOCKET_PREFIX_FORMAT, pass_index); + + const BufferPass *pass = &buffer_params.passes[pass_index]; + if (!node_to_image_spec_atttributes(image_spec, pass, attr_name_prefix)) { + return false; } } + + return true; } -/* If sliced is false, splits image into tiles and assigns equal amount of tiles to every render - * device. If sliced is true, slice image into as much pieces as how many devices are rendering - * this image. */ -int TileManager::gen_tiles(bool sliced) +static bool buffer_params_from_image_spec_atttributes(BufferParams *buffer_params, + const ImageSpec &image_spec) { - int resolution = state.resolution_divider; - int image_w = max(1, params.width / resolution); - int image_h = max(1, params.height / resolution); - int2 center = make_int2(image_w / 2, image_h / 2); - - int num = preserve_tile_device || sliced ? min(image_h, num_devices) : 1; - int slice_num = sliced ? num : 1; - int tile_w = (tile_size.x >= image_w) ? 1 : divide_up(image_w, tile_size.x); - - device_free(); - state.render_tiles.clear(); - state.denoising_tiles.clear(); - state.render_tiles.resize(num); - state.denoising_tiles.resize(num); - state.tile_stride = tile_w; - vector<list<int>>::iterator tile_list; - tile_list = state.render_tiles.begin(); - - if (tile_order == TILE_HILBERT_SPIRAL) { - assert(!sliced && slice_overlap == 0); - - int tile_h = (tile_size.y >= image_h) ? 1 : divide_up(image_h, tile_size.y); - state.tiles.resize(tile_w * tile_h); - - /* Size of blocks in tiles, must be a power of 2 */ - const int hilbert_size = (max(tile_size.x, tile_size.y) <= 12) ? 
8 : 4; - - int tiles_per_device = divide_up(tile_w * tile_h, num); - int cur_device = 0, cur_tiles = 0; - - int2 block_size = tile_size * make_int2(hilbert_size, hilbert_size); - /* Number of blocks to fill the image */ - int blocks_x = (block_size.x >= image_w) ? 1 : divide_up(image_w, block_size.x); - int blocks_y = (block_size.y >= image_h) ? 1 : divide_up(image_h, block_size.y); - int n = max(blocks_x, blocks_y) | 0x1; /* Side length of the spiral (must be odd) */ - /* Offset of spiral (to keep it centered) */ - int2 offset = make_int2((image_w - n * block_size.x) / 2, (image_h - n * block_size.y) / 2); - offset = (offset / tile_size) * tile_size; /* Round to tile border. */ - - int2 block = make_int2(0, 0); /* Current block */ - SpiralDirection prev_dir = DIRECTION_UP, dir = DIRECTION_UP; - for (int i = 0;;) { - /* Generate the tiles in the current block. */ - for (int hilbert_index = 0; hilbert_index < hilbert_size * hilbert_size; hilbert_index++) { - int2 tile, hilbert_pos = hilbert_index_to_pos(hilbert_size, hilbert_index); - /* Rotate block according to spiral direction. */ - if (prev_dir == DIRECTION_UP && dir == DIRECTION_UP) { - tile = make_int2(hilbert_pos.y, hilbert_pos.x); - } - else if (dir == DIRECTION_LEFT || prev_dir == DIRECTION_LEFT) { - tile = hilbert_pos; - } - else if (dir == DIRECTION_DOWN) { - tile = make_int2(hilbert_size - 1 - hilbert_pos.y, hilbert_size - 1 - hilbert_pos.x); - } - else { - tile = make_int2(hilbert_size - 1 - hilbert_pos.x, hilbert_size - 1 - hilbert_pos.y); - } - - int2 pos = block * block_size + tile * tile_size + offset; - /* Only add tiles which are in the image (tiles outside of the image can be generated since - * the spiral is always square). 
*/ - if (pos.x >= 0 && pos.y >= 0 && pos.x < image_w && pos.y < image_h) { - int w = min(tile_size.x, image_w - pos.x); - int h = min(tile_size.y, image_h - pos.y); - int2 ipos = pos / tile_size; - int idx = ipos.y * tile_w + ipos.x; - state.tiles[idx] = Tile(idx, pos.x, pos.y, w, h, cur_device, Tile::RENDER); - tile_list->push_front(idx); - cur_tiles++; - - if (cur_tiles == tiles_per_device) { - tile_list++; - cur_tiles = 0; - cur_device++; - } - } - } + if (!node_from_image_spec_atttributes(buffer_params, image_spec, ATTR_BUFFER_SOCKET_PREFIX)) { + return false; + } - /* Stop as soon as the spiral has reached the center block. */ - if (block.x == (n - 1) / 2 && block.y == (n - 1) / 2) - break; - - /* Advance to next block. */ - prev_dir = dir; - switch (dir) { - case DIRECTION_UP: - block.y++; - if (block.y == (n - i - 1)) { - dir = DIRECTION_LEFT; - } - break; - case DIRECTION_LEFT: - block.x++; - if (block.x == (n - i - 1)) { - dir = DIRECTION_DOWN; - } - break; - case DIRECTION_DOWN: - block.y--; - if (block.y == i) { - dir = DIRECTION_RIGHT; - } - break; - case DIRECTION_RIGHT: - block.x--; - if (block.x == i + 1) { - dir = DIRECTION_UP; - i++; - } - break; - } - } - return tile_w * tile_h; + /* Passes storage is not covered by the node socket. so "expand" the loop manually. */ + + const int num_passes = image_spec.get_int_attribute(ATTR_PASSES_COUNT, 0); + if (num_passes == 0) { + LOG(ERROR) << "Missing passes count attribute."; + return false; } - int idx = 0; - for (int slice = 0; slice < slice_num; slice++) { - int slice_y = (image_h / slice_num) * slice; - int slice_h = (slice == slice_num - 1) ? 
image_h - slice * (image_h / slice_num) : - image_h / slice_num; + for (int pass_index = 0; pass_index < num_passes; ++pass_index) { + const string attr_name_prefix = string_printf(ATTR_PASS_SOCKET_PREFIX_FORMAT, pass_index); - if (slice_overlap != 0) { - int slice_y_offset = max(slice_y - slice_overlap, 0); - slice_h = min(slice_y + slice_h + slice_overlap, image_h) - slice_y_offset; - slice_y = slice_y_offset; - } + BufferPass pass; - int tile_h = (tile_size.y >= slice_h) ? 1 : divide_up(slice_h, tile_size.y); - - int tiles_per_device = divide_up(tile_w * tile_h, num); - int cur_device = 0, cur_tiles = 0; - - for (int tile_y = 0; tile_y < tile_h; tile_y++) { - for (int tile_x = 0; tile_x < tile_w; tile_x++, idx++) { - int x = tile_x * tile_size.x; - int y = tile_y * tile_size.y; - int w = (tile_x == tile_w - 1) ? image_w - x : tile_size.x; - int h = (tile_y == tile_h - 1) ? slice_h - y : tile_size.y; - - state.tiles.push_back( - Tile(idx, x, y + slice_y, w, h, sliced ? slice : cur_device, Tile::RENDER)); - tile_list->push_back(idx); - - if (!sliced) { - cur_tiles++; - - if (cur_tiles == tiles_per_device) { - /* Tiles are already generated in Bottom-to-Top order, so no sort is necessary in that - * case. */ - if (tile_order != TILE_BOTTOM_TO_TOP) { - tile_list->sort(TileComparator(tile_order, center, &state.tiles[0])); - } - tile_list++; - cur_tiles = 0; - cur_device++; - } - } - } - } - if (sliced) { - tile_list++; + if (!node_from_image_spec_atttributes(&pass, image_spec, attr_name_prefix)) { + return false; } + + buffer_params->passes.emplace_back(std::move(pass)); } - return idx; + buffer_params->update_passes(); + + return true; } -void TileManager::gen_render_tiles() +/* Configure image specification for the given buffer parameters and passes. + * + * Image channels will be strictly ordered to match content of corresponding buffer, and the + * metadata will be set so that the render buffers and passes can be reconstructed from it. 
+ * + * If the tile size different from (0, 0) the image specification will be configured to use the + * given tile size for tiled IO. */ +static bool configure_image_spec_from_buffer(ImageSpec *image_spec, + const BufferParams &buffer_params, + const int2 tile_size = make_int2(0, 0)) { - /* Regenerate just the render tiles for progressive render. */ - foreach (Tile &tile, state.tiles) { - tile.state = Tile::RENDER; - state.render_tiles[tile.device].push_back(tile.index); + const std::vector<std::string> channel_names = exr_channel_names_for_passes(buffer_params); + const int num_channels = channel_names.size(); + + *image_spec = ImageSpec( + buffer_params.width, buffer_params.height, num_channels, TypeDesc::FLOAT); + + image_spec->channelnames = move(channel_names); + + if (!buffer_params_to_image_spec_atttributes(image_spec, buffer_params)) { + return false; + } + + if (tile_size.x != 0 || tile_size.y != 0) { + DCHECK_GT(tile_size.x, 0); + DCHECK_GT(tile_size.y, 0); + + image_spec->tile_width = tile_size.x; + image_spec->tile_height = tile_size.y; } + + return true; } -void TileManager::set_tiles() +/* -------------------------------------------------------------------- + * Tile Manager. + */ + +TileManager::TileManager() { - int resolution = state.resolution_divider; - int image_w = max(1, params.width / resolution); - int image_h = max(1, params.height / resolution); + /* Use process ID to separate different processes. + * To ensure uniqueness from within a process use combination of object address and instance + * index. This solves problem of possible object re-allocation at the same time, and solves + * possible conflict when the counter overflows while there are still active instances of the + * class. 
*/ + const int tile_manager_id = g_instance_index.fetch_add(1, std::memory_order_relaxed); + tile_file_unique_part_ = to_string(system_self_process_id()) + "-" + + to_string(reinterpret_cast<uintptr_t>(this)) + "-" + + to_string(tile_manager_id); +} - state.num_tiles = gen_tiles(!background); +TileManager::~TileManager() +{ +} + +void TileManager::reset_scheduling(const BufferParams ¶ms, int2 tile_size) +{ + VLOG(3) << "Using tile size of " << tile_size; + + close_tile_output(); + + tile_size_ = tile_size; + + tile_state_.num_tiles_x = divide_up(params.width, tile_size_.x); + tile_state_.num_tiles_y = divide_up(params.height, tile_size_.y); + tile_state_.num_tiles = tile_state_.num_tiles_x * tile_state_.num_tiles_y; + + tile_state_.next_tile_index = 0; + + tile_state_.current_tile = Tile(); +} + +void TileManager::update(const BufferParams ¶ms, const Scene *scene) +{ + DCHECK_NE(params.pass_stride, -1); + + buffer_params_ = params; - state.buffer.width = image_w; - state.buffer.height = image_h; + /* TODO(sergey): Proper Error handling, so that if configuration has failed we don't attempt to + * write to a partially configured file. 
*/ + configure_image_spec_from_buffer(&write_state_.image_spec, buffer_params_, tile_size_); - state.buffer.full_x = params.full_x / resolution; - state.buffer.full_y = params.full_y / resolution; - state.buffer.full_width = max(1, params.full_width / resolution); - state.buffer.full_height = max(1, params.full_height / resolution); + const DenoiseParams denoise_params = scene->integrator->get_denoise_params(); + node_to_image_spec_atttributes( + &write_state_.image_spec, &denoise_params, ATTR_DENOISE_SOCKET_PREFIX); } -int TileManager::get_neighbor_index(int index, int neighbor) +bool TileManager::done() { - /* Neighbor indices: - * 0 1 2 - * 3 4 5 - * 6 7 8 - */ - static const int dx[] = {-1, 0, 1, -1, 0, 1, -1, 0, 1}; - static const int dy[] = {-1, -1, -1, 0, 0, 0, 1, 1, 1}; - - int resolution = state.resolution_divider; - int image_w = max(1, params.width / resolution); - int image_h = max(1, params.height / resolution); - - int num = min(image_h, num_devices); - int slice_num = !background ? num : 1; - int slice_h = image_h / slice_num; - - int tile_w = (tile_size.x >= image_w) ? 1 : divide_up(image_w, tile_size.x); - int tile_h = (tile_size.y >= slice_h) ? 1 : divide_up(slice_h, tile_size.y); - - /* Tiles in the state tile list are always indexed from left to right, top to bottom. */ - int nx = (index % tile_w) + dx[neighbor]; - int ny = (index / tile_w) + dy[neighbor]; - if (nx < 0 || ny < 0 || nx >= tile_w || ny >= tile_h * slice_num) - return -1; - - return ny * state.tile_stride + nx; + return tile_state_.next_tile_index == tile_state_.num_tiles; } -/* Checks whether all neighbors of a tile (as well as the tile itself) are at least at state - * min_state. 
*/ -bool TileManager::check_neighbor_state(int index, Tile::State min_state) +bool TileManager::next() { - if (index < 0 || state.tiles[index].state < min_state) { + if (done()) { return false; } - for (int neighbor = 0; neighbor < 9; neighbor++) { - int nindex = get_neighbor_index(index, neighbor); - /* Out-of-bounds tiles don't matter. */ - if (nindex >= 0 && state.tiles[nindex].state < min_state) { - return false; - } - } + + tile_state_.current_tile = get_tile_for_index(tile_state_.next_tile_index); + + ++tile_state_.next_tile_index; return true; } -/* Returns whether the tile should be written (and freed if no denoising is used) instead of - * updating. */ -bool TileManager::finish_tile(const int index, const bool need_denoise, bool &delete_tile) +Tile TileManager::get_tile_for_index(int index) const { - delete_tile = false; - - switch (state.tiles[index].state) { - case Tile::RENDER: { - if (!(schedule_denoising && need_denoise)) { - state.tiles[index].state = Tile::DONE; - delete_tile = !progressive; - return true; - } - state.tiles[index].state = Tile::RENDERED; - /* For each neighbor and the tile itself, check whether all of its neighbors have been - * rendered. If yes, it can be denoised. */ - for (int neighbor = 0; neighbor < 9; neighbor++) { - int nindex = get_neighbor_index(index, neighbor); - if (check_neighbor_state(nindex, Tile::RENDERED)) { - state.tiles[nindex].state = Tile::DENOISE; - state.denoising_tiles[state.tiles[nindex].device].push_back(nindex); - } - } - return false; - } - case Tile::DENOISE: { - state.tiles[index].state = Tile::DENOISED; - /* For each neighbor and the tile itself, check whether all of its neighbors have been - * denoised. If yes, it can be freed. */ - for (int neighbor = 0; neighbor < 9; neighbor++) { - int nindex = get_neighbor_index(index, neighbor); - if (check_neighbor_state(nindex, Tile::DENOISED)) { - state.tiles[nindex].state = Tile::DONE; - /* Do not delete finished tiles in progressive mode. 
*/ - if (!progressive) { - /* It can happen that the tile just finished denoising and already can be freed here. - * However, in that case it still has to be written before deleting, so we can't delete - * it yet. */ - if (neighbor == 4) { - delete_tile = true; - } - else { - delete state.tiles[nindex].buffers; - state.tiles[nindex].buffers = NULL; - } - } - } - } - return true; - } - default: - assert(false); - return true; + /* TODO(sergey): Consider using hilbert spiral, or. maybe, even configurable. Not sure this + * brings a lot of value since this is only applicable to BIG tiles. */ + + const int tile_y = index / tile_state_.num_tiles_x; + const int tile_x = index - tile_y * tile_state_.num_tiles_x; + + Tile tile; + + tile.x = tile_x * tile_size_.x; + tile.y = tile_y * tile_size_.y; + tile.width = tile_size_.x; + tile.height = tile_size_.y; + + tile.width = min(tile.width, buffer_params_.width - tile.x); + tile.height = min(tile.height, buffer_params_.height - tile.y); + + return tile; +} + +const Tile &TileManager::get_current_tile() const +{ + return tile_state_.current_tile; +} + +bool TileManager::open_tile_output() +{ + write_state_.filename = path_temp_get("cycles-tile-buffer-" + tile_file_unique_part_ + "-" + + to_string(write_state_.tile_file_index) + ".exr"); + + write_state_.tile_out = ImageOutput::create(write_state_.filename); + if (!write_state_.tile_out) { + LOG(ERROR) << "Error creating image output for " << write_state_.filename; + return false; + } + + if (!write_state_.tile_out->supports("tiles")) { + LOG(ERROR) << "Progress tile file format does not support tiling."; + return false; } + + write_state_.tile_out->open(write_state_.filename, write_state_.image_spec); + write_state_.num_tiles_written = 0; + + VLOG(3) << "Opened tile file " << write_state_.filename; + + return true; } -bool TileManager::next_tile(Tile *&tile, int device, uint tile_types) +bool TileManager::close_tile_output() { - /* Preserve device if requested, unless this is a 
separate denoising device that just wants to - * grab any available tile. */ - const bool preserve_device = preserve_tile_device && device < num_devices; - - if (tile_types & RenderTile::DENOISE) { - int tile_index = -1; - int logical_device = preserve_device ? device : 0; - - while (logical_device < state.denoising_tiles.size()) { - if (state.denoising_tiles[logical_device].empty()) { - if (preserve_device) { - break; - } - else { - logical_device++; - continue; - } - } + if (!write_state_.tile_out) { + return true; + } - tile_index = state.denoising_tiles[logical_device].front(); - state.denoising_tiles[logical_device].pop_front(); - break; - } + const bool success = write_state_.tile_out->close(); + write_state_.tile_out = nullptr; - if (tile_index >= 0) { - tile = &state.tiles[tile_index]; - return true; - } + if (!success) { + LOG(ERROR) << "Error closing tile file."; + return false; } - if (tile_types & RenderTile::PATH_TRACE) { - int tile_index = -1; - int logical_device = preserve_device ? device : 0; - - while (logical_device < state.render_tiles.size()) { - if (state.render_tiles[logical_device].empty()) { - if (preserve_device) { - break; - } - else { - logical_device++; - continue; - } - } + VLOG(3) << "Tile output is closed."; - tile_index = state.render_tiles[logical_device].front(); - state.render_tiles[logical_device].pop_front(); - break; + return true; +} + +bool TileManager::write_tile(const RenderBuffers &tile_buffers) +{ + if (!write_state_.tile_out) { + if (!open_tile_output()) { + return false; } + } - if (tile_index >= 0) { - tile = &state.tiles[tile_index]; - return true; + DCHECK_EQ(tile_buffers.params.pass_stride, buffer_params_.pass_stride); + + const BufferParams &tile_params = tile_buffers.params; + + vector<float> pixel_storage; + const float *pixels = tile_buffers.buffer.data(); + + /* Tiled writing expects pixels to contain data for an entire tile. 
Pad the render buffers with + * empty pixels for tiles which are on the image boundary. */ + if (tile_params.width != tile_size_.x || tile_params.height != tile_size_.y) { + const int64_t pass_stride = tile_params.pass_stride; + const int64_t src_row_stride = tile_params.width * pass_stride; + + const int64_t dst_row_stride = tile_size_.x * pass_stride; + pixel_storage.resize(dst_row_stride * tile_size_.y); + + const float *src = tile_buffers.buffer.data(); + float *dst = pixel_storage.data(); + pixels = dst; + + for (int y = 0; y < tile_params.height; ++y, src += src_row_stride, dst += dst_row_stride) { + memcpy(dst, src, src_row_stride * sizeof(float)); } } - return false; -} + const int tile_x = tile_params.full_x - buffer_params_.full_x; + const int tile_y = tile_params.full_y - buffer_params_.full_y; -bool TileManager::done() -{ - int end_sample = (range_num_samples == -1) ? num_samples : - range_start_sample + range_num_samples; - return (state.resolution_divider == pixel_size) && - (state.sample + state.num_samples >= end_sample); + VLOG(3) << "Write tile at " << tile_x << ", " << tile_y; + if (!write_state_.tile_out->write_tile(tile_x, tile_y, 0, TypeDesc::FLOAT, pixels)) { + LOG(ERROR) << "Error writing tile " << write_state_.tile_out->geterror(); + } + + ++write_state_.num_tiles_written; + + return true; } -bool TileManager::has_tiles() +void TileManager::finish_write_tiles() { - foreach (Tile &tile, state.tiles) { - if (tile.state != Tile::DONE) { - return true; + if (!write_state_.tile_out) { + /* None of the tiles were written hence the file was not created. + * Avoid creation of fully empty file since it is redundant. */ + return; + } + + /* EXR expects all tiles to present in file. So explicitly write missing tiles as all-zero. 
*/ + if (write_state_.num_tiles_written < tile_state_.num_tiles) { + vector<float> pixel_storage(tile_size_.x * tile_size_.y * buffer_params_.pass_stride); + + for (int tile_index = write_state_.num_tiles_written; tile_index < tile_state_.num_tiles; + ++tile_index) { + const Tile tile = get_tile_for_index(tile_index); + + VLOG(3) << "Write dummy tile at " << tile.x << ", " << tile.y; + + write_state_.tile_out->write_tile(tile.x, tile.y, 0, TypeDesc::FLOAT, pixel_storage.data()); } } - return false; + + close_tile_output(); + + if (full_buffer_written_cb) { + full_buffer_written_cb(write_state_.filename); + } + + /* Advance the counter upon explicit finish of the file. + * Makes it possible to re-use tile manager for another scene, and avoids unnecessary increments + * of the tile-file-within-session index. */ + ++write_state_.tile_file_index; + + write_state_.filename = ""; } -bool TileManager::next() +bool TileManager::read_full_buffer_from_disk(const string_view filename, + RenderBuffers *buffers, + DenoiseParams *denoise_params) { - if (done()) + unique_ptr<ImageInput> in(ImageInput::open(filename)); + if (!in) { + LOG(ERROR) << "Error opening tile file " << filename; return false; + } + + const ImageSpec &image_spec = in->spec(); - if (progressive && state.resolution_divider > pixel_size) { - state.sample = 0; - state.resolution_divider = max(state.resolution_divider / 2, pixel_size); - state.num_samples = 1; - set_tiles(); + BufferParams buffer_params; + if (!buffer_params_from_image_spec_atttributes(&buffer_params, image_spec)) { + return false; } - else { - state.sample++; + buffers->reset(buffer_params); - if (progressive) - state.num_samples = 1; - else if (range_num_samples == -1) - state.num_samples = num_samples; - else - state.num_samples = range_num_samples; + if (!node_from_image_spec_atttributes(denoise_params, image_spec, ATTR_DENOISE_SOCKET_PREFIX)) { + return false; + } - state.resolution_divider = pixel_size; + if 
(!in->read_image(TypeDesc::FLOAT, buffers->buffer.data())) { + LOG(ERROR) << "Error reading pixels from the tile file " << in->geterror(); + return false; + } - if (state.sample == range_start_sample) { - set_tiles(); - } - else { - gen_render_tiles(); - } + if (!in->close()) { + LOG(ERROR) << "Error closing tile file " << in->geterror(); + return false; } return true; } -int TileManager::get_num_effective_samples() -{ - return (range_num_samples == -1) ? num_samples : range_num_samples; -} - CCL_NAMESPACE_END diff --git a/intern/cycles/render/tile.h b/intern/cycles/render/tile.h index 790a56f9445..71b9e966278 100644 --- a/intern/cycles/render/tile.h +++ b/intern/cycles/render/tile.h @@ -14,159 +14,151 @@ * limitations under the License. */ -#ifndef __TILE_H__ -#define __TILE_H__ - -#include <limits.h> +#pragma once #include "render/buffers.h" -#include "util/util_list.h" +#include "util/util_image.h" +#include "util/util_string.h" +#include "util/util_unique_ptr.h" CCL_NAMESPACE_BEGIN -/* Tile */ +class DenoiseParams; +class Scene; + +/* -------------------------------------------------------------------- + * Tile. + */ class Tile { public: - int index; - int x, y, w, h; - int device; - /* RENDER: The tile has to be rendered. - * RENDERED: The tile has been rendered, but can't be denoised yet (waiting for neighbors). - * DENOISE: The tile can be denoised now. - * DENOISED: The tile has been denoised, but can't be freed yet (waiting for neighbors). - * DONE: The tile is finished and has been freed. 
*/ - typedef enum { RENDER = 0, RENDERED, DENOISE, DENOISED, DONE } State; - State state; - RenderBuffers *buffers; + int x = 0, y = 0; + int width = 0, height = 0; Tile() { } - - Tile(int index_, int x_, int y_, int w_, int h_, int device_, State state_ = RENDER) - : index(index_), x(x_), y(y_), w(w_), h(h_), device(device_), state(state_), buffers(NULL) - { - } }; -/* Tile order */ - -/* Note: this should match enum_tile_order in properties.py */ -enum TileOrder { - TILE_CENTER = 0, - TILE_RIGHT_TO_LEFT = 1, - TILE_LEFT_TO_RIGHT = 2, - TILE_TOP_TO_BOTTOM = 3, - TILE_BOTTOM_TO_TOP = 4, - TILE_HILBERT_SPIRAL = 5, -}; - -/* Tile Manager */ +/* -------------------------------------------------------------------- + * Tile Manager. + */ class TileManager { public: - BufferParams params; - - struct State { - vector<Tile> tiles; - int tile_stride; - BufferParams buffer; - int sample; - int num_samples; - int resolution_divider; - int num_tiles; - - /* Total samples over all pixels: Generally num_samples*num_pixels, - * but can be higher due to the initial resolution division for previews. */ - uint64_t total_pixel_samples; - - /* These lists contain the indices of the tiles to be rendered/denoised and are used - * when acquiring a new tile for the device. - * Each list in each vector is for one logical device. */ - vector<list<int>> render_tiles; - vector<list<int>> denoising_tiles; - } state; - - int num_samples; - int slice_overlap; - - TileManager(bool progressive, - int num_samples, - int2 tile_size, - int start_resolution, - bool preserve_tile_device, - bool background, - TileOrder tile_order, - int num_devices = 1, - int pixel_size = 1); + /* This callback is invoked by whenever on-dist tiles storage file is closed after writing. 
*/ + function<void(string_view)> full_buffer_written_cb; + + TileManager(); ~TileManager(); - void device_free(); - void reset(BufferParams ¶ms, int num_samples); - void set_samples(int num_samples); + TileManager(const TileManager &other) = delete; + TileManager(TileManager &&other) noexcept = delete; + TileManager &operator=(const TileManager &other) = delete; + TileManager &operator=(TileManager &&other) = delete; + + /* Reset current progress and start new rendering of the full-frame parameters in tiles of the + * given size. + * Only touches scheduling-related state of the tile manager. */ + /* TODO(sergey): Consider using tile area instead of exact size to help dealing with extreme + * cases of stretched renders. */ + void reset_scheduling(const BufferParams ¶ms, int2 tile_size); + + /* Update for the known buffer passes and scene parameters. + * Will store all parameters needed for buffers access outside of the scene graph. */ + void update(const BufferParams ¶ms, const Scene *scene); + + inline int get_num_tiles() const + { + return tile_state_.num_tiles; + } + + inline bool has_multiple_tiles() const + { + return tile_state_.num_tiles > 1; + } + bool next(); - bool next_tile(Tile *&tile, int device, uint tile_types); - bool finish_tile(const int index, const bool need_denoise, bool &delete_tile); bool done(); - bool has_tiles(); - void set_tile_order(TileOrder tile_order_) + const Tile &get_current_tile() const; + + /* Write render buffer of a tile to a file on disk. + * + * Opens file for write when first tile is written. + * + * Returns true on success. */ + bool write_tile(const RenderBuffers &tile_buffers); + + /* Inform the tile manager that no more tiles will be written to disk. + * The file will be considered final, all handles to it will be closed. */ + void finish_write_tiles(); + + /* Check whether any tile has been written to disk. 
*/ + inline bool has_written_tiles() const { - tile_order = tile_order_; + return write_state_.num_tiles_written != 0; } - int get_neighbor_index(int index, int neighbor); - bool check_neighbor_state(int index, Tile::State state); + /* Read full frame render buffer from tiles file on disk. + * + * Returns true on success. */ + bool read_full_buffer_from_disk(string_view filename, + RenderBuffers *buffers, + DenoiseParams *denoise_params); - /* ** Sample range rendering. ** */ + protected: + /* Get tile configuration for its index. + * The tile index must be within [0, state_.tile_state_). */ + Tile get_tile_for_index(int index) const; - /* Start sample in the range. */ - int range_start_sample; + bool open_tile_output(); + bool close_tile_output(); - /* Number to samples in the rendering range. */ - int range_num_samples; + /* Part of an on-disk tile file name which avoids conflicts between several Cycles instances or + * several sessions. */ + string tile_file_unique_part_; - /* Get number of actual samples to render. */ - int get_num_effective_samples(); + int2 tile_size_ = make_int2(0, 0); - /* Schedule tiles for denoising after they've been rendered. */ - bool schedule_denoising; + BufferParams buffer_params_; - protected: - void set_tiles(); - - bool progressive; - int2 tile_size; - TileOrder tile_order; - int start_resolution; - int pixel_size; - int num_devices; - - /* in some cases it is important that the same tile will be returned for the same - * device it was originally generated for (i.e. viewport rendering when buffer is - * allocating once for tile and then always used by it) - * - * in other cases any tile could be handled by any device (i.e. 
final rendering - * without progressive refine) - */ - bool preserve_tile_device; - - /* for background render tiles should exactly match render parts generated from - * blender side, which means image first gets split into tiles and then tiles are - * assigning to render devices - * - * however viewport rendering expects tiles to be allocated in a special way, - * meaning image is being sliced horizontally first and every device handles - * its own slice - */ - bool background; - - /* Generate tile list, return number of tiles. */ - int gen_tiles(bool sliced); - void gen_render_tiles(); + /* Tile scheduling state. */ + struct { + int num_tiles_x = 0; + int num_tiles_y = 0; + int num_tiles = 0; + + int next_tile_index; + + Tile current_tile; + } tile_state_; + + /* State of tiles writing to a file on disk. */ + struct { + /* Index of a tile file used during the current session. + * This number is used for the file name construction, making it possible to render several + * scenes throughout duration of the session and keep all results available for later read + * access. */ + int tile_file_index = 0; + + string filename; + + /* Specification of the tile image which corresponds to the buffer parameters. + * Contains channels configured according to the passes configuration in the path traces. + * + * Output images are saved using this specification, input images are expected to have matched + * specification. */ + ImageSpec image_spec; + + /* Output handle for the tile file. + * + * This file can not be closed until all tiles has been provided, so the handle is stored in + * the state and is created whenever writing is requested. */ + unique_ptr<ImageOutput> tile_out; + + int num_tiles_written = 0; + } write_state_; }; CCL_NAMESPACE_END - -#endif /* __TILE_H__ */ |