diff options
Diffstat (limited to 'intern/cycles')
-rw-r--r-- | intern/cycles/device/metal/kernel.mm | 2 | ||||
-rw-r--r-- | intern/cycles/integrator/pass_accessor_cpu.cpp | 4 | ||||
-rw-r--r-- | intern/cycles/integrator/path_trace.cpp | 39 | ||||
-rw-r--r-- | intern/cycles/integrator/path_trace_work_cpu.cpp | 8 | ||||
-rw-r--r-- | intern/cycles/integrator/shader_eval.cpp | 2 | ||||
-rw-r--r-- | intern/cycles/kernel/svm/blackbody.h | 2 | ||||
-rw-r--r-- | intern/cycles/kernel/svm/closure.h | 3 | ||||
-rw-r--r-- | intern/cycles/kernel/svm/math_util.h | 4 | ||||
-rw-r--r-- | intern/cycles/kernel/types.h | 7 | ||||
-rw-r--r-- | intern/cycles/kernel/util/color.h | 9 | ||||
-rw-r--r-- | intern/cycles/scene/shader.cpp | 39 | ||||
-rw-r--r-- | intern/cycles/scene/shader.h | 5 | ||||
-rw-r--r-- | intern/cycles/scene/shader_nodes.cpp | 4 | ||||
-rw-r--r-- | intern/cycles/util/tbb.h | 11 | ||||
-rw-r--r-- | intern/cycles/util/transform.h | 15 |
15 files changed, 116 insertions, 38 deletions
diff --git a/intern/cycles/device/metal/kernel.mm b/intern/cycles/device/metal/kernel.mm index 1434b297ddd..9555ca03c8e 100644 --- a/intern/cycles/device/metal/kernel.mm +++ b/intern/cycles/device/metal/kernel.mm @@ -459,7 +459,7 @@ bool MetalDeviceKernels::load(MetalDevice *device, int kernel_type) tbb::task_arena local_arena(max_mtlcompiler_threads); local_arena.execute([&]() { - tbb::parallel_for(int(0), int(DEVICE_KERNEL_NUM), [&](int i) { + parallel_for(int(0), int(DEVICE_KERNEL_NUM), [&](int i) { /* skip megakernel */ if (i == DEVICE_KERNEL_INTEGRATOR_MEGAKERNEL) { return; diff --git a/intern/cycles/integrator/pass_accessor_cpu.cpp b/intern/cycles/integrator/pass_accessor_cpu.cpp index 509190c8a7e..02260a54bf4 100644 --- a/intern/cycles/integrator/pass_accessor_cpu.cpp +++ b/intern/cycles/integrator/pass_accessor_cpu.cpp @@ -44,7 +44,7 @@ inline void PassAccessorCPU::run_get_pass_kernel_processor_float( const int pixel_stride = destination.pixel_stride ? destination.pixel_stride : destination.num_components; - tbb::parallel_for(0, buffer_params.window_height, [&](int64_t y) { + parallel_for(0, buffer_params.window_height, [&](int64_t y) { const float *buffer = window_data + y * buffer_row_stride; float *pixel = destination.pixels + (y * buffer_params.width + destination.offset) * pixel_stride; @@ -69,7 +69,7 @@ inline void PassAccessorCPU::run_get_pass_kernel_processor_half_rgba( const int destination_stride = destination.stride != 0 ? destination.stride : buffer_params.width; - tbb::parallel_for(0, buffer_params.window_height, [&](int64_t y) { + parallel_for(0, buffer_params.window_height, [&](int64_t y) { const float *buffer = window_data + y * buffer_row_stride; half4 *pixel = dst_start + y * destination_stride; func(kfilm_convert, buffer, pixel, buffer_params.window_width, pass_stride); diff --git a/intern/cycles/integrator/path_trace.cpp b/intern/cycles/integrator/path_trace.cpp index f1e70b7f28f..4ecd3b829e8 100644 --- a/intern/cycles/integrator/path_trace.cpp +++ b/intern/cycles/integrator/path_trace.cpp @@ -334,7 +334,7 @@ void PathTrace::init_render_buffers(const RenderWork &render_work) /* Handle initialization scheduled by the render scheduler. */ if (render_work.init_render_buffers) { - tbb::parallel_for_each(path_trace_works_, [&](unique_ptr<PathTraceWork> &path_trace_work) { + parallel_for_each(path_trace_works_, [&](unique_ptr<PathTraceWork> &path_trace_work) { path_trace_work->zero_render_buffers(); }); @@ -355,10 +355,9 @@ void PathTrace::path_trace(RenderWork &render_work) const int num_works = path_trace_works_.size(); - tbb::task_group_context *tbb_ctx = tbb::task::self().group(); - tbb_ctx->capture_fp_settings(); + thread_capture_fp_settings(); - tbb::parallel_for(0, num_works, [&](int i) { + parallel_for(0, num_works, [&](int i) { const double work_start_time = time_dt(); const int num_samples = render_work.path_trace.num_samples; @@ -408,7 +407,7 @@ void PathTrace::adaptive_sample(RenderWork &render_work) const double start_time = time_dt(); uint num_active_pixels = 0; - tbb::parallel_for_each(path_trace_works_, [&](unique_ptr<PathTraceWork> &path_trace_work) { + parallel_for_each(path_trace_works_, [&](unique_ptr<PathTraceWork> &path_trace_work) { const uint num_active_pixels_in_work = path_trace_work->adaptive_sampling_converge_filter_count_active( render_work.adaptive_sampling.threshold, render_work.adaptive_sampling.reset); @@ -486,7 +485,7 @@ void PathTrace::cryptomatte_postprocess(const RenderWork &render_work) } VLOG(3) << "Perform cryptomatte work."; - tbb::parallel_for_each(path_trace_works_, [&](unique_ptr<PathTraceWork> &path_trace_work) { + parallel_for_each(path_trace_works_, [&](unique_ptr<PathTraceWork> &path_trace_work) { path_trace_work->cryptomatte_postproces(); }); } @@ -539,7 +538,7 @@ void PathTrace::denoise(const RenderWork &render_work) if (multi_device_buffers) { multi_device_buffers->copy_from_device(); - tbb::parallel_for_each( + parallel_for_each( path_trace_works_, [&multi_device_buffers](unique_ptr<PathTraceWork> &path_trace_work) { path_trace_work->copy_from_denoised_render_buffers(multi_device_buffers.get()); }); @@ -809,7 +808,7 @@ void PathTrace::tile_buffer_read() } /* Read buffers back from device. */ - tbb::parallel_for_each(path_trace_works_, [&](unique_ptr<PathTraceWork> &path_trace_work) { + parallel_for_each(path_trace_works_, [&](unique_ptr<PathTraceWork> &path_trace_work) { path_trace_work->copy_render_buffers_from_device(); }); @@ -817,7 +816,7 @@ void PathTrace::tile_buffer_read() PathTraceTile tile(*this); if (output_driver_->read_render_tile(tile)) { /* Copy buffers to device again. */ - tbb::parallel_for_each(path_trace_works_, [](unique_ptr<PathTraceWork> &path_trace_work) { + parallel_for_each(path_trace_works_, [](unique_ptr<PathTraceWork> &path_trace_work) { path_trace_work->copy_render_buffers_to_device(); }); } @@ -881,20 +880,20 @@ void PathTrace::progress_set_status(const string &status, const string &substatu void PathTrace::copy_to_render_buffers(RenderBuffers *render_buffers) { - tbb::parallel_for_each(path_trace_works_, - [&render_buffers](unique_ptr<PathTraceWork> &path_trace_work) { - path_trace_work->copy_to_render_buffers(render_buffers); - }); + parallel_for_each(path_trace_works_, + [&render_buffers](unique_ptr<PathTraceWork> &path_trace_work) { + path_trace_work->copy_to_render_buffers(render_buffers); + }); render_buffers->copy_to_device(); } void PathTrace::copy_from_render_buffers(RenderBuffers *render_buffers) { render_buffers->copy_from_device(); - tbb::parallel_for_each(path_trace_works_, - [&render_buffers](unique_ptr<PathTraceWork> &path_trace_work) { - path_trace_work->copy_from_render_buffers(render_buffers); - }); + parallel_for_each(path_trace_works_, + [&render_buffers](unique_ptr<PathTraceWork> &path_trace_work) { + path_trace_work->copy_from_render_buffers(render_buffers); + }); } bool PathTrace::copy_render_tile_from_device() @@ -906,7 +905,7 @@ bool PathTrace::copy_render_tile_from_device() bool success = true; - tbb::parallel_for_each(path_trace_works_, [&](unique_ptr<PathTraceWork> &path_trace_work) { + parallel_for_each(path_trace_works_, [&](unique_ptr<PathTraceWork> &path_trace_work) { if (!success) { return; } @@ -1007,7 +1006,7 @@ bool PathTrace::get_render_tile_pixels(const PassAccessor &pass_accessor, bool success = true; - tbb::parallel_for_each(path_trace_works_, [&](unique_ptr<PathTraceWork> &path_trace_work) { + parallel_for_each(path_trace_works_, [&](unique_ptr<PathTraceWork> &path_trace_work) { if (!success) { return; } @@ -1024,7 +1023,7 @@ bool PathTrace::set_render_tile_pixels(PassAccessor &pass_accessor, { bool success = true; - tbb::parallel_for_each(path_trace_works_, [&](unique_ptr<PathTraceWork> &path_trace_work) { + parallel_for_each(path_trace_works_, [&](unique_ptr<PathTraceWork> &path_trace_work) { if (!success) { return; } diff --git a/intern/cycles/integrator/path_trace_work_cpu.cpp b/intern/cycles/integrator/path_trace_work_cpu.cpp index 147e273284b..518ef3185f9 100644 --- a/intern/cycles/integrator/path_trace_work_cpu.cpp +++ b/intern/cycles/integrator/path_trace_work_cpu.cpp @@ -73,7 +73,7 @@ void PathTraceWorkCPU::render_samples(RenderStatistics &statistics, tbb::task_arena local_arena = local_tbb_arena_create(device_); local_arena.execute([&]() { - tbb::parallel_for(int64_t(0), total_pixels_num, [&](int64_t work_index) { + parallel_for(int64_t(0), total_pixels_num, [&](int64_t work_index) { if (is_cancel_requested()) { return; } @@ -219,7 +219,7 @@ int PathTraceWorkCPU::adaptive_sampling_converge_filter_count_active(float thres /* Check convergency and do x-filter in a single `parallel_for`, to reduce threading overhead. */ local_arena.execute([&]() { - tbb::parallel_for(full_y, full_y + height, [&](int y) { + parallel_for(full_y, full_y + height, [&](int y) { CPUKernelThreadGlobals *kernel_globals = &kernel_thread_globals_[0]; bool row_converged = true; @@ -243,7 +243,7 @@ int PathTraceWorkCPU::adaptive_sampling_converge_filter_count_active(float thres if (num_active_pixels) { local_arena.execute([&]() { - tbb::parallel_for(full_x, full_x + width, [&](int x) { + parallel_for(full_x, full_x + width, [&](int x) { CPUKernelThreadGlobals *kernel_globals = &kernel_thread_globals_[0]; kernels_.adaptive_sampling_filter_y( kernel_globals, render_buffer, x, full_y, height, offset, stride); @@ -265,7 +265,7 @@ void PathTraceWorkCPU::cryptomatte_postproces() /* Check convergency and do x-filter in a single `parallel_for`, to reduce threading overhead. */ local_arena.execute([&]() { - tbb::parallel_for(0, height, [&](int y) { + parallel_for(0, height, [&](int y) { CPUKernelThreadGlobals *kernel_globals = &kernel_thread_globals_[0]; int pixel_index = y * width; diff --git a/intern/cycles/integrator/shader_eval.cpp b/intern/cycles/integrator/shader_eval.cpp index f5036b4020d..92b9d1c662d 100644 --- a/intern/cycles/integrator/shader_eval.cpp +++ b/intern/cycles/integrator/shader_eval.cpp @@ -92,7 +92,7 @@ bool ShaderEval::eval_cpu(Device *device, tbb::task_arena local_arena(device->info.cpu_threads); local_arena.execute([&]() { - tbb::parallel_for(int64_t(0), work_size, [&](int64_t work_index) { + parallel_for(int64_t(0), work_size, [&](int64_t work_index) { /* TODO: is this fast enough? */ if (progress_.get_cancel()) { success = false; diff --git a/intern/cycles/kernel/svm/blackbody.h b/intern/cycles/kernel/svm/blackbody.h index 1618341b655..af59c2fe747 100644 --- a/intern/cycles/kernel/svm/blackbody.h +++ b/intern/cycles/kernel/svm/blackbody.h @@ -23,7 +23,7 @@ ccl_device_noinline void svm_node_blackbody(KernelGlobals kg, /* Input */ float temperature = stack_load_float(stack, temperature_offset); - float3 color_rgb = svm_math_blackbody_color(temperature); + float3 color_rgb = rec709_to_rgb(kg, svm_math_blackbody_color_rec709(temperature)); stack_store_float3(stack, col_offset, color_rgb); } diff --git a/intern/cycles/kernel/svm/closure.h b/intern/cycles/kernel/svm/closure.h index 88b44cdbacf..305bd404d27 100644 --- a/intern/cycles/kernel/svm/closure.h +++ b/intern/cycles/kernel/svm/closure.h @@ -1111,7 +1111,8 @@ ccl_device_noinline int svm_node_principled_volume(KernelGlobals kg, if (intensity > CLOSURE_WEIGHT_CUTOFF) { float3 blackbody_tint = stack_load_float3(stack, node.w); - float3 bb = blackbody_tint * intensity * svm_math_blackbody_color(T); + float3 bb = blackbody_tint * intensity * + rec709_to_rgb(kg, svm_math_blackbody_color_rec709(T)); emission_setup(sd, bb); } } diff --git a/intern/cycles/kernel/svm/math_util.h b/intern/cycles/kernel/svm/math_util.h index 2a496aee1e1..9f2d9561e26 100644 --- a/intern/cycles/kernel/svm/math_util.h +++ b/intern/cycles/kernel/svm/math_util.h @@ -189,10 +189,8 @@ ccl_device float svm_math(NodeMathType type, float a, float b, float c) } } -ccl_device float3 svm_math_blackbody_color(float t) +ccl_device float3 svm_math_blackbody_color_rec709(float t) { - /* TODO(lukas): Reimplement in XYZ. */ - /* Calculate color in range 800..12000 using an approximation * a/x+bx+c for R and G and ((at + b)t + c)t + d) for B * Max absolute error for RGB is (0.00095, 0.00077, 0.00057), diff --git a/intern/cycles/kernel/types.h b/intern/cycles/kernel/types.h index db499a1e1bc..422285cd346 100644 --- a/intern/cycles/kernel/types.h +++ b/intern/cycles/kernel/types.h @@ -1117,13 +1117,18 @@ typedef struct KernelFilm { float4 xyz_to_g; float4 xyz_to_b; float4 rgb_to_y; + /* Rec709 to rendering color space. */ + float4 rec709_to_r; + float4 rec709_to_g; + float4 rec709_to_b; + int is_rec709; int pass_bake_primitive; int pass_bake_differential; int use_approximate_shadow_catcher; - int pad1, pad2; + int pad1; } KernelFilm; static_assert_align(KernelFilm, 16); diff --git a/intern/cycles/kernel/util/color.h b/intern/cycles/kernel/util/color.h index 95b6b33795d..28978d873d6 100644 --- a/intern/cycles/kernel/util/color.h +++ b/intern/cycles/kernel/util/color.h @@ -14,6 +14,15 @@ ccl_device float3 xyz_to_rgb(KernelGlobals kg, float3 xyz) dot(float4_to_float3(kernel_data.film.xyz_to_b), xyz)); } +ccl_device float3 rec709_to_rgb(KernelGlobals kg, float3 rec709) +{ + return (kernel_data.film.is_rec709) ? + rec709 : + make_float3(dot(float4_to_float3(kernel_data.film.rec709_to_r), rec709), + dot(float4_to_float3(kernel_data.film.rec709_to_g), rec709), + dot(float4_to_float3(kernel_data.film.rec709_to_b), rec709)); +} + ccl_device float linear_rgb_to_gray(KernelGlobals kg, float3 c) { return dot(c, float4_to_float3(kernel_data.film.rgb_to_y)); diff --git a/intern/cycles/scene/shader.cpp b/intern/cycles/scene/shader.cpp index 8a08f2a5be9..e1af92ea8cf 100644 --- a/intern/cycles/scene/shader.cpp +++ b/intern/cycles/scene/shader.cpp @@ -579,6 +579,10 @@ void ShaderManager::device_update_common(Device * /*device*/, kfilm->xyz_to_g = float3_to_float4(xyz_to_g); kfilm->xyz_to_b = float3_to_float4(xyz_to_b); kfilm->rgb_to_y = float3_to_float4(rgb_to_y); + kfilm->rec709_to_r = float3_to_float4(rec709_to_r); + kfilm->rec709_to_g = float3_to_float4(rec709_to_g); + kfilm->rec709_to_b = float3_to_float4(rec709_to_b); + kfilm->is_rec709 = is_rec709; } void ShaderManager::device_free_common(Device *, DeviceScene *dscene, Scene *scene) @@ -740,6 +744,11 @@ float ShaderManager::linear_rgb_to_gray(float3 c) return dot(c, rgb_to_y); } +float3 ShaderManager::rec709_to_scene_linear(float3 c) +{ + return make_float3(dot(rec709_to_r, c), dot(rec709_to_g, c), dot(rec709_to_b, c)); +} + string ShaderManager::get_cryptomatte_materials(Scene *scene) { string manifest = "{"; @@ -802,11 +811,29 @@ void ShaderManager::init_xyz_transforms() { /* Default to ITU-BT.709 in case no appropriate transform found. * Note XYZ here is defined as having a D65 white point. */ - xyz_to_r = make_float3(3.2404542f, -1.5371385f, -0.4985314f); - xyz_to_g = make_float3(-0.9692660f, 1.8760108f, 0.0415560f); - xyz_to_b = make_float3(0.0556434f, -0.2040259f, 1.0572252f); + const Transform xyz_to_rec709 = make_transform(3.2404542f, + -1.5371385f, + -0.4985314f, + 0.0f, + -0.9692660f, + 1.8760108f, + 0.0415560f, + 0.0f, + 0.0556434f, + -0.2040259f, + 1.0572252f, + 0.0f); + + xyz_to_r = float4_to_float3(xyz_to_rec709.x); + xyz_to_g = float4_to_float3(xyz_to_rec709.y); + xyz_to_b = float4_to_float3(xyz_to_rec709.z); rgb_to_y = make_float3(0.2126729f, 0.7151522f, 0.0721750f); + rec709_to_r = make_float3(1.0f, 0.0f, 0.0f); + rec709_to_g = make_float3(0.0f, 1.0f, 0.0f); + rec709_to_b = make_float3(0.0f, 0.0f, 1.0f); + is_rec709 = true; + #ifdef WITH_OCIO /* Get from OpenColorO config if it has the required roles. */ OCIO::ConstConfigRcPtr config = OCIO::GetCurrentConfig(); @@ -857,6 +884,12 @@ void ShaderManager::init_xyz_transforms() const Transform rgb_to_xyz = transform_inverse(xyz_to_rgb); rgb_to_y = float4_to_float3(rgb_to_xyz.y); + + const Transform rec709_to_rgb = xyz_to_rgb * transform_inverse(xyz_to_rec709); + rec709_to_r = float4_to_float3(rec709_to_rgb.x); + rec709_to_g = float4_to_float3(rec709_to_rgb.y); + rec709_to_b = float4_to_float3(rec709_to_rgb.z); + is_rec709 = transform_equal_threshold(xyz_to_rgb, xyz_to_rec709, 0.0001f); #endif } diff --git a/intern/cycles/scene/shader.h b/intern/cycles/scene/shader.h index cbe331d8ec2..274bb9b4fa1 100644 --- a/intern/cycles/scene/shader.h +++ b/intern/cycles/scene/shader.h @@ -208,6 +208,7 @@ class ShaderManager { static void free_memory(); float linear_rgb_to_gray(float3 c); + float3 rec709_to_scene_linear(float3 c); string get_cryptomatte_materials(Scene *scene); @@ -239,6 +240,10 @@ class ShaderManager { float3 xyz_to_g; float3 xyz_to_b; float3 rgb_to_y; + float3 rec709_to_r; + float3 rec709_to_g; + float3 rec709_to_b; + bool is_rec709; }; CCL_NAMESPACE_END diff --git a/intern/cycles/scene/shader_nodes.cpp b/intern/cycles/scene/shader_nodes.cpp index a951a558731..95fccf725f3 100644 --- a/intern/cycles/scene/shader_nodes.cpp +++ b/intern/cycles/scene/shader_nodes.cpp @@ -5763,7 +5763,9 @@ BlackbodyNode::BlackbodyNode() : ShaderNode(get_node_type()) void BlackbodyNode::constant_fold(const ConstantFolder &folder) { if (folder.all_inputs_constant()) { - folder.make_constant(svm_math_blackbody_color(temperature)); + const float3 rgb_rec709 = svm_math_blackbody_color_rec709(temperature); + const float3 rgb = folder.scene->shader_manager->rec709_to_scene_linear(rgb_rec709); + folder.make_constant(rgb); } } diff --git a/intern/cycles/util/tbb.h b/intern/cycles/util/tbb.h index 7105ddda0f8..948bf2b3e0e 100644 --- a/intern/cycles/util/tbb.h +++ b/intern/cycles/util/tbb.h @@ -25,6 +25,17 @@ CCL_NAMESPACE_BEGIN using tbb::blocked_range; using tbb::enumerable_thread_specific; using tbb::parallel_for; +using tbb::parallel_for_each; + +static inline void thread_capture_fp_settings() +{ +#if TBB_INTERFACE_VERSION_MAJOR >= 12 + tbb::task_group_context *ctx = tbb::task::current_context(); +#else + tbb::task_group_context *ctx = tbb::task::self().group(); +#endif + ctx->capture_fp_settings(); +} static inline void parallel_for_cancel() { diff --git a/intern/cycles/util/transform.h b/intern/cycles/util/transform.h index 371dbb0f4aa..477272f0ba6 100644 --- a/intern/cycles/util/transform.h +++ b/intern/cycles/util/transform.h @@ -285,6 +285,21 @@ ccl_device_inline bool operator!=(const Transform &A, const Transform &B) return !(A == B); } +ccl_device_inline bool transform_equal_threshold(const Transform &A, + const Transform &B, + const float threshold) +{ + for (int x = 0; x < 3; x++) { + for (int y = 0; y < 4; y++) { + if (fabsf(A[x][y] - B[x][y]) > threshold) { + return false; + } + } + } + + return true; +} + ccl_device_inline float3 transform_get_column(const Transform *t, int column) { return make_float3(t->x[column], t->y[column], t->z[column]); |