Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
Diffstat (limited to 'intern/cycles')
-rw-r--r-- intern/cycles/device/metal/kernel.mm 2
-rw-r--r-- intern/cycles/integrator/pass_accessor_cpu.cpp 4
-rw-r--r-- intern/cycles/integrator/path_trace.cpp 39
-rw-r--r-- intern/cycles/integrator/path_trace_work_cpu.cpp 8
-rw-r--r-- intern/cycles/integrator/shader_eval.cpp 2
-rw-r--r-- intern/cycles/kernel/svm/blackbody.h 2
-rw-r--r-- intern/cycles/kernel/svm/closure.h 3
-rw-r--r-- intern/cycles/kernel/svm/math_util.h 4
-rw-r--r-- intern/cycles/kernel/types.h 7
-rw-r--r-- intern/cycles/kernel/util/color.h 9
-rw-r--r-- intern/cycles/scene/shader.cpp 39
-rw-r--r-- intern/cycles/scene/shader.h 5
-rw-r--r-- intern/cycles/scene/shader_nodes.cpp 4
-rw-r--r-- intern/cycles/util/tbb.h 11
-rw-r--r-- intern/cycles/util/transform.h 15
15 files changed, 116 insertions, 38 deletions
diff --git a/intern/cycles/device/metal/kernel.mm b/intern/cycles/device/metal/kernel.mm
index 1434b297ddd..9555ca03c8e 100644
--- a/intern/cycles/device/metal/kernel.mm
+++ b/intern/cycles/device/metal/kernel.mm
@@ -459,7 +459,7 @@ bool MetalDeviceKernels::load(MetalDevice *device, int kernel_type)
tbb::task_arena local_arena(max_mtlcompiler_threads);
local_arena.execute([&]() {
- tbb::parallel_for(int(0), int(DEVICE_KERNEL_NUM), [&](int i) {
+ parallel_for(int(0), int(DEVICE_KERNEL_NUM), [&](int i) {
/* skip megakernel */
if (i == DEVICE_KERNEL_INTEGRATOR_MEGAKERNEL) {
return;
diff --git a/intern/cycles/integrator/pass_accessor_cpu.cpp b/intern/cycles/integrator/pass_accessor_cpu.cpp
index 509190c8a7e..02260a54bf4 100644
--- a/intern/cycles/integrator/pass_accessor_cpu.cpp
+++ b/intern/cycles/integrator/pass_accessor_cpu.cpp
@@ -44,7 +44,7 @@ inline void PassAccessorCPU::run_get_pass_kernel_processor_float(
const int pixel_stride = destination.pixel_stride ? destination.pixel_stride :
destination.num_components;
- tbb::parallel_for(0, buffer_params.window_height, [&](int64_t y) {
+ parallel_for(0, buffer_params.window_height, [&](int64_t y) {
const float *buffer = window_data + y * buffer_row_stride;
float *pixel = destination.pixels +
(y * buffer_params.width + destination.offset) * pixel_stride;
@@ -69,7 +69,7 @@ inline void PassAccessorCPU::run_get_pass_kernel_processor_half_rgba(
const int destination_stride = destination.stride != 0 ? destination.stride :
buffer_params.width;
- tbb::parallel_for(0, buffer_params.window_height, [&](int64_t y) {
+ parallel_for(0, buffer_params.window_height, [&](int64_t y) {
const float *buffer = window_data + y * buffer_row_stride;
half4 *pixel = dst_start + y * destination_stride;
func(kfilm_convert, buffer, pixel, buffer_params.window_width, pass_stride);
diff --git a/intern/cycles/integrator/path_trace.cpp b/intern/cycles/integrator/path_trace.cpp
index f1e70b7f28f..4ecd3b829e8 100644
--- a/intern/cycles/integrator/path_trace.cpp
+++ b/intern/cycles/integrator/path_trace.cpp
@@ -334,7 +334,7 @@ void PathTrace::init_render_buffers(const RenderWork &render_work)
/* Handle initialization scheduled by the render scheduler. */
if (render_work.init_render_buffers) {
- tbb::parallel_for_each(path_trace_works_, [&](unique_ptr<PathTraceWork> &path_trace_work) {
+ parallel_for_each(path_trace_works_, [&](unique_ptr<PathTraceWork> &path_trace_work) {
path_trace_work->zero_render_buffers();
});
@@ -355,10 +355,9 @@ void PathTrace::path_trace(RenderWork &render_work)
const int num_works = path_trace_works_.size();
- tbb::task_group_context *tbb_ctx = tbb::task::self().group();
- tbb_ctx->capture_fp_settings();
+ thread_capture_fp_settings();
- tbb::parallel_for(0, num_works, [&](int i) {
+ parallel_for(0, num_works, [&](int i) {
const double work_start_time = time_dt();
const int num_samples = render_work.path_trace.num_samples;
@@ -408,7 +407,7 @@ void PathTrace::adaptive_sample(RenderWork &render_work)
const double start_time = time_dt();
uint num_active_pixels = 0;
- tbb::parallel_for_each(path_trace_works_, [&](unique_ptr<PathTraceWork> &path_trace_work) {
+ parallel_for_each(path_trace_works_, [&](unique_ptr<PathTraceWork> &path_trace_work) {
const uint num_active_pixels_in_work =
path_trace_work->adaptive_sampling_converge_filter_count_active(
render_work.adaptive_sampling.threshold, render_work.adaptive_sampling.reset);
@@ -486,7 +485,7 @@ void PathTrace::cryptomatte_postprocess(const RenderWork &render_work)
}
VLOG(3) << "Perform cryptomatte work.";
- tbb::parallel_for_each(path_trace_works_, [&](unique_ptr<PathTraceWork> &path_trace_work) {
+ parallel_for_each(path_trace_works_, [&](unique_ptr<PathTraceWork> &path_trace_work) {
path_trace_work->cryptomatte_postproces();
});
}
@@ -539,7 +538,7 @@ void PathTrace::denoise(const RenderWork &render_work)
if (multi_device_buffers) {
multi_device_buffers->copy_from_device();
- tbb::parallel_for_each(
+ parallel_for_each(
path_trace_works_, [&multi_device_buffers](unique_ptr<PathTraceWork> &path_trace_work) {
path_trace_work->copy_from_denoised_render_buffers(multi_device_buffers.get());
});
@@ -809,7 +808,7 @@ void PathTrace::tile_buffer_read()
}
/* Read buffers back from device. */
- tbb::parallel_for_each(path_trace_works_, [&](unique_ptr<PathTraceWork> &path_trace_work) {
+ parallel_for_each(path_trace_works_, [&](unique_ptr<PathTraceWork> &path_trace_work) {
path_trace_work->copy_render_buffers_from_device();
});
@@ -817,7 +816,7 @@ void PathTrace::tile_buffer_read()
PathTraceTile tile(*this);
if (output_driver_->read_render_tile(tile)) {
/* Copy buffers to device again. */
- tbb::parallel_for_each(path_trace_works_, [](unique_ptr<PathTraceWork> &path_trace_work) {
+ parallel_for_each(path_trace_works_, [](unique_ptr<PathTraceWork> &path_trace_work) {
path_trace_work->copy_render_buffers_to_device();
});
}
@@ -881,20 +880,20 @@ void PathTrace::progress_set_status(const string &status, const string &substatu
void PathTrace::copy_to_render_buffers(RenderBuffers *render_buffers)
{
- tbb::parallel_for_each(path_trace_works_,
- [&render_buffers](unique_ptr<PathTraceWork> &path_trace_work) {
- path_trace_work->copy_to_render_buffers(render_buffers);
- });
+ parallel_for_each(path_trace_works_,
+ [&render_buffers](unique_ptr<PathTraceWork> &path_trace_work) {
+ path_trace_work->copy_to_render_buffers(render_buffers);
+ });
render_buffers->copy_to_device();
}
void PathTrace::copy_from_render_buffers(RenderBuffers *render_buffers)
{
render_buffers->copy_from_device();
- tbb::parallel_for_each(path_trace_works_,
- [&render_buffers](unique_ptr<PathTraceWork> &path_trace_work) {
- path_trace_work->copy_from_render_buffers(render_buffers);
- });
+ parallel_for_each(path_trace_works_,
+ [&render_buffers](unique_ptr<PathTraceWork> &path_trace_work) {
+ path_trace_work->copy_from_render_buffers(render_buffers);
+ });
}
bool PathTrace::copy_render_tile_from_device()
@@ -906,7 +905,7 @@ bool PathTrace::copy_render_tile_from_device()
bool success = true;
- tbb::parallel_for_each(path_trace_works_, [&](unique_ptr<PathTraceWork> &path_trace_work) {
+ parallel_for_each(path_trace_works_, [&](unique_ptr<PathTraceWork> &path_trace_work) {
if (!success) {
return;
}
@@ -1007,7 +1006,7 @@ bool PathTrace::get_render_tile_pixels(const PassAccessor &pass_accessor,
bool success = true;
- tbb::parallel_for_each(path_trace_works_, [&](unique_ptr<PathTraceWork> &path_trace_work) {
+ parallel_for_each(path_trace_works_, [&](unique_ptr<PathTraceWork> &path_trace_work) {
if (!success) {
return;
}
@@ -1024,7 +1023,7 @@ bool PathTrace::set_render_tile_pixels(PassAccessor &pass_accessor,
{
bool success = true;
- tbb::parallel_for_each(path_trace_works_, [&](unique_ptr<PathTraceWork> &path_trace_work) {
+ parallel_for_each(path_trace_works_, [&](unique_ptr<PathTraceWork> &path_trace_work) {
if (!success) {
return;
}
diff --git a/intern/cycles/integrator/path_trace_work_cpu.cpp b/intern/cycles/integrator/path_trace_work_cpu.cpp
index 147e273284b..518ef3185f9 100644
--- a/intern/cycles/integrator/path_trace_work_cpu.cpp
+++ b/intern/cycles/integrator/path_trace_work_cpu.cpp
@@ -73,7 +73,7 @@ void PathTraceWorkCPU::render_samples(RenderStatistics &statistics,
tbb::task_arena local_arena = local_tbb_arena_create(device_);
local_arena.execute([&]() {
- tbb::parallel_for(int64_t(0), total_pixels_num, [&](int64_t work_index) {
+ parallel_for(int64_t(0), total_pixels_num, [&](int64_t work_index) {
if (is_cancel_requested()) {
return;
}
@@ -219,7 +219,7 @@ int PathTraceWorkCPU::adaptive_sampling_converge_filter_count_active(float thres
/* Check convergency and do x-filter in a single `parallel_for`, to reduce threading overhead. */
local_arena.execute([&]() {
- tbb::parallel_for(full_y, full_y + height, [&](int y) {
+ parallel_for(full_y, full_y + height, [&](int y) {
CPUKernelThreadGlobals *kernel_globals = &kernel_thread_globals_[0];
bool row_converged = true;
@@ -243,7 +243,7 @@ int PathTraceWorkCPU::adaptive_sampling_converge_filter_count_active(float thres
if (num_active_pixels) {
local_arena.execute([&]() {
- tbb::parallel_for(full_x, full_x + width, [&](int x) {
+ parallel_for(full_x, full_x + width, [&](int x) {
CPUKernelThreadGlobals *kernel_globals = &kernel_thread_globals_[0];
kernels_.adaptive_sampling_filter_y(
kernel_globals, render_buffer, x, full_y, height, offset, stride);
@@ -265,7 +265,7 @@ void PathTraceWorkCPU::cryptomatte_postproces()
/* Check convergency and do x-filter in a single `parallel_for`, to reduce threading overhead. */
local_arena.execute([&]() {
- tbb::parallel_for(0, height, [&](int y) {
+ parallel_for(0, height, [&](int y) {
CPUKernelThreadGlobals *kernel_globals = &kernel_thread_globals_[0];
int pixel_index = y * width;
diff --git a/intern/cycles/integrator/shader_eval.cpp b/intern/cycles/integrator/shader_eval.cpp
index f5036b4020d..92b9d1c662d 100644
--- a/intern/cycles/integrator/shader_eval.cpp
+++ b/intern/cycles/integrator/shader_eval.cpp
@@ -92,7 +92,7 @@ bool ShaderEval::eval_cpu(Device *device,
tbb::task_arena local_arena(device->info.cpu_threads);
local_arena.execute([&]() {
- tbb::parallel_for(int64_t(0), work_size, [&](int64_t work_index) {
+ parallel_for(int64_t(0), work_size, [&](int64_t work_index) {
/* TODO: is this fast enough? */
if (progress_.get_cancel()) {
success = false;
diff --git a/intern/cycles/kernel/svm/blackbody.h b/intern/cycles/kernel/svm/blackbody.h
index 1618341b655..af59c2fe747 100644
--- a/intern/cycles/kernel/svm/blackbody.h
+++ b/intern/cycles/kernel/svm/blackbody.h
@@ -23,7 +23,7 @@ ccl_device_noinline void svm_node_blackbody(KernelGlobals kg,
/* Input */
float temperature = stack_load_float(stack, temperature_offset);
- float3 color_rgb = svm_math_blackbody_color(temperature);
+ float3 color_rgb = rec709_to_rgb(kg, svm_math_blackbody_color_rec709(temperature));
stack_store_float3(stack, col_offset, color_rgb);
}
diff --git a/intern/cycles/kernel/svm/closure.h b/intern/cycles/kernel/svm/closure.h
index 88b44cdbacf..305bd404d27 100644
--- a/intern/cycles/kernel/svm/closure.h
+++ b/intern/cycles/kernel/svm/closure.h
@@ -1111,7 +1111,8 @@ ccl_device_noinline int svm_node_principled_volume(KernelGlobals kg,
if (intensity > CLOSURE_WEIGHT_CUTOFF) {
float3 blackbody_tint = stack_load_float3(stack, node.w);
- float3 bb = blackbody_tint * intensity * svm_math_blackbody_color(T);
+ float3 bb = blackbody_tint * intensity *
+ rec709_to_rgb(kg, svm_math_blackbody_color_rec709(T));
emission_setup(sd, bb);
}
}
diff --git a/intern/cycles/kernel/svm/math_util.h b/intern/cycles/kernel/svm/math_util.h
index 2a496aee1e1..9f2d9561e26 100644
--- a/intern/cycles/kernel/svm/math_util.h
+++ b/intern/cycles/kernel/svm/math_util.h
@@ -189,10 +189,8 @@ ccl_device float svm_math(NodeMathType type, float a, float b, float c)
}
}
-ccl_device float3 svm_math_blackbody_color(float t)
+ccl_device float3 svm_math_blackbody_color_rec709(float t)
{
- /* TODO(lukas): Reimplement in XYZ. */
-
/* Calculate color in range 800..12000 using an approximation
* a/x+bx+c for R and G and ((at + b)t + c)t + d) for B
* Max absolute error for RGB is (0.00095, 0.00077, 0.00057),
diff --git a/intern/cycles/kernel/types.h b/intern/cycles/kernel/types.h
index db499a1e1bc..422285cd346 100644
--- a/intern/cycles/kernel/types.h
+++ b/intern/cycles/kernel/types.h
@@ -1117,13 +1117,18 @@ typedef struct KernelFilm {
float4 xyz_to_g;
float4 xyz_to_b;
float4 rgb_to_y;
+ /* Rec709 to rendering color space. */
+ float4 rec709_to_r;
+ float4 rec709_to_g;
+ float4 rec709_to_b;
+ int is_rec709;
int pass_bake_primitive;
int pass_bake_differential;
int use_approximate_shadow_catcher;
- int pad1, pad2;
+ int pad1;
} KernelFilm;
static_assert_align(KernelFilm, 16);
diff --git a/intern/cycles/kernel/util/color.h b/intern/cycles/kernel/util/color.h
index 95b6b33795d..28978d873d6 100644
--- a/intern/cycles/kernel/util/color.h
+++ b/intern/cycles/kernel/util/color.h
@@ -14,6 +14,15 @@ ccl_device float3 xyz_to_rgb(KernelGlobals kg, float3 xyz)
dot(float4_to_float3(kernel_data.film.xyz_to_b), xyz));
}
+ccl_device float3 rec709_to_rgb(KernelGlobals kg, float3 rec709)
+{
+ return (kernel_data.film.is_rec709) ?
+ rec709 :
+ make_float3(dot(float4_to_float3(kernel_data.film.rec709_to_r), rec709),
+ dot(float4_to_float3(kernel_data.film.rec709_to_g), rec709),
+ dot(float4_to_float3(kernel_data.film.rec709_to_b), rec709));
+}
+
ccl_device float linear_rgb_to_gray(KernelGlobals kg, float3 c)
{
return dot(c, float4_to_float3(kernel_data.film.rgb_to_y));
diff --git a/intern/cycles/scene/shader.cpp b/intern/cycles/scene/shader.cpp
index 8a08f2a5be9..e1af92ea8cf 100644
--- a/intern/cycles/scene/shader.cpp
+++ b/intern/cycles/scene/shader.cpp
@@ -579,6 +579,10 @@ void ShaderManager::device_update_common(Device * /*device*/,
kfilm->xyz_to_g = float3_to_float4(xyz_to_g);
kfilm->xyz_to_b = float3_to_float4(xyz_to_b);
kfilm->rgb_to_y = float3_to_float4(rgb_to_y);
+ kfilm->rec709_to_r = float3_to_float4(rec709_to_r);
+ kfilm->rec709_to_g = float3_to_float4(rec709_to_g);
+ kfilm->rec709_to_b = float3_to_float4(rec709_to_b);
+ kfilm->is_rec709 = is_rec709;
}
void ShaderManager::device_free_common(Device *, DeviceScene *dscene, Scene *scene)
@@ -740,6 +744,11 @@ float ShaderManager::linear_rgb_to_gray(float3 c)
return dot(c, rgb_to_y);
}
+float3 ShaderManager::rec709_to_scene_linear(float3 c)
+{
+ return make_float3(dot(rec709_to_r, c), dot(rec709_to_g, c), dot(rec709_to_b, c));
+}
+
string ShaderManager::get_cryptomatte_materials(Scene *scene)
{
string manifest = "{";
@@ -802,11 +811,29 @@ void ShaderManager::init_xyz_transforms()
{
/* Default to ITU-BT.709 in case no appropriate transform found.
* Note XYZ here is defined as having a D65 white point. */
- xyz_to_r = make_float3(3.2404542f, -1.5371385f, -0.4985314f);
- xyz_to_g = make_float3(-0.9692660f, 1.8760108f, 0.0415560f);
- xyz_to_b = make_float3(0.0556434f, -0.2040259f, 1.0572252f);
+ const Transform xyz_to_rec709 = make_transform(3.2404542f,
+ -1.5371385f,
+ -0.4985314f,
+ 0.0f,
+ -0.9692660f,
+ 1.8760108f,
+ 0.0415560f,
+ 0.0f,
+ 0.0556434f,
+ -0.2040259f,
+ 1.0572252f,
+ 0.0f);
+
+ xyz_to_r = float4_to_float3(xyz_to_rec709.x);
+ xyz_to_g = float4_to_float3(xyz_to_rec709.y);
+ xyz_to_b = float4_to_float3(xyz_to_rec709.z);
rgb_to_y = make_float3(0.2126729f, 0.7151522f, 0.0721750f);
+ rec709_to_r = make_float3(1.0f, 0.0f, 0.0f);
+ rec709_to_g = make_float3(0.0f, 1.0f, 0.0f);
+ rec709_to_b = make_float3(0.0f, 0.0f, 1.0f);
+ is_rec709 = true;
+
#ifdef WITH_OCIO
/* Get from OpenColorO config if it has the required roles. */
OCIO::ConstConfigRcPtr config = OCIO::GetCurrentConfig();
@@ -857,6 +884,12 @@ void ShaderManager::init_xyz_transforms()
const Transform rgb_to_xyz = transform_inverse(xyz_to_rgb);
rgb_to_y = float4_to_float3(rgb_to_xyz.y);
+
+ const Transform rec709_to_rgb = xyz_to_rgb * transform_inverse(xyz_to_rec709);
+ rec709_to_r = float4_to_float3(rec709_to_rgb.x);
+ rec709_to_g = float4_to_float3(rec709_to_rgb.y);
+ rec709_to_b = float4_to_float3(rec709_to_rgb.z);
+ is_rec709 = transform_equal_threshold(xyz_to_rgb, xyz_to_rec709, 0.0001f);
#endif
}
diff --git a/intern/cycles/scene/shader.h b/intern/cycles/scene/shader.h
index cbe331d8ec2..274bb9b4fa1 100644
--- a/intern/cycles/scene/shader.h
+++ b/intern/cycles/scene/shader.h
@@ -208,6 +208,7 @@ class ShaderManager {
static void free_memory();
float linear_rgb_to_gray(float3 c);
+ float3 rec709_to_scene_linear(float3 c);
string get_cryptomatte_materials(Scene *scene);
@@ -239,6 +240,10 @@ class ShaderManager {
float3 xyz_to_g;
float3 xyz_to_b;
float3 rgb_to_y;
+ float3 rec709_to_r;
+ float3 rec709_to_g;
+ float3 rec709_to_b;
+ bool is_rec709;
};
CCL_NAMESPACE_END
diff --git a/intern/cycles/scene/shader_nodes.cpp b/intern/cycles/scene/shader_nodes.cpp
index a951a558731..95fccf725f3 100644
--- a/intern/cycles/scene/shader_nodes.cpp
+++ b/intern/cycles/scene/shader_nodes.cpp
@@ -5763,7 +5763,9 @@ BlackbodyNode::BlackbodyNode() : ShaderNode(get_node_type())
void BlackbodyNode::constant_fold(const ConstantFolder &folder)
{
if (folder.all_inputs_constant()) {
- folder.make_constant(svm_math_blackbody_color(temperature));
+ const float3 rgb_rec709 = svm_math_blackbody_color_rec709(temperature);
+ const float3 rgb = folder.scene->shader_manager->rec709_to_scene_linear(rgb_rec709);
+ folder.make_constant(rgb);
}
}
diff --git a/intern/cycles/util/tbb.h b/intern/cycles/util/tbb.h
index 7105ddda0f8..948bf2b3e0e 100644
--- a/intern/cycles/util/tbb.h
+++ b/intern/cycles/util/tbb.h
@@ -25,6 +25,17 @@ CCL_NAMESPACE_BEGIN
using tbb::blocked_range;
using tbb::enumerable_thread_specific;
using tbb::parallel_for;
+using tbb::parallel_for_each;
+
+static inline void thread_capture_fp_settings()
+{
+#if TBB_INTERFACE_VERSION_MAJOR >= 12
+ tbb::task_group_context *ctx = tbb::task::current_context();
+#else
+ tbb::task_group_context *ctx = tbb::task::self().group();
+#endif
+ ctx->capture_fp_settings();
+}
static inline void parallel_for_cancel()
{
diff --git a/intern/cycles/util/transform.h b/intern/cycles/util/transform.h
index 371dbb0f4aa..477272f0ba6 100644
--- a/intern/cycles/util/transform.h
+++ b/intern/cycles/util/transform.h
@@ -285,6 +285,21 @@ ccl_device_inline bool operator!=(const Transform &A, const Transform &B)
return !(A == B);
}
+ccl_device_inline bool transform_equal_threshold(const Transform &A,
+ const Transform &B,
+ const float threshold)
+{
+ for (int x = 0; x < 3; x++) {
+ for (int y = 0; y < 4; y++) {
+ if (fabsf(A[x][y] - B[x][y]) > threshold) {
+ return false;
+ }
+ }
+ }
+
+ return true;
+}
+
ccl_device_inline float3 transform_get_column(const Transform *t, int column)
{
return make_float3(t->x[column], t->y[column], t->z[column]);