Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Brecht Van Lommel <brecht@blender.org>, 2021-11-05 23:01:23 +0300
committer: Brecht Van Lommel <brecht@blender.org>, 2021-11-06 00:04:36 +0300
commit: 97ff37bf54474efbce39653a1387ad55091d4964 (patch)
tree: 58ff9592807dbd98d126b179627e5c56f5309956 /intern/cycles/integrator
parent: d1a9425a2fde32b6786b333ab55661da507e818b (diff)
Cycles: perform CPU film reading in the kernel, to use AVX2 half conversion
Adds a bunch of CPU kernel functions to process one row of pixels, and uses those instead of calling unoptimized implementations. Fixes T92598.
Diffstat (limited to 'intern/cycles/integrator')
-rw-r--r-- intern/cycles/integrator/pass_accessor_cpu.cpp | 106
-rw-r--r-- intern/cycles/integrator/pass_accessor_cpu.h | 32
-rw-r--r-- intern/cycles/integrator/path_trace_work_cpu.cpp | 2
-rw-r--r-- intern/cycles/integrator/shader_eval.cpp | 2
4 files changed, 45 insertions, 97 deletions
diff --git a/intern/cycles/integrator/pass_accessor_cpu.cpp b/intern/cycles/integrator/pass_accessor_cpu.cpp
index 820da757be0..77ca332d142 100644
--- a/intern/cycles/integrator/pass_accessor_cpu.cpp
+++ b/intern/cycles/integrator/pass_accessor_cpu.cpp
@@ -14,9 +14,12 @@
* limitations under the License.
*/
+#include "device/device.h"
+
#include "integrator/pass_accessor_cpu.h"
#include "session/buffers.h"
+
#include "util/log.h"
#include "util/tbb.h"
@@ -33,70 +36,16 @@ CCL_NAMESPACE_BEGIN
* Kernel processing.
*/
-template<typename Processor>
-inline void PassAccessorCPU::run_get_pass_kernel_processor(const RenderBuffers *render_buffers,
- const BufferParams &buffer_params,
- const Destination &destination,
- const Processor &processor) const
-{
- KernelFilmConvert kfilm_convert;
- init_kernel_film_convert(&kfilm_convert, buffer_params, destination);
-
- if (destination.pixels) {
- /* NOTE: No overlays are applied since they are not used for final renders.
- * Can be supported via some sort of specialization to avoid code duplication. */
-
- run_get_pass_kernel_processor_float(
- &kfilm_convert, render_buffers, buffer_params, destination, processor);
- }
-
- if (destination.pixels_half_rgba) {
- /* TODO(sergey): Consider adding specialization to avoid per-pixel overlay check. */
-
- if (destination.num_components == 1) {
- run_get_pass_kernel_processor_half_rgba(&kfilm_convert,
- render_buffers,
- buffer_params,
- destination,
- [&processor](const KernelFilmConvert *kfilm_convert,
- ccl_global const float *buffer,
- float *pixel_rgba) {
- float pixel;
- processor(kfilm_convert, buffer, &pixel);
-
- pixel_rgba[0] = pixel;
- pixel_rgba[1] = pixel;
- pixel_rgba[2] = pixel;
- pixel_rgba[3] = 1.0f;
- });
- }
- else if (destination.num_components == 3) {
- run_get_pass_kernel_processor_half_rgba(&kfilm_convert,
- render_buffers,
- buffer_params,
- destination,
- [&processor](const KernelFilmConvert *kfilm_convert,
- ccl_global const float *buffer,
- float *pixel_rgba) {
- processor(kfilm_convert, buffer, pixel_rgba);
- pixel_rgba[3] = 1.0f;
- });
- }
- else if (destination.num_components == 4) {
- run_get_pass_kernel_processor_half_rgba(
- &kfilm_convert, render_buffers, buffer_params, destination, processor);
- }
- }
-}
-
-template<typename Processor>
inline void PassAccessorCPU::run_get_pass_kernel_processor_float(
const KernelFilmConvert *kfilm_convert,
const RenderBuffers *render_buffers,
const BufferParams &buffer_params,
const Destination &destination,
- const Processor &processor) const
+ const CPUKernels::FilmConvertFunction func) const
{
+ /* NOTE: No overlays are applied since they are not used for final renders.
+ * Can be supported via some sort of specialization to avoid code duplication. */
+
DCHECK_EQ(destination.stride, 0) << "Custom stride for float destination is not implemented.";
const int64_t pass_stride = buffer_params.pass_stride;
@@ -112,21 +61,16 @@ inline void PassAccessorCPU::run_get_pass_kernel_processor_float(
const float *buffer = window_data + y * buffer_row_stride;
float *pixel = destination.pixels +
(y * buffer_params.width + destination.offset) * pixel_stride;
-
- for (int64_t x = 0; x < buffer_params.window_width;
- ++x, buffer += pass_stride, pixel += pixel_stride) {
- processor(kfilm_convert, buffer, pixel);
- }
+ func(kfilm_convert, buffer, pixel, buffer_params.window_width, pass_stride, pixel_stride);
});
}
-template<typename Processor>
inline void PassAccessorCPU::run_get_pass_kernel_processor_half_rgba(
const KernelFilmConvert *kfilm_convert,
const RenderBuffers *render_buffers,
const BufferParams &buffer_params,
const Destination &destination,
- const Processor &processor) const
+ const CPUKernels::FilmConvertHalfRGBAFunction func) const
{
const int64_t pass_stride = buffer_params.pass_stride;
const int64_t buffer_row_stride = buffer_params.stride * buffer_params.pass_stride;
@@ -141,16 +85,7 @@ inline void PassAccessorCPU::run_get_pass_kernel_processor_half_rgba(
tbb::parallel_for(0, buffer_params.window_height, [&](int64_t y) {
const float *buffer = window_data + y * buffer_row_stride;
half4 *pixel = dst_start + y * destination_stride;
- for (int64_t x = 0; x < buffer_params.window_width; ++x, buffer += pass_stride, ++pixel) {
-
- float pixel_rgba[4];
- processor(kfilm_convert, buffer, pixel_rgba);
-
- film_apply_pass_pixel_overlays_rgba(kfilm_convert, buffer, pixel_rgba);
-
- *pixel = float4_to_half4_display(
- make_float4(pixel_rgba[0], pixel_rgba[1], pixel_rgba[2], pixel_rgba[3]));
- }
+ func(kfilm_convert, buffer, pixel, buffer_params.window_width, pass_stride);
});
}
@@ -163,8 +98,25 @@ inline void PassAccessorCPU::run_get_pass_kernel_processor_half_rgba(
const BufferParams &buffer_params, \
const Destination &destination) const \
{ \
- run_get_pass_kernel_processor( \
- render_buffers, buffer_params, destination, film_get_pass_pixel_##pass); \
+ const CPUKernels &kernels = Device::get_cpu_kernels(); \
+ KernelFilmConvert kfilm_convert; \
+ init_kernel_film_convert(&kfilm_convert, buffer_params, destination); \
+\
+ if (destination.pixels) { \
+ run_get_pass_kernel_processor_float(&kfilm_convert, \
+ render_buffers, \
+ buffer_params, \
+ destination, \
+ kernels.film_convert_##pass); \
+ } \
+\
+ if (destination.pixels_half_rgba) { \
+ run_get_pass_kernel_processor_half_rgba(&kfilm_convert, \
+ render_buffers, \
+ buffer_params, \
+ destination, \
+ kernels.film_convert_half_rgba_##pass); \
+ } \
}
/* Float (scalar) passes. */
diff --git a/intern/cycles/integrator/pass_accessor_cpu.h b/intern/cycles/integrator/pass_accessor_cpu.h
index 0313dc5bb0d..9ed38ab256e 100644
--- a/intern/cycles/integrator/pass_accessor_cpu.h
+++ b/intern/cycles/integrator/pass_accessor_cpu.h
@@ -16,6 +16,8 @@
#pragma once
+#include "device/cpu/kernel.h"
+
#include "integrator/pass_accessor.h"
CCL_NAMESPACE_BEGIN
@@ -28,25 +30,19 @@ class PassAccessorCPU : public PassAccessor {
using PassAccessor::PassAccessor;
protected:
- template<typename Processor>
- inline void run_get_pass_kernel_processor(const RenderBuffers *render_buffers,
- const BufferParams &buffer_params,
- const Destination &destination,
- const Processor &processor) const;
-
- template<typename Processor>
- inline void run_get_pass_kernel_processor_float(const KernelFilmConvert *kfilm_convert,
- const RenderBuffers *render_buffers,
- const BufferParams &buffer_params,
- const Destination &destination,
- const Processor &processor) const;
+ inline void run_get_pass_kernel_processor_float(
+ const KernelFilmConvert *kfilm_convert,
+ const RenderBuffers *render_buffers,
+ const BufferParams &buffer_params,
+ const Destination &destination,
+ const CPUKernels::FilmConvertFunction func) const;
- template<typename Processor>
- inline void run_get_pass_kernel_processor_half_rgba(const KernelFilmConvert *kfilm_convert,
- const RenderBuffers *render_buffers,
- const BufferParams &buffer_params,
- const Destination &destination,
- const Processor &processor) const;
+ inline void run_get_pass_kernel_processor_half_rgba(
+ const KernelFilmConvert *kfilm_convert,
+ const RenderBuffers *render_buffers,
+ const BufferParams &buffer_params,
+ const Destination &destination,
+ const CPUKernels::FilmConvertHalfRGBAFunction func) const;
#define DECLARE_PASS_ACCESSOR(pass) \
virtual void get_pass_##pass(const RenderBuffers *render_buffers, \
diff --git a/intern/cycles/integrator/path_trace_work_cpu.cpp b/intern/cycles/integrator/path_trace_work_cpu.cpp
index 541a7eca02f..36ce2be9f6d 100644
--- a/intern/cycles/integrator/path_trace_work_cpu.cpp
+++ b/intern/cycles/integrator/path_trace_work_cpu.cpp
@@ -58,7 +58,7 @@ PathTraceWorkCPU::PathTraceWorkCPU(Device *device,
DeviceScene *device_scene,
bool *cancel_requested_flag)
: PathTraceWork(device, film, device_scene, cancel_requested_flag),
- kernels_(*(device->get_cpu_kernels()))
+ kernels_(Device::get_cpu_kernels())
{
DCHECK_EQ(device->info.type, DEVICE_CPU);
}
diff --git a/intern/cycles/integrator/shader_eval.cpp b/intern/cycles/integrator/shader_eval.cpp
index 42cbf87f254..9ec530c81df 100644
--- a/intern/cycles/integrator/shader_eval.cpp
+++ b/intern/cycles/integrator/shader_eval.cpp
@@ -96,7 +96,7 @@ bool ShaderEval::eval_cpu(Device *device,
device->get_cpu_kernel_thread_globals(kernel_thread_globals);
/* Find required kernel function. */
- const CPUKernels &kernels = *(device->get_cpu_kernels());
+ const CPUKernels &kernels = Device::get_cpu_kernels();
/* Simple parallel_for over all work items. */
KernelShaderEvalInput *input_data = input.data();