Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBrecht Van Lommel <brecht@blender.org>2021-11-05 23:01:23 +0300
committerBrecht Van Lommel <brecht@blender.org>2021-11-06 00:04:36 +0300
commit97ff37bf54474efbce39653a1387ad55091d4964 (patch)
tree58ff9592807dbd98d126b179627e5c56f5309956 /intern/cycles/kernel/device/cpu
parentd1a9425a2fde32b6786b333ab55661da507e818b (diff)
Cycles: perform CPU film reading in the kernel, to use AVX2 half conversion
Adds a set of CPU kernel functions that each process one row of pixels, and uses those instead of calling the unoptimized implementations. Fixes T92598
Diffstat (limited to 'intern/cycles/kernel/device/cpu')
-rw-r--r--intern/cycles/kernel/device/cpu/kernel.h1
-rw-r--r--intern/cycles/kernel/device/cpu/kernel_arch.h31
-rw-r--r--intern/cycles/kernel/device/cpu/kernel_arch_impl.h81
3 files changed, 112 insertions, 1 deletions
diff --git a/intern/cycles/kernel/device/cpu/kernel.h b/intern/cycles/kernel/device/cpu/kernel.h
index c49d7ca445a..6af8094b1ea 100644
--- a/intern/cycles/kernel/device/cpu/kernel.h
+++ b/intern/cycles/kernel/device/cpu/kernel.h
@@ -18,6 +18,7 @@
/* CPU Kernel Interface */
+#include "util/half.h"
#include "util/types.h"
#include "kernel/types.h"
diff --git a/intern/cycles/kernel/device/cpu/kernel_arch.h b/intern/cycles/kernel/device/cpu/kernel_arch.h
index 432ac5e15a9..2f9a3f7c59d 100644
--- a/intern/cycles/kernel/device/cpu/kernel_arch.h
+++ b/intern/cycles/kernel/device/cpu/kernel_arch.h
@@ -52,6 +52,37 @@ KERNEL_INTEGRATOR_SHADE_FUNCTION(megakernel);
#undef KERNEL_INTEGRATOR_INIT_FUNCTION
#undef KERNEL_INTEGRATOR_SHADE_FUNCTION
+#define KERNEL_FILM_CONVERT_FUNCTION(name) /* Declares the two film-convert entry points for pass `name`. */ \
+ void KERNEL_FUNCTION_FULL_NAME(film_convert_##name)(const KernelFilmConvert *kfilm_convert, \
+ const float *buffer, \
+ float *pixel, \
+ const int width, \
+ const int buffer_stride, \
+ const int pixel_stride); /* Variant writing to a float destination with its own stride. */ \
+ void KERNEL_FUNCTION_FULL_NAME(film_convert_half_rgba_##name)( \
+ const KernelFilmConvert *kfilm_convert, \
+ const float *buffer, \
+ half4 *pixel, \
+ const int width, \
+ const int buffer_stride); /* Variant writing to a half4 destination; takes no pixel stride. */
+
+KERNEL_FILM_CONVERT_FUNCTION(depth)
+KERNEL_FILM_CONVERT_FUNCTION(mist)
+KERNEL_FILM_CONVERT_FUNCTION(sample_count)
+KERNEL_FILM_CONVERT_FUNCTION(float)
+
+KERNEL_FILM_CONVERT_FUNCTION(light_path)
+KERNEL_FILM_CONVERT_FUNCTION(float3)
+
+KERNEL_FILM_CONVERT_FUNCTION(motion)
+KERNEL_FILM_CONVERT_FUNCTION(cryptomatte)
+KERNEL_FILM_CONVERT_FUNCTION(shadow_catcher)
+KERNEL_FILM_CONVERT_FUNCTION(shadow_catcher_matte_with_shadow)
+KERNEL_FILM_CONVERT_FUNCTION(combined)
+KERNEL_FILM_CONVERT_FUNCTION(float4)
+
+#undef KERNEL_FILM_CONVERT_FUNCTION /* The helper macro is only needed for the declarations above. */
+
/* --------------------------------------------------------------------
* Shader evaluation.
*/
diff --git a/intern/cycles/kernel/device/cpu/kernel_arch_impl.h b/intern/cycles/kernel/device/cpu/kernel_arch_impl.h
index 6df5d7787fc..1ea5002e300 100644
--- a/intern/cycles/kernel/device/cpu/kernel_arch_impl.h
+++ b/intern/cycles/kernel/device/cpu/kernel_arch_impl.h
@@ -47,8 +47,8 @@
# include "kernel/integrator/megakernel.h"
# include "kernel/film/adaptive_sampling.h"
-# include "kernel/film/read.h"
# include "kernel/film/id_passes.h"
+# include "kernel/film/read.h"
# include "kernel/bake/bake.h"
@@ -232,6 +232,85 @@ void KERNEL_FUNCTION_FULL_NAME(cryptomatte_postprocess)(const KernelGlobalsCPU *
#endif
}
+/* --------------------------------------------------------------------
+ * Film Convert.
+ */
+
+#ifdef KERNEL_STUB
+
+# define KERNEL_FILM_CONVERT_FUNCTION(name, is_float) /* Stub build: both entry points only trigger STUB_ASSERT; `is_float` is unused here. */ \
+ void KERNEL_FUNCTION_FULL_NAME(film_convert_##name)(const KernelFilmConvert *kfilm_convert, \
+ const float *buffer, \
+ float *pixel, \
+ const int width, \
+ const int buffer_stride, \
+ const int pixel_stride) \
+ { \
+ STUB_ASSERT(KERNEL_ARCH, film_convert_##name); \
+ } \
+ void KERNEL_FUNCTION_FULL_NAME(film_convert_half_rgba_##name)( \
+ const KernelFilmConvert *kfilm_convert, \
+ const float *buffer, \
+ half4 *pixel, \
+ const int width, \
+ const int buffer_stride) \
+ { \
+ STUB_ASSERT(KERNEL_ARCH, film_convert_half_rgba_##name); /* Pass this stub's own name, not the float variant's. */ \
+ }
+
+#else
+
+# define KERNEL_FILM_CONVERT_FUNCTION(name, is_float) /* Defines the float and half4 converters for pass `name`; each handles `width` pixels per call. */ \
+ void KERNEL_FUNCTION_FULL_NAME(film_convert_##name)(const KernelFilmConvert *kfilm_convert, \
+ const float *buffer, \
+ float *pixel, \
+ const int width, \
+ const int buffer_stride, \
+ const int pixel_stride) \
+ { \
+ for (int i = 0; i < width; i++, buffer += buffer_stride, pixel += pixel_stride) { /* Source and destination advance by their own strides. */ \
+ film_get_pass_pixel_##name(kfilm_convert, buffer, pixel); \
+ } \
+ } \
+ void KERNEL_FUNCTION_FULL_NAME(film_convert_half_rgba_##name)( \
+ const KernelFilmConvert *kfilm_convert, \
+ const float *buffer, \
+ half4 *pixel, \
+ const int width, \
+ const int buffer_stride) \
+ { \
+ for (int i = 0; i < width; i++, buffer += buffer_stride, pixel++) { /* Destination advances one half4 per pixel. */ \
+ float pixel_rgba[4] = {0.0f, 0.0f, 0.0f, 1.0f}; /* Scratch pixel starts as (0, 0, 0, 1) before being handed to the pass reader. */ \
+ film_get_pass_pixel_##name(kfilm_convert, buffer, pixel_rgba); \
+ if (is_float) { /* Copy channel 0 into channels 1 and 2. */ \
+ pixel_rgba[1] = pixel_rgba[0]; \
+ pixel_rgba[2] = pixel_rgba[0]; \
+ } \
+ film_apply_pass_pixel_overlays_rgba(kfilm_convert, buffer, pixel_rgba); \
+ *pixel = float4_to_half4_display( /* Store the half4 result for this pixel. */ \
+ make_float4(pixel_rgba[0], pixel_rgba[1], pixel_rgba[2], pixel_rgba[3])); \
+ } \
+ }
+
+#endif
+
+KERNEL_FILM_CONVERT_FUNCTION(depth, true) /* is_float = true: in the non-stub build the half-RGBA variant copies channel 0 into channels 1 and 2. */
+KERNEL_FILM_CONVERT_FUNCTION(mist, true)
+KERNEL_FILM_CONVERT_FUNCTION(sample_count, true)
+KERNEL_FILM_CONVERT_FUNCTION(float, true)
+
+KERNEL_FILM_CONVERT_FUNCTION(light_path, false) /* is_float = false: no channel replication in the half-RGBA variant. */
+KERNEL_FILM_CONVERT_FUNCTION(float3, false)
+
+KERNEL_FILM_CONVERT_FUNCTION(motion, false)
+KERNEL_FILM_CONVERT_FUNCTION(cryptomatte, false)
+KERNEL_FILM_CONVERT_FUNCTION(shadow_catcher, false)
+KERNEL_FILM_CONVERT_FUNCTION(shadow_catcher_matte_with_shadow, false)
+KERNEL_FILM_CONVERT_FUNCTION(combined, false)
+KERNEL_FILM_CONVERT_FUNCTION(float4, false)
+
+#undef KERNEL_FILM_CONVERT_FUNCTION /* Instantiation helper is not needed past this point. */
+
#undef KERNEL_INVOKE
#undef DEFINE_INTEGRATOR_KERNEL
#undef DEFINE_INTEGRATOR_SHADE_KERNEL