diff options
author | Brecht Van Lommel <brecht@blender.org> | 2021-11-05 23:01:23 +0300 |
---|---|---|
committer | Brecht Van Lommel <brecht@blender.org> | 2021-11-06 00:04:36 +0300 |
commit | 97ff37bf54474efbce39653a1387ad55091d4964 (patch) | |
tree | 58ff9592807dbd98d126b179627e5c56f5309956 /intern/cycles/kernel/device/cpu | |
parent | d1a9425a2fde32b6786b333ab55661da507e818b (diff) |
Cycles: perform CPU film reading in the kernel, to use AVX2 half conversion
Adds a bunch of CPU kernel function to process on row of pixels, and use those
instead of calling unoptimized implementations.
Fixes T92598
Diffstat (limited to 'intern/cycles/kernel/device/cpu')
-rw-r--r-- | intern/cycles/kernel/device/cpu/kernel.h | 1 | ||||
-rw-r--r-- | intern/cycles/kernel/device/cpu/kernel_arch.h | 31 | ||||
-rw-r--r-- | intern/cycles/kernel/device/cpu/kernel_arch_impl.h | 81 |
3 files changed, 112 insertions, 1 deletions
diff --git a/intern/cycles/kernel/device/cpu/kernel.h b/intern/cycles/kernel/device/cpu/kernel.h index c49d7ca445a..6af8094b1ea 100644 --- a/intern/cycles/kernel/device/cpu/kernel.h +++ b/intern/cycles/kernel/device/cpu/kernel.h @@ -18,6 +18,7 @@ /* CPU Kernel Interface */ +#include "util/half.h" #include "util/types.h" #include "kernel/types.h" diff --git a/intern/cycles/kernel/device/cpu/kernel_arch.h b/intern/cycles/kernel/device/cpu/kernel_arch.h index 432ac5e15a9..2f9a3f7c59d 100644 --- a/intern/cycles/kernel/device/cpu/kernel_arch.h +++ b/intern/cycles/kernel/device/cpu/kernel_arch.h @@ -52,6 +52,37 @@ KERNEL_INTEGRATOR_SHADE_FUNCTION(megakernel); #undef KERNEL_INTEGRATOR_INIT_FUNCTION #undef KERNEL_INTEGRATOR_SHADE_FUNCTION +#define KERNEL_FILM_CONVERT_FUNCTION(name) \ + void KERNEL_FUNCTION_FULL_NAME(film_convert_##name)(const KernelFilmConvert *kfilm_convert, \ + const float *buffer, \ + float *pixel, \ + const int width, \ + const int buffer_stride, \ + const int pixel_stride); \ + void KERNEL_FUNCTION_FULL_NAME(film_convert_half_rgba_##name)( \ + const KernelFilmConvert *kfilm_convert, \ + const float *buffer, \ + half4 *pixel, \ + const int width, \ + const int buffer_stride); + +KERNEL_FILM_CONVERT_FUNCTION(depth) +KERNEL_FILM_CONVERT_FUNCTION(mist) +KERNEL_FILM_CONVERT_FUNCTION(sample_count) +KERNEL_FILM_CONVERT_FUNCTION(float) + +KERNEL_FILM_CONVERT_FUNCTION(light_path) +KERNEL_FILM_CONVERT_FUNCTION(float3) + +KERNEL_FILM_CONVERT_FUNCTION(motion) +KERNEL_FILM_CONVERT_FUNCTION(cryptomatte) +KERNEL_FILM_CONVERT_FUNCTION(shadow_catcher) +KERNEL_FILM_CONVERT_FUNCTION(shadow_catcher_matte_with_shadow) +KERNEL_FILM_CONVERT_FUNCTION(combined) +KERNEL_FILM_CONVERT_FUNCTION(float4) + +#undef KERNEL_FILM_CONVERT_FUNCTION + /* -------------------------------------------------------------------- * Shader evaluation. */ diff --git a/intern/cycles/kernel/device/cpu/kernel_arch_impl.h b/intern/cycles/kernel/device/cpu/kernel_arch_impl.h index 6df5d7787fc..1ea5002e300 100644 --- a/intern/cycles/kernel/device/cpu/kernel_arch_impl.h +++ b/intern/cycles/kernel/device/cpu/kernel_arch_impl.h @@ -47,8 +47,8 @@ # include "kernel/integrator/megakernel.h" # include "kernel/film/adaptive_sampling.h" -# include "kernel/film/read.h" # include "kernel/film/id_passes.h" +# include "kernel/film/read.h" # include "kernel/bake/bake.h" @@ -232,6 +232,85 @@ void KERNEL_FUNCTION_FULL_NAME(cryptomatte_postprocess)(const KernelGlobalsCPU * #endif } +/* -------------------------------------------------------------------- + * Film Convert. + */ + +#ifdef KERNEL_STUB + +# define KERNEL_FILM_CONVERT_FUNCTION(name, is_float) \ + void KERNEL_FUNCTION_FULL_NAME(film_convert_##name)(const KernelFilmConvert *kfilm_convert, \ + const float *buffer, \ + float *pixel, \ + const int width, \ + const int buffer_stride, \ + const int pixel_stride) \ + { \ + STUB_ASSERT(KERNEL_ARCH, film_convert_##name); \ + } \ + void KERNEL_FUNCTION_FULL_NAME(film_convert_half_rgba_##name)( \ + const KernelFilmConvert *kfilm_convert, \ + const float *buffer, \ + half4 *pixel, \ + const int width, \ + const int buffer_stride) \ + { \ + STUB_ASSERT(KERNEL_ARCH, film_convert_##name); \ + } + +#else + +# define KERNEL_FILM_CONVERT_FUNCTION(name, is_float) \ + void KERNEL_FUNCTION_FULL_NAME(film_convert_##name)(const KernelFilmConvert *kfilm_convert, \ + const float *buffer, \ + float *pixel, \ + const int width, \ + const int buffer_stride, \ + const int pixel_stride) \ + { \ + for (int i = 0; i < width; i++, buffer += buffer_stride, pixel += pixel_stride) { \ + film_get_pass_pixel_##name(kfilm_convert, buffer, pixel); \ + } \ + } \ + void KERNEL_FUNCTION_FULL_NAME(film_convert_half_rgba_##name)( \ + const KernelFilmConvert *kfilm_convert, \ + const float *buffer, \ + half4 *pixel, \ + const int width, \ + const int buffer_stride) \ + { \ + for (int i = 0; i < width; i++, buffer += buffer_stride, pixel++) { \ + float pixel_rgba[4] = {0.0f, 0.0f, 0.0f, 1.0f}; \ + film_get_pass_pixel_##name(kfilm_convert, buffer, pixel_rgba); \ + if (is_float) { \ + pixel_rgba[1] = pixel_rgba[0]; \ + pixel_rgba[2] = pixel_rgba[0]; \ + } \ + film_apply_pass_pixel_overlays_rgba(kfilm_convert, buffer, pixel_rgba); \ + *pixel = float4_to_half4_display( \ + make_float4(pixel_rgba[0], pixel_rgba[1], pixel_rgba[2], pixel_rgba[3])); \ + } \ + } + +#endif + +KERNEL_FILM_CONVERT_FUNCTION(depth, true) +KERNEL_FILM_CONVERT_FUNCTION(mist, true) +KERNEL_FILM_CONVERT_FUNCTION(sample_count, true) +KERNEL_FILM_CONVERT_FUNCTION(float, true) + +KERNEL_FILM_CONVERT_FUNCTION(light_path, false) +KERNEL_FILM_CONVERT_FUNCTION(float3, false) + +KERNEL_FILM_CONVERT_FUNCTION(motion, false) +KERNEL_FILM_CONVERT_FUNCTION(cryptomatte, false) +KERNEL_FILM_CONVERT_FUNCTION(shadow_catcher, false) +KERNEL_FILM_CONVERT_FUNCTION(shadow_catcher_matte_with_shadow, false) +KERNEL_FILM_CONVERT_FUNCTION(combined, false) +KERNEL_FILM_CONVERT_FUNCTION(float4, false) + +#undef KERNEL_FILM_CONVERT_FUNCTION + #undef KERNEL_INVOKE #undef DEFINE_INTEGRATOR_KERNEL #undef DEFINE_INTEGRATOR_SHADE_KERNEL |