diff options
author | Michael Jones <michael_p_jones@apple.com> | 2021-11-24 23:34:27 +0300 |
---|---|---|
committer | Michael Jones <michael_p_jones@apple.com> | 2021-11-26 16:58:48 +0300 |
commit | eb7827e7970cca8e3fb0e0bf39e8742e69f0b2b6 (patch) | |
tree | d64e14d386428dc8336b4e4cdaa50d5704261a41 /intern/cycles/kernel/device/gpu/kernel.h | |
parent | 12a83db83c5e23f5d0dccd420fa875a736a7b10f (diff) |
Cycles: Fix film convert address space mismatch on Metal
This patch fixes an address space mismatch in the film convert kernels on Metal. The `film_get_pass_pixel_...` functions take a `ccl_private` result pointer, but the film convert kernels pass a `ccl_global` memory pointer. Specialising the pass-fetch functions with templates results in compilation errors on Visual Studio, so instead, on Metal only, this patch fetches into an intermediate local array and then copies the needed channels to the `ccl_global` pixel.
Reviewed By: brecht
Differential Revision: https://developer.blender.org/D13350
Diffstat (limited to 'intern/cycles/kernel/device/gpu/kernel.h')
-rw-r--r-- | intern/cycles/kernel/device/gpu/kernel.h | 29 |
1 file changed, 28 insertions, 1 deletion
diff --git a/intern/cycles/kernel/device/gpu/kernel.h b/intern/cycles/kernel/device/gpu/kernel.h index 22e2a61a06d..24702de496c 100644 --- a/intern/cycles/kernel/device/gpu/kernel.h +++ b/intern/cycles/kernel/device/gpu/kernel.h @@ -547,6 +547,33 @@ ccl_device_inline void kernel_gpu_film_convert_half_write(ccl_global uchar4 *rgb #endif } +#ifdef __KERNEL_METAL__ + +/* Fetch into a local variable on Metal - there is minimal overhead. Templating the + * film_get_pass_pixel_... functions works on MSL, but not on other compilers. */ +# define FILM_GET_PASS_PIXEL_F32(variant, input_channel_count) \ + float local_pixel[4]; \ + film_get_pass_pixel_##variant(&kfilm_convert, buffer, local_pixel); \ + if (input_channel_count >= 1) { \ + pixel[0] = local_pixel[0]; \ + } \ + if (input_channel_count >= 2) { \ + pixel[1] = local_pixel[1]; \ + } \ + if (input_channel_count >= 3) { \ + pixel[2] = local_pixel[2]; \ + } \ + if (input_channel_count >= 4) { \ + pixel[3] = local_pixel[3]; \ + } + +#else + +# define FILM_GET_PASS_PIXEL_F32(variant, input_channel_count) \ + film_get_pass_pixel_##variant(&kfilm_convert, buffer, pixel); + +#endif + #define KERNEL_FILM_CONVERT_VARIANT(variant, input_channel_count) \ ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS) \ ccl_gpu_kernel_signature(film_convert_##variant, \ @@ -574,7 +601,7 @@ ccl_device_inline void kernel_gpu_film_convert_half_write(ccl_global uchar4 *rgb ccl_global float *pixel = pixels + \ (render_pixel_index + rgba_offset) * kfilm_convert.pixel_stride; \ \ - film_get_pass_pixel_##variant(&kfilm_convert, buffer, pixel); \ + FILM_GET_PASS_PIXEL_F32(variant, input_channel_count); \ } \ \ ccl_gpu_kernel(GPU_KERNEL_BLOCK_NUM_THREADS, GPU_KERNEL_MAX_REGISTERS) \ |