diff options
author | Brecht Van Lommel <brecht@blender.org> | 2021-11-10 21:43:19 +0300 |
---|---|---|
committer | Brecht Van Lommel <brecht@blender.org> | 2021-11-10 22:03:07 +0300 |
commit | 6b0008129e6370866808bd937161579a2cb5cb51 (patch) | |
tree | bfe3f8cde97629f6d84e58721e1743b649e20dcc | |
parent | c8e93da0a754a4f936a0849d5eba753695b7a153 (diff) |
Fix T92972: Cycles HIP wrong render display after a recent refactor
It's unclear why this fails. Maybe the size of half4 is not the expected
8 bytes and adjacent pixels are overwritten. Or there is some bug in the
HIP compiler writing a struct into global memory, which we probably don't
do elsewhere in the kernel.
Thanks to Thomas, William and Jeroen for helping investigate this.
-rw-r--r-- | intern/cycles/kernel/device/gpu/kernel.h | 25 |
1 file changed, 23 insertions, 2 deletions
diff --git a/intern/cycles/kernel/device/gpu/kernel.h b/intern/cycles/kernel/device/gpu/kernel.h
index 5848ba5df9d..844bbf90f67 100644
--- a/intern/cycles/kernel/device/gpu/kernel.h
+++ b/intern/cycles/kernel/device/gpu/kernel.h
@@ -486,6 +486,26 @@ ccl_device_inline void kernel_gpu_film_convert_common(const KernelFilmConvert *k
   processor(kfilm_convert, buffer, pixel);
 }
 
+ccl_device_inline void kernel_gpu_film_convert_half_write(ccl_global uchar4 *rgba,
+                                                          const int rgba_offset,
+                                                          const int rgba_stride,
+                                                          const int x,
+                                                          const int y,
+                                                          const half4 half_pixel)
+{
+  /* Work around HIP issue with half float display, see T92972. */
+#ifdef __KERNEL_HIP__
+  ccl_global half *out = ((ccl_global half *)rgba) + (rgba_offset + y * rgba_stride + x) * 4;
+  out[0] = half_pixel.x;
+  out[1] = half_pixel.y;
+  out[2] = half_pixel.z;
+  out[3] = half_pixel.w;
+#else
+  ccl_global half4 *out = ((ccl_global half4 *)rgba) + rgba_offset + y * rgba_stride + x;
+  *out = half_pixel;
+#endif
+}
+
 /* Common implementation for half4 destination and 4-channel input pass. */
 template<typename Processor>
 ccl_device_inline void kernel_gpu_film_convert_half_rgba_common_rgba(
@@ -516,8 +536,9 @@ ccl_device_inline void kernel_gpu_film_convert_half_rgba_common_rgba(
 
   film_apply_pass_pixel_overlays_rgba(kfilm_convert, buffer, pixel);
 
-  ccl_global half4 *out = ((ccl_global half4 *)rgba) + rgba_offset + y * rgba_stride + x;
-  *out = float4_to_half4_display(make_float4(pixel[0], pixel[1], pixel[2], pixel[3]));
+  const half4 half_pixel = float4_to_half4_display(
+      make_float4(pixel[0], pixel[1], pixel[2], pixel[3]));
+  kernel_gpu_film_convert_half_write(rgba, rgba_offset, rgba_stride, x, y, half_pixel);
 }
 
 /* Common implementation for half4 destination and 3-channel input pass. */