Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Brecht Van Lommel <brecht@blender.org>, 2021-11-10 21:43:19 +0300
committer: Brecht Van Lommel <brecht@blender.org>, 2021-11-10 22:03:07 +0300
commit: 6b0008129e6370866808bd937161579a2cb5cb51 (patch)
tree: bfe3f8cde97629f6d84e58721e1743b649e20dcc /intern/cycles/kernel/device/gpu
parent: c8e93da0a754a4f936a0849d5eba753695b7a153 (diff)
Fix T92972: Cycles HIP wrong render display after a recent refactor
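It's unclear why writing the half4 pixel fails on HIP. Maybe the size of half4 is not the expected 8 bytes, so adjacent pixels are overwritten. Or there is some bug in the HIP compiler when writing a struct into global memory, which we probably don't do elsewhere in the kernel. As a workaround, on HIP the four half components are now written individually instead of storing the half4 struct in one assignment; other devices keep the original half4 store. Thanks to Thomas, William and Jeroen for helping investigate this.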
Diffstat (limited to 'intern/cycles/kernel/device/gpu')
-rw-r--r-- intern/cycles/kernel/device/gpu/kernel.h 25
1 file changed, 23 insertions, 2 deletions
diff --git a/intern/cycles/kernel/device/gpu/kernel.h b/intern/cycles/kernel/device/gpu/kernel.h
index 5848ba5df9d..844bbf90f67 100644
--- a/intern/cycles/kernel/device/gpu/kernel.h
+++ b/intern/cycles/kernel/device/gpu/kernel.h
@@ -486,6 +486,26 @@ ccl_device_inline void kernel_gpu_film_convert_common(const KernelFilmConvert *k
processor(kfilm_convert, buffer, pixel);
}
+ccl_device_inline void kernel_gpu_film_convert_half_write(ccl_global uchar4 *rgba,
+ const int rgba_offset,
+ const int rgba_stride,
+ const int x,
+ const int y,
+ const half4 half_pixel)
+{
+ /* Work around HIP issue with half float display, see T92972. */
+#ifdef __KERNEL_HIP__
+ ccl_global half *out = ((ccl_global half *)rgba) + (rgba_offset + y * rgba_stride + x) * 4;
+ out[0] = half_pixel.x;
+ out[1] = half_pixel.y;
+ out[2] = half_pixel.z;
+ out[3] = half_pixel.w;
+#else
+ ccl_global half4 *out = ((ccl_global half4 *)rgba) + rgba_offset + y * rgba_stride + x;
+ *out = half_pixel;
+#endif
+}
+
/* Common implementation for half4 destination and 4-channel input pass. */
template<typename Processor>
ccl_device_inline void kernel_gpu_film_convert_half_rgba_common_rgba(
@@ -516,8 +536,9 @@ ccl_device_inline void kernel_gpu_film_convert_half_rgba_common_rgba(
film_apply_pass_pixel_overlays_rgba(kfilm_convert, buffer, pixel);
- ccl_global half4 *out = ((ccl_global half4 *)rgba) + rgba_offset + y * rgba_stride + x;
- *out = float4_to_half4_display(make_float4(pixel[0], pixel[1], pixel[2], pixel[3]));
+ const half4 half_pixel = float4_to_half4_display(
+ make_float4(pixel[0], pixel[1], pixel[2], pixel[3]));
+ kernel_gpu_film_convert_half_write(rgba, rgba_offset, rgba_stride, x, y, half_pixel);
}
/* Common implementation for half4 destination and 3-channel input pass. */