From 282516e53eba9bb3aaddd67b2b099fea98bd4c1f Mon Sep 17 00:00:00 2001
From: Brecht Van Lommel
Date: Thu, 21 Oct 2021 19:25:38 +0200
Subject: Cleanup: refactor float/half conversions for clarity

---
 intern/cycles/kernel/device/cpu/image.h    | 4 ++--
 intern/cycles/kernel/device/cuda/compat.h  | 7 +++++++
 intern/cycles/kernel/device/gpu/kernel.h   | 2 +-
 intern/cycles/kernel/device/optix/compat.h | 7 +++++++
 4 files changed, 17 insertions(+), 3 deletions(-)

(limited to 'intern/cycles/kernel/device')

diff --git a/intern/cycles/kernel/device/cpu/image.h b/intern/cycles/kernel/device/cpu/image.h
index 44c5d7ef065..93f956e354d 100644
--- a/intern/cycles/kernel/device/cpu/image.h
+++ b/intern/cycles/kernel/device/cpu/image.h
@@ -72,12 +72,12 @@ template struct TextureInterpolator {
 
   static ccl_always_inline float4 read(half4 r)
   {
-    return half4_to_float4(r);
+    return half4_to_float4_image(r);
   }
 
   static ccl_always_inline float4 read(half r)
   {
-    float f = half_to_float(r);
+    float f = half_to_float_image(r);
     return make_float4(f, f, f, 1.0f);
   }
 
diff --git a/intern/cycles/kernel/device/cuda/compat.h b/intern/cycles/kernel/device/cuda/compat.h
index 685c7a5b753..8a50eb1a3d5 100644
--- a/intern/cycles/kernel/device/cuda/compat.h
+++ b/intern/cycles/kernel/device/cuda/compat.h
@@ -128,6 +128,13 @@ __device__ half __float2half(const float f)
   return val;
 }
 
+__device__ float __half2float(const half h)
+{
+  float val;
+  asm("{ cvt.f32.f16 %0, %1;}\n" : "=f"(val) : "h"(h));
+  return val;
+}
+
 /* Types */
 
 #include "util/util_half.h"
diff --git a/intern/cycles/kernel/device/gpu/kernel.h b/intern/cycles/kernel/device/gpu/kernel.h
index eeac09d4b29..335cb1ec0c0 100644
--- a/intern/cycles/kernel/device/gpu/kernel.h
+++ b/intern/cycles/kernel/device/gpu/kernel.h
@@ -516,7 +516,7 @@ ccl_device_inline void kernel_gpu_film_convert_half_rgba_common_rgba(
   film_apply_pass_pixel_overlays_rgba(kfilm_convert, buffer, pixel);
 
   ccl_global half4 *out = ((ccl_global half4 *)rgba) + rgba_offset + y * rgba_stride + x;
-  float4_store_half((ccl_global half *)out, make_float4(pixel[0], pixel[1], pixel[2], pixel[3]));
+  *out = float4_to_half4_display(make_float4(pixel[0], pixel[1], pixel[2], pixel[3]));
 }
 
 /* Common implementation for half4 destination and 3-channel input pass. */
diff --git a/intern/cycles/kernel/device/optix/compat.h b/intern/cycles/kernel/device/optix/compat.h
index c9ec9be05df..d27b7d55475 100644
--- a/intern/cycles/kernel/device/optix/compat.h
+++ b/intern/cycles/kernel/device/optix/compat.h
@@ -120,6 +120,13 @@ __device__ half __float2half(const float f)
   return val;
 }
 
+__device__ float __half2float(const half h)
+{
+  float val;
+  asm("{ cvt.f32.f16 %0, %1;}\n" : "=f"(val) : "h"(h));
+  return val;
+}
+
 /* Types */
 
 #include "util/util_half.h"
-- 
cgit v1.2.3