From 9937d5379ca936b4ba93534185477fa7e529181c Mon Sep 17 00:00:00 2001
From: Brecht Van Lommel
Date: Tue, 16 Nov 2021 14:03:59 +0100
Subject: Cycles: add packed_float3 type for storage

Introduce a packed_float3 type for smaller storage that is exactly 3
floats, instead of 4. For computation float3 is still used since it can
use SIMD instructions.

Ref T92212

Differential Revision: https://developer.blender.org/D13243
---
 intern/cycles/util/defines.h      |  2 ++
 intern/cycles/util/math_float3.h  | 26 ++++++++++++++++++++++++++
 intern/cycles/util/types_float3.h | 35 +++++++++++++++++++++++++++++++++++
 3 files changed, 63 insertions(+)

(limited to 'intern/cycles/util')

diff --git a/intern/cycles/util/defines.h b/intern/cycles/util/defines.h
index a778bef52b2..edc36b14745 100644
--- a/intern/cycles/util/defines.h
+++ b/intern/cycles/util/defines.h
@@ -44,6 +44,7 @@
 # if defined(_WIN32) && !defined(FREE_WINDOWS)
 # define ccl_device_inline static __forceinline
 # define ccl_device_forceinline static __forceinline
+# define ccl_device_inline_method __forceinline
 # define ccl_align(...) __declspec(align(__VA_ARGS__))
 # ifdef __KERNEL_64_BIT__
 # define ccl_try_align(...) __declspec(align(__VA_ARGS__))
@@ -58,6 +59,7 @@
 # else /* _WIN32 && !FREE_WINDOWS */
 # define ccl_device_inline static inline __attribute__((always_inline))
 # define ccl_device_forceinline static inline __attribute__((always_inline))
+# define ccl_device_inline_method __attribute__((always_inline))
 # define ccl_align(...) __attribute__((aligned(__VA_ARGS__)))
 # ifndef FREE_WINDOWS64
 # define __forceinline inline __attribute__((always_inline))
diff --git a/intern/cycles/util/math_float3.h b/intern/cycles/util/math_float3.h
index 81550c5d03c..031aac1b5d4 100644
--- a/intern/cycles/util/math_float3.h
+++ b/intern/cycles/util/math_float3.h
@@ -222,6 +222,32 @@ ccl_device_inline float3 operator/=(float3 &a, float f)
   return a = a * invf;
 }
 
+#if !(defined(__KERNEL_METAL__) || defined(__KERNEL_CUDA__))
+ccl_device_inline packed_float3 operator*=(packed_float3 &a, const float3 &b)
+{
+  a = float3(a) * b;
+  return a;
+}
+
+ccl_device_inline packed_float3 operator*=(packed_float3 &a, float f)
+{
+  a = float3(a) * f;
+  return a;
+}
+
+ccl_device_inline packed_float3 operator/=(packed_float3 &a, const float3 &b)
+{
+  a = float3(a) / b;
+  return a;
+}
+
+ccl_device_inline packed_float3 operator/=(packed_float3 &a, float f)
+{
+  a = float3(a) / f;
+  return a;
+}
+#endif
+
 ccl_device_inline bool operator==(const float3 &a, const float3 &b)
 {
 #ifdef __KERNEL_SSE__
diff --git a/intern/cycles/util/types_float3.h b/intern/cycles/util/types_float3.h
index f990367e7b8..fc0f35fa87f 100644
--- a/intern/cycles/util/types_float3.h
+++ b/intern/cycles/util/types_float3.h
@@ -55,6 +55,41 @@ ccl_device_inline float3 make_float3(float x, float y, float z);
 ccl_device_inline void print_float3(const char *label, const float3 &a);
 #endif /* __KERNEL_GPU__ */
 
+/* Smaller float3 for storage. For math operations this must be converted to float3, so that on the
+ * CPU SIMD instructions can be used. */
+#if defined(__KERNEL_METAL__)
+/* Metal has native packed_float3. */
+#elif defined(__KERNEL_CUDA__)
+/* CUDA float3 is already packed. */
+typedef float3 packed_float3;
+#else
+/* HIP float3 is not packed (https://github.com/ROCm-Developer-Tools/HIP/issues/706). */
+struct packed_float3 {
+  ccl_device_inline_method packed_float3() {} /* Leaves x, y, z uninitialized. */
+
+  ccl_device_inline_method packed_float3(const float3 &a) : x(a.x), y(a.y), z(a.z)
+  {
+  }
+
+  ccl_device_inline_method operator float3() const
+  {
+    return make_float3(x, y, z);
+  }
+
+  ccl_device_inline_method packed_float3 &operator=(const float3 &a)
+  {
+    x = a.x;
+    y = a.y;
+    z = a.z;
+    return *this;
+  }
+
+  float x, y, z;
+};
+#endif
+
+static_assert(sizeof(packed_float3) == 12, "packed_float3 expected to be exactly 12 bytes");
+
 CCL_NAMESPACE_END
 
 #endif /* __UTIL_TYPES_FLOAT3_H__ */
-- 
cgit v1.2.3