diff options
author | Brecht Van Lommel <brecht@blender.org> | 2021-11-16 16:03:59 +0300 |
---|---|---|
committer | Brecht Van Lommel <brecht@blender.org> | 2021-11-17 19:29:41 +0300 |
commit | 9937d5379ca936b4ba93534185477fa7e529181c (patch) | |
tree | 69fc56fad9dbb8c40f8e08f14355b75584a9220b /intern/cycles/util | |
parent | 89d5714d8f233b4bbb83f6a7b33237e2ec04ee79 (diff) |
Cycles: add packed_float3 type for storage
Introduce a packed_float3 type for more compact storage: it is exactly 3
floats in size, instead of 4. float3 is still used for computation, since
it can use SIMD instructions.
Ref T92212
Differential Revision: https://developer.blender.org/D13243
Diffstat (limited to 'intern/cycles/util')
-rw-r--r-- | intern/cycles/util/defines.h | 2 | ||||
-rw-r--r-- | intern/cycles/util/math_float3.h | 26 | ||||
-rw-r--r-- | intern/cycles/util/types_float3.h | 35 |
3 files changed, 63 insertions, 0 deletions
diff --git a/intern/cycles/util/defines.h b/intern/cycles/util/defines.h index a778bef52b2..edc36b14745 100644 --- a/intern/cycles/util/defines.h +++ b/intern/cycles/util/defines.h @@ -44,6 +44,7 @@ # if defined(_WIN32) && !defined(FREE_WINDOWS) # define ccl_device_inline static __forceinline # define ccl_device_forceinline static __forceinline +# define ccl_device_inline_method __forceinline # define ccl_align(...) __declspec(align(__VA_ARGS__)) # ifdef __KERNEL_64_BIT__ # define ccl_try_align(...) __declspec(align(__VA_ARGS__)) @@ -58,6 +59,7 @@ # else /* _WIN32 && !FREE_WINDOWS */ # define ccl_device_inline static inline __attribute__((always_inline)) # define ccl_device_forceinline static inline __attribute__((always_inline)) +# define ccl_device_inline_method __attribute__((always_inline)) # define ccl_align(...) __attribute__((aligned(__VA_ARGS__))) # ifndef FREE_WINDOWS64 # define __forceinline inline __attribute__((always_inline)) diff --git a/intern/cycles/util/math_float3.h b/intern/cycles/util/math_float3.h index 81550c5d03c..031aac1b5d4 100644 --- a/intern/cycles/util/math_float3.h +++ b/intern/cycles/util/math_float3.h @@ -222,6 +222,32 @@ ccl_device_inline float3 operator/=(float3 &a, float f) return a = a * invf; } +#if !(defined(__KERNEL_METAL__) || defined(__KERNEL_CUDA__)) +ccl_device_inline packed_float3 operator*=(packed_float3 &a, const float3 &b) +{ + a = float3(a) * b; + return a; +} + +ccl_device_inline packed_float3 operator*=(packed_float3 &a, float f) +{ + a = float3(a) * f; + return a; +} + +ccl_device_inline packed_float3 operator/=(packed_float3 &a, const float3 &b) +{ + a = float3(a) / b; + return a; +} + +ccl_device_inline packed_float3 operator/=(packed_float3 &a, float f) +{ + a = float3(a) / f; + return a; +} +#endif + ccl_device_inline bool operator==(const float3 &a, const float3 &b) { #ifdef __KERNEL_SSE__ diff --git a/intern/cycles/util/types_float3.h b/intern/cycles/util/types_float3.h index f990367e7b8..fc0f35fa87f 100644 
--- a/intern/cycles/util/types_float3.h +++ b/intern/cycles/util/types_float3.h @@ -55,6 +55,41 @@ ccl_device_inline float3 make_float3(float x, float y, float z); ccl_device_inline void print_float3(const char *label, const float3 &a); #endif /* __KERNEL_GPU__ */ +/* Smaller float3 for storage. For math operations this must be converted to float3, so that on the + * CPU SIMD instructions can be used. */ +#if defined(__KERNEL_METAL__) +/* Metal has native packed_float3. */ +#elif defined(__KERNEL_CUDA__) +/* CUDA float3 is already packed. */ +typedef float3 packed_float3; +#else +/* HIP float3 is not packed (https://github.com/ROCm-Developer-Tools/HIP/issues/706). */ +struct packed_float3 { + ccl_device_inline_method packed_float3(){}; + + ccl_device_inline_method packed_float3(const float3 &a) : x(a.x), y(a.y), z(a.z) + { + } + + ccl_device_inline_method operator float3() const + { + return make_float3(x, y, z); + } + + ccl_device_inline_method packed_float3 &operator=(const float3 &a) + { + x = a.x; + y = a.y; + z = a.z; + return *this; + } + + float x, y, z; +}; +#endif + +static_assert(sizeof(packed_float3) == 12); + CCL_NAMESPACE_END #endif /* __UTIL_TYPES_FLOAT3_H__ */ |