Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBrecht Van Lommel <brecht@blender.org>2021-11-16 16:03:59 +0300
committerBrecht Van Lommel <brecht@blender.org>2021-11-17 19:29:41 +0300
commit9937d5379ca936b4ba93534185477fa7e529181c (patch)
tree69fc56fad9dbb8c40f8e08f14355b75584a9220b /intern/cycles/util
parent89d5714d8f233b4bbb83f6a7b33237e2ec04ee79 (diff)
Cycles: add packed_float3 type for storage
Introduce a packed_float3 type for smaller storage that is exactly 3 floats, instead of 4. For computation float3 is still used since it can use SIMD instructions. Ref T92212 Differential Revision: https://developer.blender.org/D13243
Diffstat (limited to 'intern/cycles/util')
-rw-r--r--intern/cycles/util/defines.h2
-rw-r--r--intern/cycles/util/math_float3.h26
-rw-r--r--intern/cycles/util/types_float3.h35
3 files changed, 63 insertions, 0 deletions
diff --git a/intern/cycles/util/defines.h b/intern/cycles/util/defines.h
index a778bef52b2..edc36b14745 100644
--- a/intern/cycles/util/defines.h
+++ b/intern/cycles/util/defines.h
@@ -44,6 +44,7 @@
# if defined(_WIN32) && !defined(FREE_WINDOWS)
# define ccl_device_inline static __forceinline
# define ccl_device_forceinline static __forceinline
+# define ccl_device_inline_method __forceinline
# define ccl_align(...) __declspec(align(__VA_ARGS__))
# ifdef __KERNEL_64_BIT__
# define ccl_try_align(...) __declspec(align(__VA_ARGS__))
@@ -58,6 +59,7 @@
# else /* _WIN32 && !FREE_WINDOWS */
# define ccl_device_inline static inline __attribute__((always_inline))
# define ccl_device_forceinline static inline __attribute__((always_inline))
+# define ccl_device_inline_method __attribute__((always_inline))
# define ccl_align(...) __attribute__((aligned(__VA_ARGS__)))
# ifndef FREE_WINDOWS64
# define __forceinline inline __attribute__((always_inline))
diff --git a/intern/cycles/util/math_float3.h b/intern/cycles/util/math_float3.h
index 81550c5d03c..031aac1b5d4 100644
--- a/intern/cycles/util/math_float3.h
+++ b/intern/cycles/util/math_float3.h
@@ -222,6 +222,32 @@ ccl_device_inline float3 operator/=(float3 &a, float f)
return a = a * invf;
}
+#if !(defined(__KERNEL_METAL__) || defined(__KERNEL_CUDA__))
+ccl_device_inline packed_float3 operator*=(packed_float3 &a, const float3 &b)
+{
+ a = float3(a) * b;
+ return a;
+}
+
+ccl_device_inline packed_float3 operator*=(packed_float3 &a, float f)
+{
+ a = float3(a) * f;
+ return a;
+}
+
+ccl_device_inline packed_float3 operator/=(packed_float3 &a, const float3 &b)
+{
+ a = float3(a) / b;
+ return a;
+}
+
+ccl_device_inline packed_float3 operator/=(packed_float3 &a, float f)
+{
+ a = float3(a) / f;
+ return a;
+}
+#endif
+
ccl_device_inline bool operator==(const float3 &a, const float3 &b)
{
#ifdef __KERNEL_SSE__
diff --git a/intern/cycles/util/types_float3.h b/intern/cycles/util/types_float3.h
index f990367e7b8..fc0f35fa87f 100644
--- a/intern/cycles/util/types_float3.h
+++ b/intern/cycles/util/types_float3.h
@@ -55,6 +55,41 @@ ccl_device_inline float3 make_float3(float x, float y, float z);
ccl_device_inline void print_float3(const char *label, const float3 &a);
#endif /* __KERNEL_GPU__ */
+/* Smaller float3 for storage. For math operations this must be converted to float3, so that on the
+ * CPU SIMD instructions can be used. */
+#if defined(__KERNEL_METAL__)
+/* Metal has native packed_float3. */
+#elif defined(__KERNEL_CUDA__)
+/* CUDA float3 is already packed. */
+typedef float3 packed_float3;
+#else
+/* HIP float3 is not packed (https://github.com/ROCm-Developer-Tools/HIP/issues/706). */
+struct packed_float3 {
+ ccl_device_inline_method packed_float3(){};
+
+ ccl_device_inline_method packed_float3(const float3 &a) : x(a.x), y(a.y), z(a.z)
+ {
+ }
+
+ ccl_device_inline_method operator float3() const
+ {
+ return make_float3(x, y, z);
+ }
+
+ ccl_device_inline_method packed_float3 &operator=(const float3 &a)
+ {
+ x = a.x;
+ y = a.y;
+ z = a.z;
+ return *this;
+ }
+
+ float x, y, z;
+};
+#endif
+
+static_assert(sizeof(packed_float3) == 12);
+
CCL_NAMESPACE_END
#endif /* __UTIL_TYPES_FLOAT3_H__ */