Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAndrii Symkin <pembem22>2022-07-25 17:55:48 +0300
committerBrecht Van Lommel <brecht@blender.org>2022-07-25 18:36:58 +0300
commit793d2031395246fb2421d130742e0fe61ab2b29c (patch)
tree1c6a1ecea8b9bb0fe1b0149029ea4a0befd8a6e3
parent7a74d91e323c4d695b908ca4178837cee756eeaf (diff)
Cycles: add math functions for float8
This patch adds required math functions for float8 to make it possible using float8 instead of float3 for color data. Differential Revision: https://developer.blender.org/D15525
-rw-r--r--intern/cycles/kernel/CMakeLists.txt1
-rw-r--r--intern/cycles/util/CMakeLists.txt1
-rw-r--r--intern/cycles/util/math.h1
-rw-r--r--intern/cycles/util/math_float8.h419
-rw-r--r--intern/cycles/util/types_float8.h15
-rw-r--r--intern/cycles/util/types_float8_impl.h23
6 files changed, 442 insertions, 18 deletions
diff --git a/intern/cycles/kernel/CMakeLists.txt b/intern/cycles/kernel/CMakeLists.txt
index 94632dff200..8ecdac6ee27 100644
--- a/intern/cycles/kernel/CMakeLists.txt
+++ b/intern/cycles/kernel/CMakeLists.txt
@@ -317,6 +317,7 @@ set(SRC_UTIL_HEADERS
../util/math_float2.h
../util/math_float3.h
../util/math_float4.h
+ ../util/math_float8.h
../util/math_int2.h
../util/math_int3.h
../util/math_int4.h
diff --git a/intern/cycles/util/CMakeLists.txt b/intern/cycles/util/CMakeLists.txt
index fddac1dbbcf..9bc9f00e142 100644
--- a/intern/cycles/util/CMakeLists.txt
+++ b/intern/cycles/util/CMakeLists.txt
@@ -63,6 +63,7 @@ set(SRC_HEADERS
math_float2.h
math_float3.h
math_float4.h
+ math_float8.h
math_int2.h
math_int3.h
math_int4.h
diff --git a/intern/cycles/util/math.h b/intern/cycles/util/math.h
index 8360ce05a56..2631304c84b 100644
--- a/intern/cycles/util/math.h
+++ b/intern/cycles/util/math.h
@@ -540,6 +540,7 @@ CCL_NAMESPACE_END
#include "util/math_float2.h"
#include "util/math_float3.h"
#include "util/math_float4.h"
+#include "util/math_float8.h"
#include "util/rect.h"
diff --git a/intern/cycles/util/math_float8.h b/intern/cycles/util/math_float8.h
new file mode 100644
index 00000000000..8ed8d56a034
--- /dev/null
+++ b/intern/cycles/util/math_float8.h
@@ -0,0 +1,419 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright 2022 Blender Foundation */
+
+#ifndef __UTIL_MATH_FLOAT8_H__
+#define __UTIL_MATH_FLOAT8_H__
+
+#ifndef __UTIL_MATH_H__
+# error "Do not include this file directly, include util/types.h instead."
+#endif
+
+CCL_NAMESPACE_BEGIN
+
+/*******************************************************************************
+ * Declaration.
+ */
+
+ccl_device_inline float8 operator+(const float8 &a, const float8 &b);
+ccl_device_inline float8 operator+(const float8 &a, const float f);
+ccl_device_inline float8 operator+(const float f, const float8 &a);
+
+ccl_device_inline float8 operator-(const float8 &a);
+ccl_device_inline float8 operator-(const float8 &a, const float8 &b);
+ccl_device_inline float8 operator-(const float8 &a, const float f);
+ccl_device_inline float8 operator-(const float f, const float8 &a);
+
+ccl_device_inline float8 operator*(const float8 &a, const float8 &b);
+ccl_device_inline float8 operator*(const float8 &a, const float f);
+ccl_device_inline float8 operator*(const float f, const float8 &a);
+
+ccl_device_inline float8 operator/(const float8 &a, const float8 &b);
+ccl_device_inline float8 operator/(const float8 &a, float f);
+ccl_device_inline float8 operator/(const float f, const float8 &a);
+
+ccl_device_inline float8 operator+=(float8 &a, const float8 &b);
+
+ccl_device_inline float8 operator*=(float8 &a, const float8 &b);
+ccl_device_inline float8 operator*=(float8 &a, float f);
+
+ccl_device_inline float8 operator/=(float8 &a, float f);
+
+ccl_device_inline bool operator==(const float8 &a, const float8 &b);
+
+ccl_device_inline float8 rcp(const float8 &a);
+ccl_device_inline float8 sqrt(const float8 &a);
+ccl_device_inline float8 sqr(const float8 &a);
+ccl_device_inline bool is_zero(const float8 &a);
+ccl_device_inline float average(const float8 &a);
+ccl_device_inline float8 min(const float8 &a, const float8 &b);
+ccl_device_inline float8 max(const float8 &a, const float8 &b);
+ccl_device_inline float8 clamp(const float8 &a, const float8 &mn, const float8 &mx);
+ccl_device_inline float8 fabs(const float8 &a);
+ccl_device_inline float8 mix(const float8 &a, const float8 &b, float t);
+
+ccl_device_inline float8 safe_divide(const float8 a, const float b);
+ccl_device_inline float8 safe_divide(const float8 a, const float8 b);
+
+ccl_device_inline float reduce_min(const float8 &a);
+ccl_device_inline float reduce_max(const float8 &a);
+ccl_device_inline float reduce_add(const float8 &a);
+
+ccl_device_inline float8 saturate(const float8 &a);
+ccl_device_inline bool isequal(const float8 a, const float8 b);
+
+/*******************************************************************************
+ * Definition.
+ */
+
+ccl_device_inline float8 zero_float8()
+{
+#ifdef __KERNEL_AVX2__
+ return float8(_mm256_setzero_ps());
+#else
+ return make_float8(0.0f);
+#endif
+}
+
+ccl_device_inline float8 one_float8()
+{
+ return make_float8(1.0f);
+}
+
+ccl_device_inline float8 operator+(const float8 &a, const float8 &b)
+{
+#ifdef __KERNEL_AVX2__
+ return float8(_mm256_add_ps(a.m256, b.m256));
+#else
+ return make_float8(
+ a.a + b.a, a.b + b.b, a.c + b.c, a.d + b.d, a.e + b.e, a.f + b.f, a.g + b.g, a.h + b.h);
+#endif
+}
+
+ccl_device_inline float8 operator+(const float8 &a, const float f)
+{
+ return a + make_float8(f);
+}
+
+ccl_device_inline float8 operator+(const float f, const float8 &a)
+{
+ return make_float8(f) + a;
+}
+
+ccl_device_inline float8 operator-(const float8 &a)
+{
+#ifdef __KERNEL_AVX2__
+ __m256 mask = _mm256_castsi256_ps(_mm256_set1_epi32(0x80000000));
+ return float8(_mm256_xor_ps(a.m256, mask));
+#else
+ return make_float8(-a.a, -a.b, -a.c, -a.d, -a.e, -a.f, -a.g, -a.h);
+#endif
+}
+
+ccl_device_inline float8 operator-(const float8 &a, const float8 &b)
+{
+#ifdef __KERNEL_AVX2__
+ return float8(_mm256_sub_ps(a.m256, b.m256));
+#else
+ return make_float8(
+ a.a - b.a, a.b - b.b, a.c - b.c, a.d - b.d, a.e - b.e, a.f - b.f, a.g - b.g, a.h - b.h);
+#endif
+}
+
+ccl_device_inline float8 operator-(const float8 &a, const float f)
+{
+ return a - make_float8(f);
+}
+
+ccl_device_inline float8 operator-(const float f, const float8 &a)
+{
+ return make_float8(f) - a;
+}
+
+ccl_device_inline float8 operator*(const float8 &a, const float8 &b)
+{
+#ifdef __KERNEL_AVX2__
+ return float8(_mm256_mul_ps(a.m256, b.m256));
+#else
+ return make_float8(
+ a.a * b.a, a.b * b.b, a.c * b.c, a.d * b.d, a.e * b.e, a.f * b.f, a.g * b.g, a.h * b.h);
+#endif
+}
+
+ccl_device_inline float8 operator*(const float8 &a, const float f)
+{
+ return a * make_float8(f);
+}
+
+ccl_device_inline float8 operator*(const float f, const float8 &a)
+{
+ return make_float8(f) * a;
+}
+
+ccl_device_inline float8 operator/(const float8 &a, const float8 &b)
+{
+#ifdef __KERNEL_AVX2__
+ return float8(_mm256_div_ps(a.m256, b.m256));
+#else
+ return make_float8(
+ a.a / b.a, a.b / b.b, a.c / b.c, a.d / b.d, a.e / b.e, a.f / b.f, a.g / b.g, a.h / b.h);
+#endif
+}
+
+ccl_device_inline float8 operator/(const float8 &a, const float f)
+{
+ return a / make_float8(f);
+}
+
+ccl_device_inline float8 operator/(const float f, const float8 &a)
+{
+ return make_float8(f) / a;
+}
+
+ccl_device_inline float8 operator+=(float8 &a, const float8 &b)
+{
+ return a = a + b;
+}
+
+ccl_device_inline float8 operator-=(float8 &a, const float8 &b)
+{
+ return a = a - b;
+}
+
+ccl_device_inline float8 operator*=(float8 &a, const float8 &b)
+{
+ return a = a * b;
+}
+
+ccl_device_inline float8 operator*=(float8 &a, float f)
+{
+ return a = a * f;
+}
+
+ccl_device_inline float8 operator/=(float8 &a, float f)
+{
+ return a = a / f;
+}
+
+ccl_device_inline bool operator==(const float8 &a, const float8 &b)
+{
+#ifdef __KERNEL_AVX2__
+ return (_mm256_movemask_ps(_mm256_castsi256_ps(
+ _mm256_cmpeq_epi32(_mm256_castps_si256(a.m256), _mm256_castps_si256(b.m256)))) &
+ 0b11111111) == 0b11111111;
+#else
+ return (a.a == b.a && a.b == b.b && a.c == b.c && a.d == b.d && a.e == b.e && a.f == b.f &&
+ a.g == b.g && a.h == b.h);
+#endif
+}
+
+ccl_device_inline float8 rcp(const float8 &a)
+{
+#ifdef __KERNEL_AVX2__
+ return float8(_mm256_rcp_ps(a.m256));
+#else
+ return make_float8(1.0f / a.a,
+ 1.0f / a.b,
+ 1.0f / a.c,
+ 1.0f / a.d,
+ 1.0f / a.e,
+ 1.0f / a.f,
+ 1.0f / a.g,
+ 1.0f / a.h);
+#endif
+}
+
+ccl_device_inline float8 sqrt(const float8 &a)
+{
+#ifdef __KERNEL_AVX2__
+ return float8(_mm256_sqrt_ps(a.m256));
+#else
+ return make_float8(sqrtf(a.a),
+ sqrtf(a.b),
+ sqrtf(a.c),
+ sqrtf(a.d),
+ sqrtf(a.e),
+ sqrtf(a.f),
+ sqrtf(a.g),
+ sqrtf(a.h));
+#endif
+}
+
+ccl_device_inline float8 sqr(const float8 &a)
+{
+ return a * a;
+}
+
+ccl_device_inline bool is_zero(const float8 &a)
+{
+ return a == make_float8(0.0f);
+}
+
+ccl_device_inline float average(const float8 &a)
+{
+ return reduce_add(a) / 8.0f;
+}
+
+ccl_device_inline float8 min(const float8 &a, const float8 &b)
+{
+#ifdef __KERNEL_AVX2__
+ return float8(_mm256_min_ps(a.m256, b.m256));
+#else
+ return make_float8(min(a.a, b.a),
+ min(a.b, b.b),
+ min(a.c, b.c),
+ min(a.d, b.d),
+ min(a.e, b.e),
+ min(a.f, b.f),
+ min(a.g, b.g),
+ min(a.h, b.h));
+#endif
+}
+
+ccl_device_inline float8 max(const float8 &a, const float8 &b)
+{
+#ifdef __KERNEL_AVX2__
+ return float8(_mm256_max_ps(a.m256, b.m256));
+#else
+ return make_float8(max(a.a, b.a),
+ max(a.b, b.b),
+ max(a.c, b.c),
+ max(a.d, b.d),
+ max(a.e, b.e),
+ max(a.f, b.f),
+ max(a.g, b.g),
+ max(a.h, b.h));
+#endif
+}
+
+ccl_device_inline float8 clamp(const float8 &a, const float8 &mn, const float8 &mx)
+{
+ return min(max(a, mn), mx);
+}
+
+ccl_device_inline float8 fabs(const float8 &a)
+{
+#ifdef __KERNEL_AVX2__
+ return float8(_mm256_and_ps(a.m256, _mm256_castsi256_ps(_mm256_set1_epi32(0x7fffffff))));
+#else
+ return make_float8(fabsf(a.a),
+ fabsf(a.b),
+ fabsf(a.c),
+ fabsf(a.d),
+ fabsf(a.e),
+ fabsf(a.f),
+ fabsf(a.g),
+ fabsf(a.h));
+#endif
+}
+
+ccl_device_inline float8 mix(const float8 &a, const float8 &b, float t)
+{
+ return a + t * (b - a);
+}
+
+ccl_device_inline float reduce_min(const float8 &a)
+{
+ return min(min(min(a.a, a.b), min(a.c, a.d)), min(min(a.e, a.f), min(a.g, a.h)));
+}
+
+ccl_device_inline float reduce_max(const float8 &a)
+{
+ return max(max(max(a.a, a.b), max(a.c, a.d)), max(max(a.e, a.f), max(a.g, a.h)));
+}
+
+ccl_device_inline float reduce_add(const float8 &a)
+{
+#ifdef __KERNEL_AVX2__
+ float8 b(_mm256_hadd_ps(a.m256, a.m256));
+ float8 h(_mm256_hadd_ps(b.m256, b.m256));
+ return h[0] + h[4];
+#else
+ return a.a + a.b + a.c + a.d + a.e + a.f + a.g + a.h;
+#endif
+}
+
+ccl_device_inline float8 saturate(const float8 &a)
+{
+ return clamp(a, make_float8(0.0f), make_float8(1.0f));
+}
+
+ccl_device_inline bool isequal(const float8 a, const float8 b)
+{
+ return a == b;
+}
+
+ccl_device_inline float8 safe_divide(const float8 a, const float b)
+{
+ return (b != 0.0f) ? a / b : make_float8(0.0f);
+}
+
+ccl_device_inline float8 safe_divide(const float8 a, const float8 b)
+{
+ return make_float8((b.a != 0.0f) ? a.a / b.a : 0.0f,
+ (b.b != 0.0f) ? a.b / b.b : 0.0f,
+ (b.c != 0.0f) ? a.c / b.c : 0.0f,
+ (b.d != 0.0f) ? a.d / b.d : 0.0f,
+ (b.e != 0.0f) ? a.e / b.e : 0.0f,
+ (b.f != 0.0f) ? a.f / b.f : 0.0f,
+ (b.g != 0.0f) ? a.g / b.g : 0.0f,
+ (b.h != 0.0f) ? a.h / b.h : 0.0f);
+}
+
+ccl_device_inline float8 ensure_finite(float8 v)
+{
+ v.a = ensure_finite(v.a);
+ v.b = ensure_finite(v.b);
+ v.c = ensure_finite(v.c);
+ v.d = ensure_finite(v.d);
+ v.e = ensure_finite(v.e);
+ v.f = ensure_finite(v.f);
+ v.g = ensure_finite(v.g);
+ v.h = ensure_finite(v.h);
+
+ return v;
+}
+
+ccl_device_inline bool isfinite_safe(float8 v)
+{
+ return isfinite_safe(v.a) && isfinite_safe(v.b) && isfinite_safe(v.c) && isfinite_safe(v.d) &&
+ isfinite_safe(v.e) && isfinite_safe(v.f) && isfinite_safe(v.g) && isfinite_safe(v.h);
+}
+
+ccl_device_inline float8 pow(float8 v, float e)
+{
+ return make_float8(powf(v.a, e),
+ powf(v.b, e),
+ powf(v.c, e),
+ powf(v.d, e),
+ powf(v.e, e),
+ powf(v.f, e),
+ powf(v.g, e),
+ powf(v.h, e));
+}
+
+ccl_device_inline float8 exp(float8 v)
+{
+ return make_float8(
+ expf(v.a), expf(v.b), expf(v.c), expf(v.d), expf(v.e), expf(v.f), expf(v.g), expf(v.h));
+}
+
+ccl_device_inline float8 log(float8 v)
+{
+ return make_float8(
+ logf(v.a), logf(v.b), logf(v.c), logf(v.d), logf(v.e), logf(v.f), logf(v.g), logf(v.h));
+}
+
+ccl_device_inline float dot(const float8 &a, const float8 &b)
+{
+#ifdef __KERNEL_AVX2__
+ float8 t(_mm256_dp_ps(a.m256, b.m256, 0xFF));
+ return t[0] + t[4];
+#else
+ return (a.a * b.a) + (a.b * b.b) + (a.c * b.c) + (a.d * b.d) + (a.e * b.e) + (a.f * b.f) +
+ (a.g * b.g) + (a.h * b.h);
+#endif
+}
+
+CCL_NAMESPACE_END
+
+#endif /* __UTIL_MATH_FLOAT8_H__ */
diff --git a/intern/cycles/util/types_float8.h b/intern/cycles/util/types_float8.h
index d71149946f7..f04dc675c84 100644
--- a/intern/cycles/util/types_float8.h
+++ b/intern/cycles/util/types_float8.h
@@ -11,11 +11,13 @@
CCL_NAMESPACE_BEGIN
-#if !defined(__KERNEL_GPU__) || defined(__KERNEL_ONEAPI__)
-
+#ifdef __KERNEL_GPU__
+struct float8
+#else
struct ccl_try_align(32) float8
+#endif
{
-# ifdef __KERNEL_AVX2__
+#ifdef __KERNEL_AVX2__
union {
__m256 m256;
struct {
@@ -32,18 +34,19 @@ struct ccl_try_align(32) float8
__forceinline float8 &operator=(const float8 &a);
-# else /* __KERNEL_AVX2__ */
+#else /* __KERNEL_AVX2__ */
float a, b, c, d, e, f, g, h;
-# endif /* __KERNEL_AVX2__ */
+#endif /* __KERNEL_AVX2__ */
+#ifndef __KERNEL_GPU__
__forceinline float operator[](int i) const;
__forceinline float &operator[](int i);
+#endif
};
ccl_device_inline float8 make_float8(float f);
ccl_device_inline float8
make_float8(float a, float b, float c, float d, float e, float f, float g, float h);
-#endif /* !defined(__KERNEL_GPU__) || defined(__KERNEL_ONEAPI__) */
CCL_NAMESPACE_END
diff --git a/intern/cycles/util/types_float8_impl.h b/intern/cycles/util/types_float8_impl.h
index 0694f5205a5..21931c55071 100644
--- a/intern/cycles/util/types_float8_impl.h
+++ b/intern/cycles/util/types_float8_impl.h
@@ -15,8 +15,7 @@
CCL_NAMESPACE_BEGIN
-#if !defined(__KERNEL_GPU__) || defined(__KERNEL_ONEAPI__)
-# ifdef __KERNEL_AVX2__
+#ifdef __KERNEL_AVX2__
__forceinline float8::float8()
{
}
@@ -44,8 +43,9 @@ __forceinline float8 &float8::operator=(const float8 &f)
m256 = f.m256;
return *this;
}
-# endif /* __KERNEL_AVX2__ */
+#endif /* __KERNEL_AVX2__ */
+#ifndef __KERNEL_GPU__
__forceinline float float8::operator[](int i) const
{
util_assert(i >= 0);
@@ -59,30 +59,29 @@ __forceinline float &float8::operator[](int i)
util_assert(i < 8);
return *(&a + i);
}
+#endif
ccl_device_inline float8 make_float8(float f)
{
-# ifdef __KERNEL_AVX2__
+#ifdef __KERNEL_AVX2__
float8 r(_mm256_set1_ps(f));
-# else
+#else
float8 r = {f, f, f, f, f, f, f, f};
-# endif
+#endif
return r;
}
ccl_device_inline float8
make_float8(float a, float b, float c, float d, float e, float f, float g, float h)
{
-# ifdef __KERNEL_AVX2__
- float8 r(_mm256_set_ps(a, b, c, d, e, f, g, h));
-# else
+#ifdef __KERNEL_AVX2__
+ float8 r(_mm256_setr_ps(a, b, c, d, e, f, g, h));
+#else
float8 r = {a, b, c, d, e, f, g, h};
-# endif
+#endif
return r;
}
-#endif /* !defined(__KERNEL_GPU__) || defined(__KERNEL_ONEAPI__) */
-
CCL_NAMESPACE_END
#endif /* __UTIL_TYPES_FLOAT8_IMPL_H__ */