1 files changed, 53 insertions, 15 deletions
diff --git a/intern/cycles/util/util_avxf.h b/intern/cycles/util/util_avxf.h
index 156607e65fb..1fb3ded422f 100644
--- a/intern/cycles/util/util_avxf.h
+++ b/intern/cycles/util/util_avxf.h
@@ -15,7 +15,7 @@
  */
 
 #ifndef __UTIL_AVXF_H__
-#  define __UTIL_AVXF_H__
+#define __UTIL_AVXF_H__
 
 CCL_NAMESPACE_BEGIN
 
@@ -140,6 +140,11 @@ __forceinline void dot3(const avxf &a, const avxf &b, float &den, float &den2)
 /// Unary Operators
 ////////////////////////////////////////////////////////////////////////////////
 
+__forceinline const avxf cast(const __m256i &a) /* Reinterpret integer bits as packed floats. */
+{
+  return _mm256_castsi256_ps(a); /* Bit cast only; no int-to-float numeric conversion. */
+}
+
 __forceinline const avxf mm256_sqrt(const avxf &a)
 {
   return _mm256_sqrt_ps(a.m256);
@@ -259,16 +264,35 @@ template<size_t i0> __forceinline const avxf shuffle(const avxf &a)
   return shuffle<i0>(a, a);
 }
 
+template<size_t i> __forceinline float extract(const avxf &a) /* Scalar read of one element. */
+{
+  __m256 b = shuffle<i, i, i, i>(a).m256; /* Presumably moves element i to slot 0; NOTE(review): 4-index shuffle is not shown here and is likely per 128-bit lane, so i < 4 - confirm. */
+  return _mm256_cvtss_f32(b); /* Returns the lowest float of b. */
+}
+template<> __forceinline float extract<0>(const avxf &a) /* Specialization: i == 0 skips the shuffle. */
+{
+  return _mm256_cvtss_f32(a.m256); /* Lowest float is read directly from a. */
+}
+
+__forceinline ssef low(const avxf &a) /* Lower 128 bits of a (elements 0..3). */
+{
+  return _mm256_extractf128_ps(a.m256, 0); /* Immediate 0 selects the low half. */
+}
+__forceinline ssef high(const avxf &a) /* Upper 128 bits of a (elements 4..7). */
+{
+  return _mm256_extractf128_ps(a.m256, 1); /* Immediate 1 selects the high half. */
+}
+
 template<int i0, int i1, int i2, int i3, int i4, int i5, int i6, int i7>
 __forceinline const avxf permute(const avxf &a)
 {
-#  ifdef __KERNEL_AVX2__
+#ifdef __KERNEL_AVX2__ /* Permute in-register using an index vector built from i0..i7. */
   return _mm256_permutevar8x32_ps(a, _mm256_set_epi32(i7, i6, i5, i4, i3, i2, i1, i0));
-#  else
+#else /* !__KERNEL_AVX2__: store to a float[8] and rebuild the vector element by element. */
   float temp[8];
   _mm256_storeu_ps((float *)&temp, a);
   return avxf(temp[i7], temp[i6], temp[i5], temp[i4], temp[i3], temp[i2], temp[i1], temp[i0]);
-#  endif
+#endif /* __KERNEL_AVX2__ */
 }
 
 template<int S0, int S1, int S2, int S3, int S4, int S5, int S6, int S7>
@@ -309,39 +333,51 @@ __forceinline avxf mini(const avxf &a, const avxf &b)
 ////////////////////////////////////////////////////////////////////////////////
 __forceinline const avxf madd(const avxf &a, const avxf &b, const avxf &c)
 {
-#  ifdef __KERNEL_AVX2__
+#ifdef __KERNEL_AVX2__ /* Fused multiply-add: a * b + c. */
   return _mm256_fmadd_ps(a, b, c);
-#  else
+#else /* !__KERNEL_AVX2__: separate multiply, then add. */
   return c + (a * b);
-#  endif
+#endif /* __KERNEL_AVX2__ */
 }
 
 __forceinline const avxf nmadd(const avxf &a, const avxf &b, const avxf &c)
 {
-#  ifdef __KERNEL_AVX2__
+#ifdef __KERNEL_AVX2__ /* Fused negated multiply-add: -(a * b) + c. */
   return _mm256_fnmadd_ps(a, b, c);
-#  else
+#else /* !__KERNEL_AVX2__: separate multiply, then subtract from c. */
   return c - (a * b);
-#  endif
+#endif /* __KERNEL_AVX2__ */
 }
 __forceinline const avxf msub(const avxf &a, const avxf &b, const avxf &c)
 {
-#  ifdef __KERNEL_AVX2__
+#ifdef __KERNEL_AVX2__ /* Fused multiply-subtract: a * b - c. */
   return _mm256_fmsub_ps(a, b, c);
-#  else
+#else /* !__KERNEL_AVX2__: separate multiply, then subtract c. */
   return (a * b) - c;
-#  endif
+#endif /* __KERNEL_AVX2__ */
 }
 
 ////////////////////////////////////////////////////////////////////////////////
-/// Comparison Operators
+/// Comparison Operators + Select
 ////////////////////////////////////////////////////////////////////////////////
 __forceinline const avxb operator<=(const avxf &a, const avxf &b)
 {
   return _mm256_cmp_ps(a.m256, b.m256, _CMP_LE_OS);
 }
 
-#endif
+__forceinline const avxf select(const avxb &m, const avxf &t, const avxf &f) /* Per element: m ? t : f. */
+{
+  return _mm256_blendv_ps(f, t, m); /* blendv takes its 2nd operand (t) where the mask sign bit is set, else the 1st (f). */
+}
+
+////////////////////////////////////////////////////////////////////////////////
+/// Common Functions
+////////////////////////////////////////////////////////////////////////////////
+
+__forceinline avxf mix(const avxf &a, const avxf &b, const avxf &t) /* Per-element linear blend of a and b weighted by t. */
+{
+  return madd(t, b, (avxf(1.0f) - t) * a); /* t * b + (1 - t) * a */
+}
 
 #ifndef _mm256_set_m128
 #  define _mm256_set_m128(/* __m128 */ hi, /* __m128 */ lo) \
@@ -352,3 +388,5 @@ __forceinline const avxb operator<=(const avxf &a, const avxf &b)
   _mm256_set_m128(_mm_loadu_ps(hiaddr), _mm_loadu_ps(loaddr))
 
 CCL_NAMESPACE_END
+
+#endif /* __UTIL_AVXF_H__ */