Cycles: Improved robustness of hair motion blur. (branch: motion_curve_fix)

In some instances, the number of control vertices (CVs) of a hair could change mid-frame. Cycles would then be unable to calculate proper motion blur for those hairs. This change adds interpolated CVs to fill in for the missing data. While this will not necessarily result in a fully accurate reconstruction of the guide hair, it preserves motion blur instead of disabling it.

Reviewers: #cycles, sergey
Reviewed By: #cycles, sergey
Subscribers: sergey, brecht, #cycles
Tags: #cycles
Differential Revision: https://developer.blender.org/D3695
author: Stefan Werner <stefan.werner@tangent-animation.com> 2018-11-23 15:08:15 +0300
committer: Stefan Werner <stefan.werner@tangent-animation.com> 2018-11-23 15:19:53 +0300
commit: 071f4f4ce0b9520ab0c73d6d68365ad449ca8b80 (patch)
tree: 9f37bfcac669366b9ad5fb7605f2fbbed9b71b0a /intern/cycles/util/util_avxb.h
parent: 0a2b2d59a5897212ba3771503feb6770fb636bc8 (diff)
1 files changed, 17 insertions, 55 deletions
diff --git a/intern/cycles/util/util_avxb.h b/intern/cycles/util/util_avxb.h
index 60d9bb44256..25ef39d39ae 100644
--- a/intern/cycles/util/util_avxb.h
+++ b/intern/cycles/util/util_avxb.h
@@ -44,23 +44,12 @@ struct avxb
 	__forceinline operator const __m256i( void ) const { return _mm256_castps_si256(m256); }
 	__forceinline operator const __m256d( void ) const { return _mm256_castps_pd(m256); }
 
-	//__forceinline avxb           ( bool  a )
-	//	: m256(_mm_lookupmask_ps[(size_t(a) << 3) | (size_t(a) << 2) | (size_t(a) << 1) | size_t(a)]) {}
-	//__forceinline avxb           ( bool  a, bool  b)
-	//	: m256(_mm_lookupmask_ps[(size_t(b) << 3) | (size_t(a) << 2) | (size_t(b) << 1) | size_t(a)]) {}
-	//__forceinline avxb           ( bool  a, bool  b, bool  c, bool  d)
-	//	: m256(_mm_lookupmask_ps[(size_t(d) << 3) | (size_t(c) << 2) | (size_t(b) << 1) | size_t(a)]) {}
-	//__forceinline avxb(int mask) {
-	//	assert(mask >= 0 && mask < 16);
-	//	m128 = _mm_lookupmask_ps[mask];
-	//}
-
 	////////////////////////////////////////////////////////////////////////////////
 	/// Constants
 	////////////////////////////////////////////////////////////////////////////////
 
 	__forceinline avxb( FalseTy ) : m256(_mm256_setzero_ps()) {}
-	__forceinline avxb( TrueTy  ) : m256(_mm256_castsi256_ps(_mm256_cmpeq_epi32(_mm256_setzero_si256(), _mm256_setzero_si256()))) {}
+	__forceinline avxb( TrueTy  ) : m256(_mm256_castsi256_ps(_mm256_set1_epi32(-1))) {}
 
 	////////////////////////////////////////////////////////////////////////////////
 	/// Array Access
@@ -97,7 +86,21 @@ __forceinline const avxb operator ^=( avxb& a, const avxb& b ) { return a = a ^
 ////////////////////////////////////////////////////////////////////////////////
 
 __forceinline const avxb operator !=( const avxb& a, const avxb& b ) { return _mm256_xor_ps(a, b); }
-__forceinline const avxb operator ==( const avxb& a, const avxb& b ) { return _mm256_castsi256_ps(_mm256_cmpeq_epi32(a, b)); }
+__forceinline const avxb operator ==( const avxb& a, const avxb& b )
+{
+#ifdef __KERNEL_AVX2__
+	return _mm256_castsi256_ps(_mm256_cmpeq_epi32(a, b));
+#else
+	__m128i a_lo = _mm_castps_si128(_mm256_extractf128_ps(a, 0));
+	__m128i a_hi = _mm_castps_si128(_mm256_extractf128_ps(a, 1));
+	__m128i b_lo = _mm_castps_si128(_mm256_extractf128_ps(b, 0));
+	__m128i b_hi = _mm_castps_si128(_mm256_extractf128_ps(b, 1));
+	__m128i c_lo = _mm_cmpeq_epi32(a_lo, b_lo);
+	__m128i c_hi = _mm_cmpeq_epi32(a_hi, b_hi);
+	__m256i result = _mm256_insertf128_si256(_mm256_castsi128_si256(c_lo), c_hi, 1);
+	return _mm256_castsi256_ps(result);
+#endif
+}
 
 __forceinline const avxb select( const avxb& m, const avxb& t, const avxb& f ) {
 #if defined(__KERNEL_SSE41__)
@@ -114,47 +117,6 @@ __forceinline const avxb select( const avxb& m, const avxb& t, const avxb& f ) {
 __forceinline const avxb unpacklo( const avxb& a, const avxb& b ) { return _mm256_unpacklo_ps(a, b); }
 __forceinline const avxb unpackhi( const avxb& a, const avxb& b ) { return _mm256_unpackhi_ps(a, b); }
 
-#define _MM256_SHUFFLE(fp7,fp6,fp5,fp4,fp3,fp2,fp1,fp0) (((fp7) << 14) | ((fp6) << 12) | ((fp5) << 10) | ((fp4) << 8) | \
-                                                      ((fp3) << 6) | ((fp2) << 4) | ((fp1) << 2) | ((fp0)))
-
-template<size_t i0, size_t i1, size_t i2, size_t i3, size_t i4, size_t i5, size_t i6, size_t i7>
-__forceinline const avxb shuffle( const avxb& a ) {
-	return _mm256_cvtepi32_ps(_mm256_shuffle_epi32(a, _MM256_SHUFFLE(i7, i6, i5, i4, i3, i2, i1, i0)));
-}
-
-/*
-template<> __forceinline const avxb shuffle<0, 1, 0, 1, 0, 1, 0, 1>( const avxb& a ) {
-	return _mm_movelh_ps(a, a);
-}
-
-template<> __forceinline const sseb shuffle<2, 3, 2, 3>( const sseb& a ) {
-	return _mm_movehl_ps(a, a);
-}
-
-template<size_t i0, size_t i1, size_t i2, size_t i3> __forceinline const sseb shuffle( const sseb& a, const sseb& b ) {
-	return _mm_shuffle_ps(a, b, _MM_SHUFFLE(i3, i2, i1, i0));
-}
-
-template<> __forceinline const sseb shuffle<0, 1, 0, 1>( const sseb& a, const sseb& b ) {
-	return _mm_movelh_ps(a, b);
-}
-
-template<> __forceinline const sseb shuffle<2, 3, 2, 3>( const sseb& a, const sseb& b ) {
-	return _mm_movehl_ps(b, a);
-}
-
-#if defined(__KERNEL_SSE3__)
-template<> __forceinline const sseb shuffle<0, 0, 2, 2>( const sseb& a ) { return _mm_moveldup_ps(a); }
-template<> __forceinline const sseb shuffle<1, 1, 3, 3>( const sseb& a ) { return _mm_movehdup_ps(a); }
-#endif
-
-#if defined(__KERNEL_SSE41__)
-template<size_t dst, size_t src, size_t clr> __forceinline const sseb insert( const sseb& a, const sseb& b ) { return _mm_insert_ps(a, b, (dst << 4) | (src << 6) | clr); }
-template<size_t dst, size_t src> __forceinline const sseb insert( const sseb& a, const sseb& b ) { return insert<dst, src, 0>(a, b); }
-template<size_t dst>             __forceinline const sseb insert( const sseb& a, const bool b ) { return insert<dst,0>(a, sseb(b)); }
-#endif
-*/
-
 ////////////////////////////////////////////////////////////////////////////////
 /// Reduction Operations
 ////////////////////////////////////////////////////////////////////////////////
@@ -180,7 +142,7 @@ __forceinline size_t movemask( const avxb& a ) { return _mm256_movemask_ps(a); }
 
 ccl_device_inline void print_avxb(const char *label, const avxb &a)
 {
-	printf("%s: %df %df %df %df %df %df %df %d\n",
+	printf("%s: %d %d %d %d %d %d %d %d\n",
 	       label, a[0], a[1], a[2], a[3], a[4], a[5], a[6], a[7]);
 }
author	Stefan Werner <stefan.werner@tangent-animation.com>	2018-11-23 15:08:15 +0300
committer	Stefan Werner <stefan.werner@tangent-animation.com>	2018-11-23 15:19:53 +0300
commit	071f4f4ce0b9520ab0c73d6d68365ad449ca8b80 (patch)
tree	9f37bfcac669366b9ad5fb7605f2fbbed9b71b0a /intern/cycles/util/util_avxb.h
parent	0a2b2d59a5897212ba3771503feb6770fb636bc8 (diff)