Welcome to mirror list, hosted at ThFree Co, Russian Federation.

git.blender.org/blender.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMartijn Berger <mberger@denc.com>2015-12-10 14:37:46 +0300
committerMartijn Berger <mberger@denc.com>2015-12-10 14:37:46 +0300
commit9ad13d70faff0c510c313a7d58a58b974e8ce11f (patch)
tree3b7847573047815fc474105195d06c24cfefaaad /extern/Eigen3/Eigen/src/Core/arch
parentffc750a4fafc6fe4e6fb556506becaa0e1ce8c9a (diff)
Update Eigen to version 3.2.7
The main purpose of this is to get MSVC 2015 fixes
Diffstat (limited to 'extern/Eigen3/Eigen/src/Core/arch')
-rw-r--r--extern/Eigen3/Eigen/src/Core/arch/NEON/Complex.h2
-rw-r--r--extern/Eigen3/Eigen/src/Core/arch/NEON/PacketMath.h20
-rw-r--r--extern/Eigen3/Eigen/src/Core/arch/SSE/MathFunctions.h14
3 files changed, 23 insertions, 13 deletions
diff --git a/extern/Eigen3/Eigen/src/Core/arch/NEON/Complex.h b/extern/Eigen3/Eigen/src/Core/arch/NEON/Complex.h
index f183d31de2a..8d9255eef6a 100644
--- a/extern/Eigen3/Eigen/src/Core/arch/NEON/Complex.h
+++ b/extern/Eigen3/Eigen/src/Core/arch/NEON/Complex.h
@@ -110,7 +110,7 @@ template<> EIGEN_STRONG_INLINE Packet2cf ploaddup<Packet2cf>(const std::complex<
template<> EIGEN_STRONG_INLINE void pstore <std::complex<float> >(std::complex<float> * to, const Packet2cf& from) { EIGEN_DEBUG_ALIGNED_STORE pstore((float*)to, from.v); }
template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<float> >(std::complex<float> * to, const Packet2cf& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu((float*)to, from.v); }
-template<> EIGEN_STRONG_INLINE void prefetch<std::complex<float> >(const std::complex<float> * addr) { __pld((float *)addr); }
+template<> EIGEN_STRONG_INLINE void prefetch<std::complex<float> >(const std::complex<float> * addr) { EIGEN_ARM_PREFETCH((float *)addr); }
template<> EIGEN_STRONG_INLINE std::complex<float> pfirst<Packet2cf>(const Packet2cf& a)
{
diff --git a/extern/Eigen3/Eigen/src/Core/arch/NEON/PacketMath.h b/extern/Eigen3/Eigen/src/Core/arch/NEON/PacketMath.h
index 163bac215e6..d49670e0410 100644
--- a/extern/Eigen3/Eigen/src/Core/arch/NEON/PacketMath.h
+++ b/extern/Eigen3/Eigen/src/Core/arch/NEON/PacketMath.h
@@ -48,9 +48,18 @@ typedef uint32x4_t Packet4ui;
#define EIGEN_INIT_NEON_PACKET2(X, Y) {X, Y}
#define EIGEN_INIT_NEON_PACKET4(X, Y, Z, W) {X, Y, Z, W}
#endif
-
-#ifndef __pld
-#define __pld(x) asm volatile ( " pld [%[addr]]\n" :: [addr] "r" (x) : "cc" );
+
+// arm64 does have the pld instruction. If available, let's trust the __builtin_prefetch built-in function
+// which available on LLVM and GCC (at least)
+#if EIGEN_HAS_BUILTIN(__builtin_prefetch) || defined(__GNUC__)
+ #define EIGEN_ARM_PREFETCH(ADDR) __builtin_prefetch(ADDR);
+#elif defined __pld
+ #define EIGEN_ARM_PREFETCH(ADDR) __pld(ADDR)
+#elif !defined(__aarch64__)
+ #define EIGEN_ARM_PREFETCH(ADDR) __asm__ __volatile__ ( " pld [%[addr]]\n" :: [addr] "r" (ADDR) : "cc" );
+#else
+ // by default no explicit prefetching
+ #define EIGEN_ARM_PREFETCH(ADDR)
#endif
template<> struct packet_traits<float> : default_packet_traits
@@ -209,8 +218,8 @@ template<> EIGEN_STRONG_INLINE void pstore<int>(int* to, const Packet4i& f
template<> EIGEN_STRONG_INLINE void pstoreu<float>(float* to, const Packet4f& from) { EIGEN_DEBUG_UNALIGNED_STORE vst1q_f32(to, from); }
template<> EIGEN_STRONG_INLINE void pstoreu<int>(int* to, const Packet4i& from) { EIGEN_DEBUG_UNALIGNED_STORE vst1q_s32(to, from); }
-template<> EIGEN_STRONG_INLINE void prefetch<float>(const float* addr) { __pld(addr); }
-template<> EIGEN_STRONG_INLINE void prefetch<int>(const int* addr) { __pld(addr); }
+template<> EIGEN_STRONG_INLINE void prefetch<float>(const float* addr) { EIGEN_ARM_PREFETCH(addr); }
+template<> EIGEN_STRONG_INLINE void prefetch<int>(const int* addr) { EIGEN_ARM_PREFETCH(addr); }
// FIXME only store the 2 first elements ?
template<> EIGEN_STRONG_INLINE float pfirst<Packet4f>(const Packet4f& a) { float EIGEN_ALIGN16 x[4]; vst1q_f32(x, a); return x[0]; }
@@ -375,6 +384,7 @@ template<> EIGEN_STRONG_INLINE int predux_max<Packet4i>(const Packet4i& a)
a_lo = vget_low_s32(a);
a_hi = vget_high_s32(a);
max = vpmax_s32(a_lo, a_hi);
+ max = vpmax_s32(max, max);
return vget_lane_s32(max, 0);
}
diff --git a/extern/Eigen3/Eigen/src/Core/arch/SSE/MathFunctions.h b/extern/Eigen3/Eigen/src/Core/arch/SSE/MathFunctions.h
index 99cbd0d95be..2b07168ae4f 100644
--- a/extern/Eigen3/Eigen/src/Core/arch/SSE/MathFunctions.h
+++ b/extern/Eigen3/Eigen/src/Core/arch/SSE/MathFunctions.h
@@ -52,7 +52,7 @@ Packet4f plog<Packet4f>(const Packet4f& _x)
Packet4i emm0;
- Packet4f invalid_mask = _mm_cmplt_ps(x, _mm_setzero_ps());
+ Packet4f invalid_mask = _mm_cmpnge_ps(x, _mm_setzero_ps()); // not greater equal is true if x is NaN
Packet4f iszero_mask = _mm_cmpeq_ps(x, _mm_setzero_ps());
x = pmax(x, p4f_min_norm_pos); /* cut off denormalized stuff */
@@ -126,7 +126,7 @@ Packet4f pexp<Packet4f>(const Packet4f& _x)
_EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p4, 1.6666665459E-1f);
_EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p5, 5.0000001201E-1f);
- Packet4f tmp = _mm_setzero_ps(), fx;
+ Packet4f tmp, fx;
Packet4i emm0;
// clamp x
@@ -166,7 +166,7 @@ Packet4f pexp<Packet4f>(const Packet4f& _x)
emm0 = _mm_cvttps_epi32(fx);
emm0 = _mm_add_epi32(emm0, p4i_0x7f);
emm0 = _mm_slli_epi32(emm0, 23);
- return pmul(y, _mm_castsi128_ps(emm0));
+ return pmax(pmul(y, Packet4f(_mm_castsi128_ps(emm0))), _x);
}
template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
Packet2d pexp<Packet2d>(const Packet2d& _x)
@@ -195,7 +195,7 @@ Packet2d pexp<Packet2d>(const Packet2d& _x)
_EIGEN_DECLARE_CONST_Packet2d(cephes_exp_C2, 1.42860682030941723212e-6);
static const __m128i p4i_1023_0 = _mm_setr_epi32(1023, 1023, 0, 0);
- Packet2d tmp = _mm_setzero_pd(), fx;
+ Packet2d tmp, fx;
Packet4i emm0;
// clamp x
@@ -239,7 +239,7 @@ Packet2d pexp<Packet2d>(const Packet2d& _x)
emm0 = _mm_add_epi32(emm0, p4i_1023_0);
emm0 = _mm_slli_epi32(emm0, 20);
emm0 = _mm_shuffle_epi32(emm0, _MM_SHUFFLE(1,2,0,3));
- return pmul(x, _mm_castsi128_pd(emm0));
+ return pmax(pmul(x, Packet2d(_mm_castsi128_pd(emm0))), _x);
}
/* evaluation of 4 sines at onces, using SSE2 intrinsics.
@@ -279,7 +279,7 @@ Packet4f psin<Packet4f>(const Packet4f& _x)
_EIGEN_DECLARE_CONST_Packet4f(coscof_p2, 4.166664568298827E-002f);
_EIGEN_DECLARE_CONST_Packet4f(cephes_FOPI, 1.27323954473516f); // 4 / M_PI
- Packet4f xmm1, xmm2 = _mm_setzero_ps(), xmm3, sign_bit, y;
+ Packet4f xmm1, xmm2, xmm3, sign_bit, y;
Packet4i emm0, emm2;
sign_bit = x;
@@ -378,7 +378,7 @@ Packet4f pcos<Packet4f>(const Packet4f& _x)
_EIGEN_DECLARE_CONST_Packet4f(coscof_p2, 4.166664568298827E-002f);
_EIGEN_DECLARE_CONST_Packet4f(cephes_FOPI, 1.27323954473516f); // 4 / M_PI
- Packet4f xmm1, xmm2 = _mm_setzero_ps(), xmm3, y;
+ Packet4f xmm1, xmm2, xmm3, y;
Packet4i emm0, emm2;
x = pabs(x);