diff options
author | Martijn Berger <mberger@denc.com> | 2015-12-10 14:37:46 +0300 |
---|---|---|
committer | Martijn Berger <mberger@denc.com> | 2015-12-10 14:37:46 +0300 |
commit | 9ad13d70faff0c510c313a7d58a58b974e8ce11f (patch) | |
tree | 3b7847573047815fc474105195d06c24cfefaaad /extern/Eigen3/Eigen/src/Core/arch | |
parent | ffc750a4fafc6fe4e6fb556506becaa0e1ce8c9a (diff) |
Update Eigen to version 3.2.7
The main purpose of this is to get MSVC 2015 fixes
Diffstat (limited to 'extern/Eigen3/Eigen/src/Core/arch')
-rw-r--r-- | extern/Eigen3/Eigen/src/Core/arch/NEON/Complex.h | 2 | ||||
-rw-r--r-- | extern/Eigen3/Eigen/src/Core/arch/NEON/PacketMath.h | 20 | ||||
-rw-r--r-- | extern/Eigen3/Eigen/src/Core/arch/SSE/MathFunctions.h | 14 |
3 files changed, 23 insertions, 13 deletions
diff --git a/extern/Eigen3/Eigen/src/Core/arch/NEON/Complex.h b/extern/Eigen3/Eigen/src/Core/arch/NEON/Complex.h index f183d31de2a..8d9255eef6a 100644 --- a/extern/Eigen3/Eigen/src/Core/arch/NEON/Complex.h +++ b/extern/Eigen3/Eigen/src/Core/arch/NEON/Complex.h @@ -110,7 +110,7 @@ template<> EIGEN_STRONG_INLINE Packet2cf ploaddup<Packet2cf>(const std::complex< template<> EIGEN_STRONG_INLINE void pstore <std::complex<float> >(std::complex<float> * to, const Packet2cf& from) { EIGEN_DEBUG_ALIGNED_STORE pstore((float*)to, from.v); } template<> EIGEN_STRONG_INLINE void pstoreu<std::complex<float> >(std::complex<float> * to, const Packet2cf& from) { EIGEN_DEBUG_UNALIGNED_STORE pstoreu((float*)to, from.v); } -template<> EIGEN_STRONG_INLINE void prefetch<std::complex<float> >(const std::complex<float> * addr) { __pld((float *)addr); } +template<> EIGEN_STRONG_INLINE void prefetch<std::complex<float> >(const std::complex<float> * addr) { EIGEN_ARM_PREFETCH((float *)addr); } template<> EIGEN_STRONG_INLINE std::complex<float> pfirst<Packet2cf>(const Packet2cf& a) { diff --git a/extern/Eigen3/Eigen/src/Core/arch/NEON/PacketMath.h b/extern/Eigen3/Eigen/src/Core/arch/NEON/PacketMath.h index 163bac215e6..d49670e0410 100644 --- a/extern/Eigen3/Eigen/src/Core/arch/NEON/PacketMath.h +++ b/extern/Eigen3/Eigen/src/Core/arch/NEON/PacketMath.h @@ -48,9 +48,18 @@ typedef uint32x4_t Packet4ui; #define EIGEN_INIT_NEON_PACKET2(X, Y) {X, Y} #define EIGEN_INIT_NEON_PACKET4(X, Y, Z, W) {X, Y, Z, W} #endif - -#ifndef __pld -#define __pld(x) asm volatile ( " pld [%[addr]]\n" :: [addr] "r" (x) : "cc" ); + +// arm64 does have the pld instruction. If available, let's trust the __builtin_prefetch built-in function +// which available on LLVM and GCC (at least) +#if EIGEN_HAS_BUILTIN(__builtin_prefetch) || defined(__GNUC__) + #define EIGEN_ARM_PREFETCH(ADDR) __builtin_prefetch(ADDR); +#elif defined __pld + #define EIGEN_ARM_PREFETCH(ADDR) __pld(ADDR) +#elif !defined(__aarch64__) + #define EIGEN_ARM_PREFETCH(ADDR) __asm__ __volatile__ ( " pld [%[addr]]\n" :: [addr] "r" (ADDR) : "cc" ); +#else + // by default no explicit prefetching + #define EIGEN_ARM_PREFETCH(ADDR) #endif template<> struct packet_traits<float> : default_packet_traits @@ -209,8 +218,8 @@ template<> EIGEN_STRONG_INLINE void pstore<int>(int* to, const Packet4i& f template<> EIGEN_STRONG_INLINE void pstoreu<float>(float* to, const Packet4f& from) { EIGEN_DEBUG_UNALIGNED_STORE vst1q_f32(to, from); } template<> EIGEN_STRONG_INLINE void pstoreu<int>(int* to, const Packet4i& from) { EIGEN_DEBUG_UNALIGNED_STORE vst1q_s32(to, from); } -template<> EIGEN_STRONG_INLINE void prefetch<float>(const float* addr) { __pld(addr); } -template<> EIGEN_STRONG_INLINE void prefetch<int>(const int* addr) { __pld(addr); } +template<> EIGEN_STRONG_INLINE void prefetch<float>(const float* addr) { EIGEN_ARM_PREFETCH(addr); } +template<> EIGEN_STRONG_INLINE void prefetch<int>(const int* addr) { EIGEN_ARM_PREFETCH(addr); } // FIXME only store the 2 first elements ? template<> EIGEN_STRONG_INLINE float pfirst<Packet4f>(const Packet4f& a) { float EIGEN_ALIGN16 x[4]; vst1q_f32(x, a); return x[0]; } @@ -375,6 +384,7 @@ template<> EIGEN_STRONG_INLINE int predux_max<Packet4i>(const Packet4i& a) a_lo = vget_low_s32(a); a_hi = vget_high_s32(a); max = vpmax_s32(a_lo, a_hi); + max = vpmax_s32(max, max); return vget_lane_s32(max, 0); } diff --git a/extern/Eigen3/Eigen/src/Core/arch/SSE/MathFunctions.h b/extern/Eigen3/Eigen/src/Core/arch/SSE/MathFunctions.h index 99cbd0d95be..2b07168ae4f 100644 --- a/extern/Eigen3/Eigen/src/Core/arch/SSE/MathFunctions.h +++ b/extern/Eigen3/Eigen/src/Core/arch/SSE/MathFunctions.h @@ -52,7 +52,7 @@ Packet4f plog<Packet4f>(const Packet4f& _x) Packet4i emm0; - Packet4f invalid_mask = _mm_cmplt_ps(x, _mm_setzero_ps()); + Packet4f invalid_mask = _mm_cmpnge_ps(x, _mm_setzero_ps()); // not greater equal is true if x is NaN Packet4f iszero_mask = _mm_cmpeq_ps(x, _mm_setzero_ps()); x = pmax(x, p4f_min_norm_pos); /* cut off denormalized stuff */ @@ -126,7 +126,7 @@ Packet4f pexp<Packet4f>(const Packet4f& _x) _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p4, 1.6666665459E-1f); _EIGEN_DECLARE_CONST_Packet4f(cephes_exp_p5, 5.0000001201E-1f); - Packet4f tmp = _mm_setzero_ps(), fx; + Packet4f tmp, fx; Packet4i emm0; // clamp x @@ -166,7 +166,7 @@ Packet4f pexp<Packet4f>(const Packet4f& _x) emm0 = _mm_cvttps_epi32(fx); emm0 = _mm_add_epi32(emm0, p4i_0x7f); emm0 = _mm_slli_epi32(emm0, 23); - return pmul(y, _mm_castsi128_ps(emm0)); + return pmax(pmul(y, Packet4f(_mm_castsi128_ps(emm0))), _x); } template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED Packet2d pexp<Packet2d>(const Packet2d& _x) @@ -195,7 +195,7 @@ Packet2d pexp<Packet2d>(const Packet2d& _x) _EIGEN_DECLARE_CONST_Packet2d(cephes_exp_C2, 1.42860682030941723212e-6); static const __m128i p4i_1023_0 = _mm_setr_epi32(1023, 1023, 0, 0); - Packet2d tmp = _mm_setzero_pd(), fx; + Packet2d tmp, fx; Packet4i emm0; // clamp x @@ -239,7 +239,7 @@ Packet2d pexp<Packet2d>(const Packet2d& _x) emm0 = _mm_add_epi32(emm0, p4i_1023_0); emm0 = _mm_slli_epi32(emm0, 20); emm0 = _mm_shuffle_epi32(emm0, _MM_SHUFFLE(1,2,0,3)); - return pmul(x, _mm_castsi128_pd(emm0)); + return pmax(pmul(x, Packet2d(_mm_castsi128_pd(emm0))), _x); } /* evaluation of 4 sines at onces, using SSE2 intrinsics. @@ -279,7 +279,7 @@ Packet4f psin<Packet4f>(const Packet4f& _x) _EIGEN_DECLARE_CONST_Packet4f(coscof_p2, 4.166664568298827E-002f); _EIGEN_DECLARE_CONST_Packet4f(cephes_FOPI, 1.27323954473516f); // 4 / M_PI - Packet4f xmm1, xmm2 = _mm_setzero_ps(), xmm3, sign_bit, y; + Packet4f xmm1, xmm2, xmm3, sign_bit, y; Packet4i emm0, emm2; sign_bit = x; @@ -378,7 +378,7 @@ Packet4f pcos<Packet4f>(const Packet4f& _x) _EIGEN_DECLARE_CONST_Packet4f(coscof_p2, 4.166664568298827E-002f); _EIGEN_DECLARE_CONST_Packet4f(cephes_FOPI, 1.27323954473516f); // 4 / M_PI - Packet4f xmm1, xmm2 = _mm_setzero_ps(), xmm3, y; + Packet4f xmm1, xmm2, xmm3, y; Packet4i emm0, emm2; x = pabs(x); |