From 449df29db01cd915223230718b08ba6e1d4cd1ab Mon Sep 17 00:00:00 2001
From: Soumith Chintala
Date: Thu, 23 Feb 2017 05:00:55 -0500
Subject: fix AVX2 detection bugs

---
 lib/TH/CMakeLists.txt      | 19 +++++++++++++++++++
 lib/TH/generic/simd/simd.h | 11 ++++++-----
 2 files changed, 25 insertions(+), 5 deletions(-)

diff --git a/lib/TH/CMakeLists.txt b/lib/TH/CMakeLists.txt
index 3f66edc..5fe6c89 100644
--- a/lib/TH/CMakeLists.txt
+++ b/lib/TH/CMakeLists.txt
@@ -95,6 +95,25 @@ IF(HAVE_GCC_GET_CPUID)
   SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DHAVE_GCC_GET_CPUID")
 ENDIF(HAVE_GCC_GET_CPUID)
 
+CHECK_C_SOURCE_COMPILES("#include <stdint.h>
+    static inline void cpuid(uint32_t *eax, uint32_t *ebx,
+                             uint32_t *ecx, uint32_t *edx)
+    {
+      uint32_t a = *eax, b, c = *ecx, d;
+      asm volatile ( \"cpuid\" : \"+a\"(a), \"=b\"(b), \"+c\"(c), \"=d\"(d) );
+      *eax = a; *ebx = b; *ecx = c; *edx = d;
+    }
+    int main() {
+      uint32_t a,b,c,d;
+      cpuid(&a, &b, &c, &d);
+      return 0;
+    }" NO_GCC_EBX_FPIC_BUG)
+
+IF(NOT NO_GCC_EBX_FPIC_BUG)
+  SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DUSE_GCC_GET_CPUID")
+ENDIF(NOT NO_GCC_EBX_FPIC_BUG)
+
+
 FIND_PACKAGE(SSE)
 IF(C_SSE2_FOUND)
   SET(CMAKE_C_FLAGS "${C_SSE2_FLAGS} -DUSE_SSE2 ${CMAKE_C_FLAGS}")
diff --git a/lib/TH/generic/simd/simd.h b/lib/TH/generic/simd/simd.h
index aa3b722..2f0c1f9 100644
--- a/lib/TH/generic/simd/simd.h
+++ b/lib/TH/generic/simd/simd.h
@@ -4,12 +4,12 @@
 #include <stdint.h>
 #if defined(_MSC_VER)
 #include <intrin.h>
-#elif defined(HAVE_GCC_GET_CPUID)
+#elif defined(HAVE_GCC_GET_CPUID) && defined(USE_GCC_GET_CPUID)
 #include <cpuid.h>
 #endif
 
 // Can be found on Intel ISA Reference for CPUID
-#define CPUID_AVX2_BIT 0x10 // Bit 5 of EBX for EAX=0x7
+#define CPUID_AVX2_BIT 0x20 // Bit 5 of EBX for EAX=0x7
 #define CPUID_AVX_BIT 0x10000000 // Bit 28 of ECX for EAX=0x1
 #define CPUID_SSE_BIT 0x2000000 // bit 25 of EDX for EAX=0x1
 
@@ -99,13 +99,13 @@ static inline void cpuid(uint32_t *eax, uint32_t *ebx, uint32_t *ecx, uint32_t *
   *ebx = cpuInfo[1];
   *ecx = cpuInfo[2];
   *edx = cpuInfo[3];
-#elif defined(HAVE_GCC_GET_CPUID)
+#elif defined(HAVE_GCC_GET_CPUID) && defined(USE_GCC_GET_CPUID)
   uint32_t level = *eax;
   __get_cpuid (level, eax, ebx, ecx, edx);
 #else
-  uint32_t a = *eax, b, c, d;
+  uint32_t a = *eax, b, c = *ecx, d;
   asm volatile ( "cpuid\n\t"
-                 : "+a"(a), "=b"(b), "=c"(c), "=d"(d) );
+                 : "+a"(a), "=b"(b), "+c"(c), "=d"(d) );
   *eax = a;
   *ebx = b;
   *ecx = c;
@@ -120,6 +120,7 @@ static inline uint32_t detectHostSIMDExtensions()
 
   // Check for AVX2. Requires separate CPUID
   eax = 0x7;
+  ecx = 0x0;
   cpuid(&eax, &ebx, &ecx, &edx);
   if (ebx & CPUID_AVX2_BIT)
     hostSimdExts |= SIMDExtension_AVX2;
-- 
cgit v1.2.3