diff options
author | Soumith Chintala <soumith@gmail.com> | 2017-02-23 13:00:55 +0300 |
---|---|---|
committer | Soumith Chintala <soumith@gmail.com> | 2017-02-23 13:00:55 +0300 |
commit | 449df29db01cd915223230718b08ba6e1d4cd1ab (patch) | |
tree | 8eb86ddb66cbd099e51ae931897fd43232ca4e17 | |
parent | 8766dc01f022015d988eb206b8a156598689e8b6 (diff) |
fix AVX2 detection bugs (branch: avx2detect)
-rw-r--r-- | lib/TH/CMakeLists.txt | 19 | ||||
-rw-r--r-- | lib/TH/generic/simd/simd.h | 11 |
2 files changed, 25 insertions, 5 deletions
diff --git a/lib/TH/CMakeLists.txt b/lib/TH/CMakeLists.txt index 3f66edc..5fe6c89 100644 --- a/lib/TH/CMakeLists.txt +++ b/lib/TH/CMakeLists.txt @@ -95,6 +95,25 @@ IF(HAVE_GCC_GET_CPUID) SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DHAVE_GCC_GET_CPUID") ENDIF(HAVE_GCC_GET_CPUID) +CHECK_C_SOURCE_COMPILES("#include <stdint.h> + static inline void cpuid(uint32_t *eax, uint32_t *ebx, + uint32_t *ecx, uint32_t *edx) + { + uint32_t a = *eax, b, c = *ecx, d; + asm volatile ( \"cpuid\" : \"+a\"(a), \"=b\"(b), \"+c\"(c), \"=d\"(d) ); + *eax = a; *ebx = b; *ecx = c; *edx = d; + } + int main() { + uint32_t a,b,c,d; + cpuid(&a, &b, &c, &d); + return 0; + }" NO_GCC_EBX_FPIC_BUG) + +IF(NOT NO_GCC_EBX_FPIC_BUG) + SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DUSE_GCC_GET_CPUID") +ENDIF(NOT NO_GCC_EBX_FPIC_BUG) + + FIND_PACKAGE(SSE) IF(C_SSE2_FOUND) SET(CMAKE_C_FLAGS "${C_SSE2_FLAGS} -DUSE_SSE2 ${CMAKE_C_FLAGS}") diff --git a/lib/TH/generic/simd/simd.h b/lib/TH/generic/simd/simd.h index aa3b722..2f0c1f9 100644 --- a/lib/TH/generic/simd/simd.h +++ b/lib/TH/generic/simd/simd.h @@ -4,12 +4,12 @@ #include <stdint.h> #if defined(_MSC_VER) #include <intrin.h> -#elif defined(HAVE_GCC_GET_CPUID) +#elif defined(HAVE_GCC_GET_CPUID) && defined(USE_GCC_GET_CPUID) #include <cpuid.h> #endif // Can be found on Intel ISA Reference for CPUID -#define CPUID_AVX2_BIT 0x10 // Bit 5 of EBX for EAX=0x7 +#define CPUID_AVX2_BIT 0x20 // Bit 5 of EBX for EAX=0x7 #define CPUID_AVX_BIT 0x10000000 // Bit 28 of ECX for EAX=0x1 #define CPUID_SSE_BIT 0x2000000 // bit 25 of EDX for EAX=0x1 @@ -99,13 +99,13 @@ static inline void cpuid(uint32_t *eax, uint32_t *ebx, uint32_t *ecx, uint32_t * *ebx = cpuInfo[1]; *ecx = cpuInfo[2]; *edx = cpuInfo[3]; -#elif defined(HAVE_GCC_GET_CPUID) +#elif defined(HAVE_GCC_GET_CPUID) && defined(USE_GCC_GET_CPUID) uint32_t level = *eax; __get_cpuid (level, eax, ebx, ecx, edx); #else - uint32_t a = *eax, b, c, d; + uint32_t a = *eax, b, c = *ecx, d; asm volatile ( "cpuid\n\t" - : "+a"(a), "=b"(b), 
"=c"(c), "=d"(d) ); + : "+a"(a), "=b"(b), "+c"(c), "=d"(d) ); *eax = a; *ebx = b; *ecx = c; @@ -120,6 +120,7 @@ static inline uint32_t detectHostSIMDExtensions() // Check for AVX2. Requires separate CPUID eax = 0x7; + ecx = 0x0; cpuid(&eax, &ebx, &ecx, &edx); if (ebx & CPUID_AVX2_BIT) hostSimdExts |= SIMDExtension_AVX2; |