Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/torch/torch7.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Soumith Chintala <soumith@gmail.com> 2017-02-23 13:00:55 +0300
committer Soumith Chintala <soumith@gmail.com> 2017-02-23 13:00:55 +0300
commit 449df29db01cd915223230718b08ba6e1d4cd1ab (patch)
tree 8eb86ddb66cbd099e51ae931897fd43232ca4e17
parent 8766dc01f022015d988eb206b8a156598689e8b6 (diff)
fix AVX2 detection bugs (avx2detect)
-rw-r--r-- lib/TH/CMakeLists.txt 19
-rw-r--r-- lib/TH/generic/simd/simd.h 11
2 files changed, 25 insertions, 5 deletions
diff --git a/lib/TH/CMakeLists.txt b/lib/TH/CMakeLists.txt
index 3f66edc..5fe6c89 100644
--- a/lib/TH/CMakeLists.txt
+++ b/lib/TH/CMakeLists.txt
@@ -95,6 +95,25 @@ IF(HAVE_GCC_GET_CPUID)
SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DHAVE_GCC_GET_CPUID")
ENDIF(HAVE_GCC_GET_CPUID)
+CHECK_C_SOURCE_COMPILES("#include <stdint.h>
+ static inline void cpuid(uint32_t *eax, uint32_t *ebx,
+ uint32_t *ecx, uint32_t *edx)
+ {
+ uint32_t a = *eax, b, c = *ecx, d;
+ asm volatile ( \"cpuid\" : \"+a\"(a), \"=b\"(b), \"+c\"(c), \"=d\"(d) );
+ *eax = a; *ebx = b; *ecx = c; *edx = d;
+ }
+ int main() {
+ uint32_t a,b,c,d;
+ cpuid(&a, &b, &c, &d);
+ return 0;
+ }" NO_GCC_EBX_FPIC_BUG)
+
+IF(NOT NO_GCC_EBX_FPIC_BUG)
+ SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DUSE_GCC_GET_CPUID")
+ENDIF(NOT NO_GCC_EBX_FPIC_BUG)
+
+
FIND_PACKAGE(SSE)
IF(C_SSE2_FOUND)
SET(CMAKE_C_FLAGS "${C_SSE2_FLAGS} -DUSE_SSE2 ${CMAKE_C_FLAGS}")
diff --git a/lib/TH/generic/simd/simd.h b/lib/TH/generic/simd/simd.h
index aa3b722..2f0c1f9 100644
--- a/lib/TH/generic/simd/simd.h
+++ b/lib/TH/generic/simd/simd.h
@@ -4,12 +4,12 @@
#include <stdint.h>
#if defined(_MSC_VER)
#include <intrin.h>
-#elif defined(HAVE_GCC_GET_CPUID)
+#elif defined(HAVE_GCC_GET_CPUID) && defined(USE_GCC_GET_CPUID)
#include <cpuid.h>
#endif
// Can be found on Intel ISA Reference for CPUID
-#define CPUID_AVX2_BIT 0x10 // Bit 5 of EBX for EAX=0x7
+#define CPUID_AVX2_BIT 0x20 // Bit 5 of EBX for EAX=0x7
#define CPUID_AVX_BIT 0x10000000 // Bit 28 of ECX for EAX=0x1
#define CPUID_SSE_BIT 0x2000000 // bit 25 of EDX for EAX=0x1
@@ -99,13 +99,13 @@ static inline void cpuid(uint32_t *eax, uint32_t *ebx, uint32_t *ecx, uint32_t *
*ebx = cpuInfo[1];
*ecx = cpuInfo[2];
*edx = cpuInfo[3];
-#elif defined(HAVE_GCC_GET_CPUID)
+#elif defined(HAVE_GCC_GET_CPUID) && defined(USE_GCC_GET_CPUID)
uint32_t level = *eax;
__get_cpuid (level, eax, ebx, ecx, edx);
#else
- uint32_t a = *eax, b, c, d;
+ uint32_t a = *eax, b, c = *ecx, d;
asm volatile ( "cpuid\n\t"
- : "+a"(a), "=b"(b), "=c"(c), "=d"(d) );
+ : "+a"(a), "=b"(b), "+c"(c), "=d"(d) );
*eax = a;
*ebx = b;
*ecx = c;
@@ -120,6 +120,7 @@ static inline uint32_t detectHostSIMDExtensions()
// Check for AVX2. Requires separate CPUID
eax = 0x7;
+ ecx = 0x0;
cpuid(&eax, &ebx, &ecx, &edx);
if (ebx & CPUID_AVX2_BIT)
hostSimdExts |= SIMDExtension_AVX2;