Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/marian-nmt/intgemm.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKenneth Heafield <kheafiel@amazon.com>2020-03-05 16:31:48 +0300
committerKenneth Heafield <kheafiel@amazon.com>2020-03-05 16:31:48 +0300
commit39ddb9950399565d11990cbaf7507b86dd27acc9 (patch)
tree66a7b6b23e50b6ba57f2e26172f6cc88aed1e7b0
parent9113ea73d8c35b57375fa6bfe54eece6b67d1f5e (diff)
Change CPUID dispatch for gcc and clang
-rw-r--r--compile_test_avx512vnni.cc10
-rw-r--r--intgemm.h86
2 files changed, 57 insertions, 39 deletions
diff --git a/compile_test_avx512vnni.cc b/compile_test_avx512vnni.cc
index 611cc53..deb0f88 100644
--- a/compile_test_avx512vnni.cc
+++ b/compile_test_avx512vnni.cc
@@ -19,11 +19,13 @@ bool Foo() {
}
int main() {
- return Foo() &&
-#ifdef __INTEL_COMPILER
- _may_i_use_cpu_feature(_FEATURE_AVX512_VNNI)
+ return Foo()
+#if defined(__GNUC__) || defined(__clang__)
+ // uses cpuid
+#elif defined(__INTEL_COMPILER)
+ && _may_i_use_cpu_feature(_FEATURE_AVX512_VNNI)
#else
- __builtin_cpu_supports("avx512vnni")
+ && __builtin_cpu_supports("avx512vnni")
#endif
;
}
diff --git a/intgemm.h b/intgemm.h
index ba86e53..5519814 100644
--- a/intgemm.h
+++ b/intgemm.h
@@ -135,26 +135,6 @@ static inline float MaxAbsolute(const float *begin, const float *end) {
typedef Unsupported_8bit AVX512VNNI_8bit;
#endif
-
-#ifdef INTGEMM_COMPILER_SUPPORTS_AVX512
-// gcc 5.4.0 bizarrely supports avx512bw targets but not __builtin_cpu_supports("avx512bw"). So implement it manually.
-inline bool CheckAVX512BW() {
- __builtin_cpu_init ();
-#ifdef __INTEL_COMPILER
- return _may_i_use_cpu_feature(_FEATURE_AVX512BW)
-#elif __GNUC__
- unsigned int m = __get_cpuid_max(0, NULL);
- if (m < 7) return false;
- unsigned int eax, ebx, ecx, edx;
- __cpuid_count(7, 0, eax, ebx, ecx, edx);
- const unsigned int avx512bw_bit = (1 << 30);
- return ebx & avx512bw_bit;
-#else
- return __builtin_cpu_supports("avx512bw");
-#endif
-}
-#endif
-
/* Returns:
* avx512vnni if the CPU supports AVX512VNNI
*
@@ -172,24 +152,59 @@ template <class T> T ChooseCPU(T
#ifdef INTGEMM_COMPILER_SUPPORTS_AVX512VNNI
avx512vnni
#endif
- , T avx512bw, T avx2, T ssse3, T sse2, T unsupported) {
- __builtin_cpu_init ();
-#ifdef INTGEMM_COMPILER_SUPPORTS_AVX512VNNI
- if (
-#ifdef __INTEL_COMPILER
- _may_i_use_cpu_feature(_FEATURE_AVX512_VNNI)
-#else
- __builtin_cpu_supports("avx512vnni")
+ , T
+#ifdef INTGEMM_COMPILER_SUPPORTS_AVX512
+ avx512bw
#endif
- ) {
- return avx512vnni;
+ , T avx2, T ssse3, T sse2, T unsupported) {
+ /* If intgemm is compiled by gcc 6.4.1 then dlopened into an executable
+ * compiled by gcc 7.3.0, there will be an undefined symbol __cpu_info.
+ * Work around this by calling the intrinsics more directly instead of
+ * __builtin_cpu_supports.
+ *
+ * clang 6.0.0-1ubuntu2 supports vnni but doesn't have
+ * __builtin_cpu_supports("avx512vnni")
+ * so use the hand-coded CPUID for clang.
+ */
+#if defined(__GNUC__) || defined(__clang__)
+ unsigned int m = __get_cpuid_max(0, NULL);
+ unsigned int eax, ebx, ecx, edx;
+ if (m >= 7) {
+ __cpuid_count(7, 0, eax, ebx, ecx, edx);
+# ifdef INTGEMM_COMPILER_SUPPORTS_AVX512VNNI
+ if (ecx & (1 << 11)) return avx512vnni;
+# endif
+# ifdef INTGEMM_COMPILER_SUPPORTS_AVX512
+ if (ebx & (1 << 30)) return avx512bw;
+# endif
+ if (ebx & (1 << 5)) return avx2;
}
-#endif
-#ifdef INTGEMM_COMPILER_SUPPORTS_AVX512
- if (CheckAVX512BW()) {
- return avx512bw;
+ if (m >= 1) {
+ __cpuid_count(1, 0, eax, ebx, ecx, edx);
+ if (ecx & (1 << 9)) return ssse3;
+ if (edx & (1 << 26)) return sse2;
}
-#endif
+ return unsupported;
+#else // not gcc or clang.
+ __builtin_cpu_init();
+# ifdef INTGEMM_COMPILER_SUPPORTS_AVX512VNNI
+ if (
+# ifdef __INTEL_COMPILER
+ _may_i_use_cpu_feature(_FEATURE_AVX512_VNNI)
+# else
+ __builtin_cpu_supports("avx512vnni")
+# endif
+ ) return vnni;
+# endif
+# ifdef INTGEMM_COMPILER_SUPPORTS_AVX512
+ if (
+# ifdef __INTEL_COMPILER
+ _may_i_use_cpu_feature(_FEATURE_AVX512BW)
+# else
+ __builtin_cpu_supports("avx512bw")
+# endif
+ ) return avx512bw;
+# endif
if (__builtin_cpu_supports("avx2")) {
return avx2;
} else if (__builtin_cpu_supports("ssse3")) {
@@ -199,6 +214,7 @@ template <class T> T ChooseCPU(T
} else {
return unsupported;
}
+#endif
}
struct TileInfo {