diff options
author | Kenneth Heafield <github@kheafield.com> | 2020-01-08 19:15:06 +0300 |
---|---|---|
committer | Kenneth Heafield <github@kheafield.com> | 2020-01-08 19:20:04 +0300 |
commit | 86feaac3c5049b27f4ef571965242d4a8fb1943c (patch) | |
tree | c3893ade619f4760835c6488047bc21314752606 | |
parent | da45fa2b2198107a63cda81d6c6807fd138bf545 (diff) |
AVX512BW detection for older gcc
-rw-r--r-- | intgemm.h | 41 |
1 files changed, 29 insertions, 12 deletions
@@ -1,5 +1,4 @@ #pragma once - /* Main interface for integer matrix multiplication. * * We are computing C = A * B with an optional scaling factor. @@ -52,6 +51,10 @@ #include "avx512_gemm.h" #include "avx512vnni_gemm.h" +#if defined(__GNUC__) && defined(INTGEMM_COMPILER_SUPPORTS_AVX512) +#include "cpuid.h" +#endif + /* Dispatch to functions based on runtime CPUID. This adds one call-by-variable to each call. */ namespace intgemm { @@ -117,9 +120,29 @@ static inline float MaxAbsolute(const float *begin, const float *end) { typedef Unsupported_8bit AVX512VNNI_8bit; #endif + +#ifdef INTGEMM_COMPILER_SUPPORTS_AVX512 +// gcc 5.4.0 bizarrely supports avx512bw targets but not __builtin_cpu_supports("avx512bw"). So implement it manually. Returns true when the CPU reports AVX512BW; when __GNUC__ is defined this reads CPUID leaf 7, subleaf 0 and tests bit 30 of EBX, returning false if the highest supported basic leaf is below 7. +inline bool CheckAVX512BW() { +#ifdef __INTEL_COMPILER + return _may_i_use_cpu_feature(_FEATURE_AVX512BW); +#elif __GNUC__ + unsigned int m = __get_cpuid_max(0, NULL); + if (m < 7) return false; + unsigned int eax, ebx, ecx, edx; + __cpuid_count(7, 0, eax, ebx, ecx, edx); + const unsigned int avx512bw_bit = (1 << 30); + return ebx & avx512bw_bit; +#else + return __builtin_cpu_supports("avx512bw"); +#endif +} +#endif + /* Returns: - * avx512 if the CPU supports AVX512F (though really it should be AVX512BW, but - * cloud providers lie). TODO: don't catch Knights processors with this. 
+ * avx512vnni if the CPU supports AVX512VNNI + * + * avx512bw if the CPU supports AVX512BW * * avx2 if the CPU supports AVX2 * @@ -129,7 +152,7 @@ typedef Unsupported_8bit AVX512VNNI_8bit; * * unsupported otherwise */ -template <class T> T ChooseCPU(T avx512vnni, T avx512, T avx2, T ssse3, T sse2, T unsupported) { +template <class T> T ChooseCPU(T avx512vnni, T avx512bw, T avx2, T ssse3, T sse2, T unsupported) { #ifdef INTGEMM_COMPILER_SUPPORTS_AVX512VNNI if ( #ifdef __INTEL_COMPILER @@ -142,14 +165,8 @@ template <class T> T ChooseCPU(T avx512vnni, T avx512, T avx2, T ssse3, T sse2, } #endif #ifdef INTGEMM_COMPILER_SUPPORTS_AVX512 - if ( -#ifdef __INTEL_COMPILER - _may_i_use_cpu_feature(_FEATURE_AVX512BW) -#else - __builtin_cpu_supports("avx512bw") -#endif - ) { - return avx512; + if (CheckAVX512BW()) { + return avx512bw; } #endif if (__builtin_cpu_supports("avx2")) { |