diff options
author | Kenneth Heafield <github@kheafield.com> | 2021-05-01 17:27:53 +0300 |
---|---|---|
committer | Kenneth Heafield <github@kheafield.com> | 2021-05-01 17:27:53 +0300 |
commit | b578ae4521229c53ba0512be3d562b0afbd68fd4 (patch) | |
tree | ab6319b1394ad12698e233080d87d0bdfae0b11c | |
parent | b52c3edde5af8d0bfb21626a69aa8386779c7dfc (diff) |
Environment variable INTGEMM_CPUID to downgrade CPU model
-rw-r--r-- | intgemm/intgemm.cc | 93 | ||||
-rw-r--r-- | intgemm/intgemm.h | 89 | ||||
-rw-r--r-- | intgemm/types.h | 10 |
3 files changed, 102 insertions, 90 deletions
```diff
diff --git a/intgemm/intgemm.cc b/intgemm/intgemm.cc
index 82ad750..e604ead 100644
--- a/intgemm/intgemm.cc
+++ b/intgemm/intgemm.cc
@@ -1,8 +1,101 @@
 #include "intgemm.h"
 #include "stats.h"
 
+#include <stdlib.h>
+
+#include <iostream>
+
 namespace intgemm {
 
+namespace {
+
+// Return the maximum CPU model that's found and supported at compile time.
+CPUType RealCPUID() {
+#if defined(WASM)
+  // emscripten does SSE4.1 but we only use up to SSSE3.
+  return CPUType::SSSE3;
+#elif defined(__INTEL_COMPILER)
+# ifdef INTGEMM_COMPILER_SUPPORTS_AVX512VNNI
+  if (_may_i_use_cpu_feature(_FEATURE_AVX512_VNNI)) return CPUType::AVX512VNNI;
+# endif
+# ifdef INTGEMM_COMPILER_SUPPORTS_AVX512BW
+  if (_may_i_use_cpu_feature(_FEATURE_AVX512BW)) return CPUType::AVX512BW;
+# endif
+# ifdef INTGEMM_COMPILER_SUPPORTS_AVX2
+  if (_may_i_use_cpu_feature(_FEATURE_AVX2)) return CPUType::AVX2;
+# endif
+  if (_may_i_use_cpu_feature(_FEATURE_SSSE3)) return CPUType::SSSE3;
+  if (_may_i_use_cpu_feature(_FEATURE_SSE2)) return CPUType::SSE2;
+  return CPUType::UNSUPPORTED;
+#else
+// Not emscripten, not Intel compiler
+# if defined(_MSC_VER)
+  int regs[4];
+  int &eax = regs[0], &ebx = regs[1], &ecx = regs[2], &edx = regs[3];
+  __cpuid(regs, 0);
+  int m = eax;
+# else
+  /* gcc and clang.
+   * If intgemm is compiled by gcc 6.4.1 then dlopened into an executable
+   * compiled by gcc 7.3.0, there will be a undefined symbol __cpu_info.
+   * Work around this by calling the intrinsics more directly instead of
+   * __builtin_cpu_supports.
+   *
+   * clang 6.0.0-1ubuntu2 supports vnni but doesn't have
+   * __builtin_cpu_supports("avx512vnni")
+   * so use the hand-coded CPUID for clang.
+   */
+  unsigned int m = __get_cpuid_max(0, 0);
+  unsigned int eax, ebx, ecx, edx;
+# endif
+  if (m >= 7) {
+# if defined(_MSC_VER)
+    __cpuid(regs, 7);
+# else
+    __cpuid_count(7, 0, eax, ebx, ecx, edx);
+# endif
+# ifdef INTGEMM_COMPILER_SUPPORTS_AVX512VNNI
+    if (ecx & (1 << 11)) return CPUType::AVX512VNNI;
+# endif
+# ifdef INTGEMM_COMPILER_SUPPORTS_AVX512BW
+    if (ebx & (1 << 30)) return CPUType::AVX512BW;
+# endif
+# ifdef INTGEMM_COMPILER_SUPPORTS_AVX2
+    if (ebx & (1 << 5)) return CPUType::AVX2;
+# endif
+  }
+  if (m >= 1) {
+# if defined(_MSC_VER)
+    __cpuid(regs, 1);
+# else
+    __cpuid_count(1, 0, eax, ebx, ecx, edx);
+# endif
+    if (ecx & (1 << 9)) return CPUType::SSSE3;
+    if (edx & (1 << 26)) return CPUType::SSE2;
+  }
+  return CPUType::UNSUPPORTED;
+#endif
+}
+
+CPUType EnvironmentCPUID() {
+  const char *env_override = getenv("INTGEMM_CPUID");
+  if (!env_override) return CPUType::AVX512VNNI; /* This will be capped to actual ID */
+  if (!strcmp(env_override, "AVX512VNNI")) return CPUType::AVX512VNNI;
+  if (!strcmp(env_override, "AVX512BW")) return CPUType::AVX512BW;
+  if (!strcmp(env_override, "AVX2")) return CPUType::AVX2;
+  if (!strcmp(env_override, "SSSE3")) return CPUType::SSSE3;
+  if (!strcmp(env_override, "SSE2")) return CPUType::SSE2;
+  std::cerr << "Unrecognized INTGEMM_CPUID " << env_override << std::endl;
+  return CPUType::AVX512VNNI;
+}
+
+} // namespace
+
+CPUType GetCPUID() {
+  static const CPUType kCPU = std::min(RealCPUID(), EnvironmentCPUID());
+  return kCPU;
+}
+
 float Unsupported_MaxAbsolute(const float * /*begin*/, const float * /*end*/) {
   throw UnsupportedCPU();
 }
diff --git a/intgemm/intgemm.h b/intgemm/intgemm.h
index fdf6980..977210d 100644
--- a/intgemm/intgemm.h
+++ b/intgemm/intgemm.h
@@ -144,6 +144,7 @@ typedef Unsupported_16bit Kernels16;
 } // namespace AVX2
 #endif
 
+CPUType GetCPUID();
 
 /* Returns:
  * axx512vnni if the CPU supports AVX512VNNI
@@ -158,91 +159,9 @@ typedef Unsupported_16bit Kernels16;
  *
  * unsupported otherwise
  */
-template <class T> T ChooseCPU(T
-#ifdef INTGEMM_COMPILER_SUPPORTS_AVX512VNNI
-    avx512vnni
-#endif
-    , T
-#ifdef INTGEMM_COMPILER_SUPPORTS_AVX512BW
-    avx512bw
-#endif
-    , T
-#ifdef INTGEMM_COMPILER_SUPPORTS_AVX2
-    avx2
-#endif
-    , T ssse3, T
-#ifndef WASM
-    sse2
-#endif
-    , T
-#ifndef WASM
-    unsupported
-#endif
-    ) {
-#if defined(WASM)
-  // emscripten does SSE4.1 but we only use up to SSSE3.
-  return ssse3;
-#elif defined(__INTEL_COMPILER)
-# ifdef INTGEMM_COMPILER_SUPPORTS_AVX512VNNI
-  if (_may_i_use_cpu_feature(_FEATURE_AVX512_VNNI)) return avx512vnni;
-# endif
-# ifdef INTGEMM_COMPILER_SUPPORTS_AVX512BW
-  if (_may_i_use_cpu_feature(_FEATURE_AVX512BW)) return avx512bw;
-# endif
-# ifdef INTGEMM_COMPILER_SUPPORTS_AVX2
-  if (_may_i_use_cpu_feature(_FEATURE_AVX2)) return avx2;
-# endif
-  if (_may_i_use_cpu_feature(_FEATURE_SSSE3)) return ssse3;
-  if (_may_i_use_cpu_feature(_FEATURE_SSE2)) return sse2;
-  return unsupported;
-#else
-// Not emscripten, not Intel compiler
-# if defined(_MSC_VER)
-  int regs[4];
-  int &eax = regs[0], &ebx = regs[1], &ecx = regs[2], &edx = regs[3];
-  __cpuid(regs, 0);
-  int m = eax;
-# else
-  /* gcc and clang.
-   * If intgemm is compiled by gcc 6.4.1 then dlopened into an executable
-   * compiled by gcc 7.3.0, there will be a undefined symbol __cpu_info.
-   * Work around this by calling the intrinsics more directly instead of
-   * __builtin_cpu_supports.
-   *
-   * clang 6.0.0-1ubuntu2 supports vnni but doesn't have
-   * __builtin_cpu_supports("avx512vnni")
-   * so use the hand-coded CPUID for clang.
-   */
-  unsigned int m = __get_cpuid_max(0, 0);
-  unsigned int eax, ebx, ecx, edx;
-# endif
-  if (m >= 7) {
-# if defined(_MSC_VER)
-    __cpuid(regs, 7);
-# else
-    __cpuid_count(7, 0, eax, ebx, ecx, edx);
-# endif
-# ifdef INTGEMM_COMPILER_SUPPORTS_AVX512VNNI
-    if (ecx & (1 << 11)) return avx512vnni;
-# endif
-# ifdef INTGEMM_COMPILER_SUPPORTS_AVX512BW
-    if (ebx & (1 << 30)) return avx512bw;
-# endif
-# ifdef INTGEMM_COMPILER_SUPPORTS_AVX2
-    if (ebx & (1 << 5)) return avx2;
-# endif
-  }
-  if (m >= 1) {
-# if defined(_MSC_VER)
-    __cpuid(regs, 1);
-# else
-    __cpuid_count(1, 0, eax, ebx, ecx, edx);
-# endif
-    if (ecx & (1 << 9)) return ssse3;
-    if (edx & (1 << 26)) return sse2;
-  }
-  return unsupported;
-#endif
+template <class T> T ChooseCPU(T avx512vnni, T avx512bw, T avx2, T ssse3, T sse2, T unsupported) {
+  const T ret[] = {unsupported, sse2, ssse3, avx2, avx512bw, avx512vnni};
+  return ret[(int)GetCPUID()];
 }
 
 struct TileInfo {
diff --git a/intgemm/types.h b/intgemm/types.h
index f6b083c..81b38af 100644
--- a/intgemm/types.h
+++ b/intgemm/types.h
@@ -58,11 +58,11 @@ typedef unsigned int Index;
 // If you want to detect the CPU and dispatch yourself, here's what to use:
 enum class CPUType {
   UNSUPPORTED = 0,
-  SSE2,
-  SSSE3,
-  AVX2,
-  AVX512BW,
-  AVX512VNNI
+  SSE2 = 1,
+  SSSE3 = 2,
+  AVX2 = 3,
+  AVX512BW = 4,
+  AVX512VNNI = 5
 };
 
 // Running CPU type. This is defined in intgemm.cc (as the dispatcher).
```