Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/marian-nmt/intgemm/intgemm.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Kenneth Heafield <github@kheafield.com> 2021-05-01 17:27:53 +0300
committer Kenneth Heafield <github@kheafield.com> 2021-05-01 17:27:53 +0300
commit b578ae4521229c53ba0512be3d562b0afbd68fd4 (patch)
tree ab6319b1394ad12698e233080d87d0bdfae0b11c
parent b52c3edde5af8d0bfb21626a69aa8386779c7dfc (diff)
Environment variable INTGEMM_CPUID to downgrade CPU model
-rw-r--r-- intgemm/intgemm.cc 93
-rw-r--r-- intgemm/intgemm.h 89
-rw-r--r-- intgemm/types.h 10
3 files changed, 102 insertions, 90 deletions
diff --git a/intgemm/intgemm.cc b/intgemm/intgemm.cc
index 82ad750..e604ead 100644
--- a/intgemm/intgemm.cc
+++ b/intgemm/intgemm.cc
@@ -1,8 +1,101 @@
#include "intgemm.h"
#include "stats.h"
+#include <stdlib.h>
+
+#include <iostream>
+
namespace intgemm {
+namespace {
+
+// Return the maximum CPU model that's found and supported at compile time.
+CPUType RealCPUID() {
+#if defined(WASM)
+ // emscripten does SSE4.1 but we only use up to SSSE3.
+ return CPUType::SSSE3;
+#elif defined(__INTEL_COMPILER)
+# ifdef INTGEMM_COMPILER_SUPPORTS_AVX512VNNI
+ if (_may_i_use_cpu_feature(_FEATURE_AVX512_VNNI)) return CPUType::AVX512VNNI;
+# endif
+# ifdef INTGEMM_COMPILER_SUPPORTS_AVX512BW
+ if (_may_i_use_cpu_feature(_FEATURE_AVX512BW)) return CPUType::AVX512BW;
+# endif
+# ifdef INTGEMM_COMPILER_SUPPORTS_AVX2
+ if (_may_i_use_cpu_feature(_FEATURE_AVX2)) return CPUType::AVX2;
+# endif
+ if (_may_i_use_cpu_feature(_FEATURE_SSSE3)) return CPUType::SSSE3;
+ if (_may_i_use_cpu_feature(_FEATURE_SSE2)) return CPUType::SSE2;
+ return CPUType::UNSUPPORTED;
+#else
+// Not emscripten, not Intel compiler
+# if defined(_MSC_VER)
+ int regs[4];
+ int &eax = regs[0], &ebx = regs[1], &ecx = regs[2], &edx = regs[3];
+ __cpuid(regs, 0);
+ int m = eax;
+# else
+ /* gcc and clang.
+ * If intgemm is compiled by gcc 6.4.1 then dlopened into an executable
+ * compiled by gcc 7.3.0, there will be a undefined symbol __cpu_info.
+ * Work around this by calling the intrinsics more directly instead of
+ * __builtin_cpu_supports.
+ *
+ * clang 6.0.0-1ubuntu2 supports vnni but doesn't have
+ * __builtin_cpu_supports("avx512vnni")
+ * so use the hand-coded CPUID for clang.
+ */
+ unsigned int m = __get_cpuid_max(0, 0);
+ unsigned int eax, ebx, ecx, edx;
+# endif
+ if (m >= 7) {
+# if defined(_MSC_VER)
+ __cpuid(regs, 7);
+# else
+ __cpuid_count(7, 0, eax, ebx, ecx, edx);
+# endif
+# ifdef INTGEMM_COMPILER_SUPPORTS_AVX512VNNI
+ if (ecx & (1 << 11)) return CPUType::AVX512VNNI;
+# endif
+# ifdef INTGEMM_COMPILER_SUPPORTS_AVX512BW
+ if (ebx & (1 << 30)) return CPUType::AVX512BW;
+# endif
+# ifdef INTGEMM_COMPILER_SUPPORTS_AVX2
+ if (ebx & (1 << 5)) return CPUType::AVX2;
+# endif
+ }
+ if (m >= 1) {
+# if defined(_MSC_VER)
+ __cpuid(regs, 1);
+# else
+ __cpuid_count(1, 0, eax, ebx, ecx, edx);
+# endif
+ if (ecx & (1 << 9)) return CPUType::SSSE3;
+ if (edx & (1 << 26)) return CPUType::SSE2;
+ }
+ return CPUType::UNSUPPORTED;
+#endif
+}
+
+CPUType EnvironmentCPUID() {
+ const char *env_override = getenv("INTGEMM_CPUID");
+ if (!env_override) return CPUType::AVX512VNNI; /* This will be capped to actual ID */
+ if (!strcmp(env_override, "AVX512VNNI")) return CPUType::AVX512VNNI;
+ if (!strcmp(env_override, "AVX512BW")) return CPUType::AVX512BW;
+ if (!strcmp(env_override, "AVX2")) return CPUType::AVX2;
+ if (!strcmp(env_override, "SSSE3")) return CPUType::SSSE3;
+ if (!strcmp(env_override, "SSE2")) return CPUType::SSE2;
+ std::cerr << "Unrecognized INTGEMM_CPUID " << env_override << std::endl;
+ return CPUType::AVX512VNNI;
+}
+
+} // namespace
+
+CPUType GetCPUID() {
+ static const CPUType kCPU = std::min(RealCPUID(), EnvironmentCPUID());
+ return kCPU;
+}
+
float Unsupported_MaxAbsolute(const float * /*begin*/, const float * /*end*/) {
throw UnsupportedCPU();
}
diff --git a/intgemm/intgemm.h b/intgemm/intgemm.h
index fdf6980..977210d 100644
--- a/intgemm/intgemm.h
+++ b/intgemm/intgemm.h
@@ -144,6 +144,7 @@ typedef Unsupported_16bit Kernels16;
} // namespace AVX2
#endif
+CPUType GetCPUID();
/* Returns:
* axx512vnni if the CPU supports AVX512VNNI
@@ -158,91 +159,9 @@ typedef Unsupported_16bit Kernels16;
*
* unsupported otherwise
*/
-template <class T> T ChooseCPU(T
-#ifdef INTGEMM_COMPILER_SUPPORTS_AVX512VNNI
- avx512vnni
-#endif
- , T
-#ifdef INTGEMM_COMPILER_SUPPORTS_AVX512BW
- avx512bw
-#endif
- , T
-#ifdef INTGEMM_COMPILER_SUPPORTS_AVX2
- avx2
-#endif
- , T ssse3, T
-#ifndef WASM
- sse2
-#endif
- , T
-#ifndef WASM
- unsupported
-#endif
- ) {
-#if defined(WASM)
- // emscripten does SSE4.1 but we only use up to SSSE3.
- return ssse3;
-#elif defined(__INTEL_COMPILER)
-# ifdef INTGEMM_COMPILER_SUPPORTS_AVX512VNNI
- if (_may_i_use_cpu_feature(_FEATURE_AVX512_VNNI)) return avx512vnni;
-# endif
-# ifdef INTGEMM_COMPILER_SUPPORTS_AVX512BW
- if (_may_i_use_cpu_feature(_FEATURE_AVX512BW)) return avx512bw;
-# endif
-# ifdef INTGEMM_COMPILER_SUPPORTS_AVX2
- if (_may_i_use_cpu_feature(_FEATURE_AVX2)) return avx2;
-# endif
- if (_may_i_use_cpu_feature(_FEATURE_SSSE3)) return ssse3;
- if (_may_i_use_cpu_feature(_FEATURE_SSE2)) return sse2;
- return unsupported;
-#else
-// Not emscripten, not Intel compiler
-# if defined(_MSC_VER)
- int regs[4];
- int &eax = regs[0], &ebx = regs[1], &ecx = regs[2], &edx = regs[3];
- __cpuid(regs, 0);
- int m = eax;
-# else
- /* gcc and clang.
- * If intgemm is compiled by gcc 6.4.1 then dlopened into an executable
- * compiled by gcc 7.3.0, there will be a undefined symbol __cpu_info.
- * Work around this by calling the intrinsics more directly instead of
- * __builtin_cpu_supports.
- *
- * clang 6.0.0-1ubuntu2 supports vnni but doesn't have
- * __builtin_cpu_supports("avx512vnni")
- * so use the hand-coded CPUID for clang.
- */
- unsigned int m = __get_cpuid_max(0, 0);
- unsigned int eax, ebx, ecx, edx;
-# endif
- if (m >= 7) {
-# if defined(_MSC_VER)
- __cpuid(regs, 7);
-# else
- __cpuid_count(7, 0, eax, ebx, ecx, edx);
-# endif
-# ifdef INTGEMM_COMPILER_SUPPORTS_AVX512VNNI
- if (ecx & (1 << 11)) return avx512vnni;
-# endif
-# ifdef INTGEMM_COMPILER_SUPPORTS_AVX512BW
- if (ebx & (1 << 30)) return avx512bw;
-# endif
-# ifdef INTGEMM_COMPILER_SUPPORTS_AVX2
- if (ebx & (1 << 5)) return avx2;
-# endif
- }
- if (m >= 1) {
-# if defined(_MSC_VER)
- __cpuid(regs, 1);
-# else
- __cpuid_count(1, 0, eax, ebx, ecx, edx);
-# endif
- if (ecx & (1 << 9)) return ssse3;
- if (edx & (1 << 26)) return sse2;
- }
- return unsupported;
-#endif
+template <class T> T ChooseCPU(T avx512vnni, T avx512bw, T avx2, T ssse3, T sse2, T unsupported) {
+ const T ret[] = {unsupported, sse2, ssse3, avx2, avx512bw, avx512vnni};
+ return ret[(int)GetCPUID()];
}
struct TileInfo {
diff --git a/intgemm/types.h b/intgemm/types.h
index f6b083c..81b38af 100644
--- a/intgemm/types.h
+++ b/intgemm/types.h
@@ -58,11 +58,11 @@ typedef unsigned int Index;
// If you want to detect the CPU and dispatch yourself, here's what to use:
enum class CPUType {
UNSUPPORTED = 0,
- SSE2,
- SSSE3,
- AVX2,
- AVX512BW,
- AVX512VNNI
+ SSE2 = 1,
+ SSSE3 = 2,
+ AVX2 = 3,
+ AVX512BW = 4,
+ AVX512VNNI = 5
};
// Running CPU type. This is defined in intgemm.cc (as the dispatcher).