From 17ffb651173b85cc0156d6a5c403e12077a6e3a1 Mon Sep 17 00:00:00 2001 From: Jeff Hammond Date: Tue, 22 Sep 2020 00:29:46 -0700 Subject: detect AVX-512 FMA count (#125) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * add Ice Lake Server and Sapphire Rapids models The information contained in this commit was obtained from "Intel® Architecture Instruction Set Extensions and Future Features Programming Reference" document 319433-040 from https://software.intel.com/content/www/us/en/develop/download/intel-architecture-instruction-set-extensions-programming-reference.html Signed-off-by: Jeff Hammond * Tiger Lake; Ice Lake NNP-I; SPR string Signed-off-by: Hammond, Jeff R * second FMA features - incomplete and wrong Signed-off-by: Hammond, Jeff R * oops: use T/F not 2/1 Signed-off-by: Jeff Hammond * implement SKX lookup Signed-off-by: Hammond, Jeff R * add Intel copyright * cleanup AVX512 second FMA code 1) remove debug stuff 2) remove ICX - will add details when available Signed-off-by: Hammond, Jeff R * fix CPX detection Signed-off-by: Hammond, Jeff R * remove elses Signed-off-by: Hammond, Jeff R * remove curly braces from single-line conditional bodies Signed-off-by: Hammond, Jeff R * apply clang-format Signed-off-by: Hammond, Jeff R Fixes #120 --- include/cpuinfo_x86.h | 5 +++- src/cpuinfo_x86.c | 72 +++++++++++++++++++++++++++++++++++++++++---------- 2 files changed, 63 insertions(+), 14 deletions(-) diff --git a/include/cpuinfo_x86.h b/include/cpuinfo_x86.h index 60af05a..c21a46a 100644 --- a/include/cpuinfo_x86.h +++ b/include/cpuinfo_x86.h @@ -69,10 +69,11 @@ typedef struct { int avx512bitalg : 1; int avx512vpopcntdq : 1; int avx512_4vnniw : 1; + int avx512_4vbmi2 : 1; + int avx512_second_fma : 1; int avx512_4fmaps : 1; int avx512_bf16 : 1; int avx512_vp2intersect : 1; - int amx_bf16 : 1; int amx_tile : 1; int amx_int8 : 1; @@ -194,6 +195,8 @@ typedef enum { X86_AVX512BITALG, X86_AVX512VPOPCNTDQ, X86_AVX512_4VNNIW, + 
X86_AVX512_4VBMI2, + X86_AVX512_SECOND_FMA, X86_AVX512_4FMAPS, X86_AVX512_BF16, X86_AVX512_VP2INTERSECT, diff --git a/src/cpuinfo_x86.c b/src/cpuinfo_x86.c index 21bbe0c..d5edd30 100644 --- a/src/cpuinfo_x86.c +++ b/src/cpuinfo_x86.c @@ -14,12 +14,13 @@ // limitations under the License. #include "cpuinfo_x86.h" -#include "internal/bit_utils.h" -#include "internal/cpuid_x86.h" #include #include +#include "internal/bit_utils.h" +#include "internal/cpuid_x86.h" + #if !defined(CPU_FEATURES_ARCH_X86) #error "Cannot compile cpuinfo_x86 on a non x86 platform." #endif @@ -125,6 +126,35 @@ static bool HasTmmOsXSave(uint32_t xcr0_eax) { MASK_ZMM16_31 | MASK_XTILECFG | MASK_XTILEDATA); } +static bool HasSecondFMA(uint32_t model) { + // Skylake server + if (model == 0x55) { + char proc_name[49] = {0}; + FillX86BrandString(proc_name); + // detect Xeon + if (proc_name[9] == 'X') { + // detect Silver or Bronze + if (proc_name[17] == 'S' || proc_name[17] == 'B') return false; + // detect Gold 5_20 and below, except for Gold 53__ + if (proc_name[17] == 'G' && proc_name[22] == '5') + return ((proc_name[23] == '3') || + (proc_name[24] == '2' && proc_name[25] == '2')); + // detect Xeon W 210x + if (proc_name[17] == 'W' && proc_name[21] == '0') return false; + // detect Xeon D 2xxx + if (proc_name[17] == 'D' && proc_name[19] == '2' && proc_name[20] == '1') + return false; + } + return true; + } + // Cannon Lake client + if (model == 0x66) return false; + // Ice Lake client + if (model == 0x7d || model == 0x7e) return false; + // This is the right default... + return true; +} + static void SetVendor(const Leaf leaf, char* const vendor) { *(uint32_t*)(vendor) = leaf.ebx; *(uint32_t*)(vendor + 4) = leaf.edx; @@ -1059,7 +1089,8 @@ typedef struct { } OsSupport; // Reference https://en.wikipedia.org/wiki/CPUID. 
-static void ParseCpuId(const uint32_t max_cpuid_leaf, X86Info* info, OsSupport* os_support) { +static void ParseCpuId(const uint32_t max_cpuid_leaf, X86Info* info, + OsSupport* os_support) { const Leaf leaf_1 = SafeCpuId(max_cpuid_leaf, 1); const Leaf leaf_7 = SafeCpuId(max_cpuid_leaf, 7); const Leaf leaf_7_1 = SafeCpuIdEx(max_cpuid_leaf, 7, 1); @@ -1141,6 +1172,8 @@ static void ParseCpuId(const uint32_t max_cpuid_leaf, X86Info* info, OsSupport* features->avx512bitalg = IsBitSet(leaf_7.ecx, 12); features->avx512vpopcntdq = IsBitSet(leaf_7.ecx, 14); features->avx512_4vnniw = IsBitSet(leaf_7.edx, 2); + features->avx512_4vbmi2 = IsBitSet(leaf_7.edx, 3); + features->avx512_second_fma = HasSecondFMA(info->model); features->avx512_4fmaps = IsBitSet(leaf_7.edx, 3); features->avx512_bf16 = IsBitSet(leaf_7_1.eax, 5); features->avx512_vp2intersect = IsBitSet(leaf_7.edx, 8); @@ -1153,7 +1186,8 @@ static void ParseCpuId(const uint32_t max_cpuid_leaf, X86Info* info, OsSupport* } } -// Reference https://en.wikipedia.org/wiki/CPUID#EAX=80000000h:_Get_Highest_Extended_Function_Implemented. +// Reference +// https://en.wikipedia.org/wiki/CPUID#EAX=80000000h:_Get_Highest_Extended_Function_Implemented. 
static void ParseExtraAMDCpuId(X86Info* info, OsSupport os_support) { const Leaf leaf_80000000 = CpuId(0x80000000); const uint32_t max_extended_cpuid_leaf = leaf_80000000.eax; @@ -1265,11 +1299,11 @@ X86Microarchitecture GetX86Microarchitecture(const X86Info* info) { case CPUID(0x06, 0x66): // https://en.wikipedia.org/wiki/Cannon_Lake_(microarchitecture) return INTEL_CNL; - case CPUID(0x06, 0x7D): // client - case CPUID(0x06, 0x7E): // client - case CPUID(0x06, 0x9D): // NNP-I - case CPUID(0x06, 0x6A): // server - case CPUID(0x06, 0x6C): // server + case CPUID(0x06, 0x7D): // client + case CPUID(0x06, 0x7E): // client + case CPUID(0x06, 0x9D): // NNP-I + case CPUID(0x06, 0x6A): // server + case CPUID(0x06, 0x6C): // server // https://en.wikipedia.org/wiki/Ice_Lake_(microprocessor) return INTEL_ICL; case CPUID(0x06, 0x8C): @@ -1281,10 +1315,14 @@ X86Microarchitecture GetX86Microarchitecture(const X86Info* info) { return INTEL_SPR; case CPUID(0x06, 0x8E): switch (info->stepping) { - case 9: return INTEL_KBL; // https://en.wikipedia.org/wiki/Kaby_Lake - case 10: return INTEL_CFL; // https://en.wikipedia.org/wiki/Coffee_Lake - case 11: return INTEL_WHL; // https://en.wikipedia.org/wiki/Whiskey_Lake_(microarchitecture) - default: return X86_UNKNOWN; + case 9: + return INTEL_KBL; // https://en.wikipedia.org/wiki/Kaby_Lake + case 10: + return INTEL_CFL; // https://en.wikipedia.org/wiki/Coffee_Lake + case 11: + return INTEL_WHL; // https://en.wikipedia.org/wiki/Whiskey_Lake_(microarchitecture) + default: + return X86_UNKNOWN; } case CPUID(0x06, 0x9E): if (info->stepping > 9) { @@ -1427,6 +1465,10 @@ int GetX86FeaturesEnumValue(const X86Features* features, return features->avx512vpopcntdq; case X86_AVX512_4VNNIW: return features->avx512_4vnniw; + case X86_AVX512_4VBMI2: + return features->avx512_4vbmi2; + case X86_AVX512_SECOND_FMA: + return features->avx512_second_fma; case X86_AVX512_4FMAPS: return features->avx512_4fmaps; case X86_AVX512_BF16: @@ -1551,6 +1593,10 @@ 
const char* GetX86FeaturesEnumName(X86FeaturesEnum value) { return "avx512vpopcntdq"; case X86_AVX512_4VNNIW: return "avx512_4vnniw"; + case X86_AVX512_4VBMI2: + return "avx512_4vbmi2"; + case X86_AVX512_SECOND_FMA: + return "avx512_second_fma"; case X86_AVX512_4FMAPS: return "avx512_4fmaps"; case X86_AVX512_BF16: -- cgit v1.2.3