Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/pytorch/cpuinfo.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: snadampal <87143774+snadampal@users.noreply.github.com> 2022-02-28 17:42:57 +0300
committer: GitHub <noreply@github.com> 2022-02-28 17:42:57 +0300
commit: 6288930068efc8dff4f3c0b95f062fc5ddceba04 (patch)
tree: 9416caae352642b55dd3e108a97844027b8b8e11
parent: 85e931e36af24a3ec73b146b9fe8423b4b2a67db (diff)
cpuinfo: aarch64: add cache configuration details for neoverse-n1/v1/n2 (#75)
* cpuinfo: aarch64: add cache configuration details for neoverse-n1
* cpuinfo: aarch64: add support for neoverse-v1 and n2 architectures
-rw-r--r-- README.md 2
-rw-r--r-- include/cpuinfo.h 22
-rw-r--r-- src/arm/cache.c 60
-rw-r--r-- src/arm/linux/aarch32-isa.c 4
-rw-r--r-- src/arm/linux/aarch64-isa.c 12
-rw-r--r-- src/arm/midr.h 3
-rw-r--r-- src/arm/uarch.c 8
-rw-r--r-- tools/cpu-info.c 6
-rw-r--r-- tools/isa-info.c 2
9 files changed, 118 insertions, 1 deletions
diff --git a/README.md b/README.md
index 0eb71a5..7866fd6 100644
--- a/README.md
+++ b/README.md
@@ -239,7 +239,7 @@ LDFLAGS+= $(pkg-config --libs libcpuinfo)
- [x] AMD-designed x86/x86-64 cores (up to Puma/Jaguar and Zen 2)
- [ ] VIA-designed x86/x86-64 cores
- [ ] Other x86 cores (DM&P, RDC, Transmeta, Cyrix, Rise)
- - [x] ARM-designed ARM cores (up to Cortex-A55, Cortex-A77, and Neoverse E1/N1)
+ - [x] ARM-designed ARM cores (up to Cortex-A55, Cortex-A77, and Neoverse E1/N1/V1/N2)
- [x] Qualcomm-designed ARM cores (Scorpion, Krait, and Kryo)
- [x] Nvidia-designed ARM cores (Denver and Carmel)
- [x] Samsung-designed ARM cores (Exynos)
diff --git a/include/cpuinfo.h b/include/cpuinfo.h
index cffa299..258abd0 100644
--- a/include/cpuinfo.h
+++ b/include/cpuinfo.h
@@ -426,6 +426,10 @@ enum cpuinfo_uarch {
cpuinfo_uarch_neoverse_n1 = 0x00300400,
/** ARM Neoverse E1. */
cpuinfo_uarch_neoverse_e1 = 0x00300401,
+ /** ARM Neoverse V1. */
+ cpuinfo_uarch_neoverse_v1 = 0x00300402,
+ /** ARM Neoverse N2. */
+ cpuinfo_uarch_neoverse_n2 = 0x00300403,
/** ARM Cortex-X1. */
cpuinfo_uarch_cortex_x1 = 0x00300500,
@@ -1460,7 +1464,9 @@ static inline bool cpuinfo_has_x86_sha(void) {
#endif
#if CPUINFO_ARCH_ARM64
bool atomics;
+ bool bf16;
bool sve;
+ bool svebf16;
bool sve2;
#endif
bool rdm;
@@ -1793,6 +1799,22 @@ static inline bool cpuinfo_has_arm_sve2(void) {
#endif
}
+static inline bool cpuinfo_has_arm_bf16(void) {
+ #if CPUINFO_ARCH_ARM64
+ return cpuinfo_isa.bf16;
+ #else
+ return false;
+ #endif
+}
+
+static inline bool cpuinfo_has_arm_svebf16(void) {
+ #if CPUINFO_ARCH_ARM64
+ return cpuinfo_isa.svebf16;
+ #else
+ return false;
+ #endif
+}
+
const struct cpuinfo_processor* CPUINFO_ABI cpuinfo_get_processors(void);
const struct cpuinfo_core* CPUINFO_ABI cpuinfo_get_cores(void);
const struct cpuinfo_cluster* CPUINFO_ABI cpuinfo_get_clusters(void);
diff --git a/src/arm/cache.c b/src/arm/cache.c
index 6ec2d5b..1a6dd38 100644
--- a/src/arm/cache.c
+++ b/src/arm/cache.c
@@ -1239,6 +1239,63 @@ void cpuinfo_arm_decode_cache(
};
break;
}
+ case cpuinfo_uarch_neoverse_n1:
+ case cpuinfo_uarch_neoverse_v1:
+ case cpuinfo_uarch_neoverse_n2:
+ {
+ /*
+ * ARM Neoverse-n1 Core Technical Reference Manual
+ * A6.1. About the L1 memory system
+ * The L1 memory system consists of separate instruction and data caches. Both have a fixed size of 64KB.
+ *
+ * A6.1.1 L1 instruction-side memory system
+ * The L1 instruction memory system has the following key features:
+ * - Virtually Indexed, Physically Tagged (VIPT), which behaves as a Physically Indexed,
+ * Physically Tagged (PIPT) 4-way set-associative L1 data cache.
+ * - Fixed cache line length of 64 bytes.
+ *
+ * A6.1.2 L1 data-side memory system
+ * The L1 data memory system has the following features:
+ * - Virtually Indexed, Physically Tagged (VIPT), which behaves as a Physically Indexed,
+ * Physically Tagged (PIPT) 4-way set-associative L1 data cache.
+ * - Fixed cache line length of 64 bytes.
+ * - Pseudo-LRU cache replacement policy.
+ *
+ * A7.1 About the L2 memory system
+ * The L2 memory subsystem consist of:
+ * - An 8-way set associative L2 cache with a configurable size of 256KB, 512KB, or 1024KB. Cache lines
+ * have a fixed length of 64 bytes.
+ * - Strictly inclusive with L1 data cache.
+ * - When configured with instruction cache hardware coherency, strictly inclusive with L1 instruction cache.
+ * - When configured without instruction cache hardware coherency, weakly inclusive with L1 instruction cache.
+ */
+
+ const uint32_t min_l2_size_KB= 256;
+ const uint32_t min_l3_size_KB = 0;
+
+ *l1i = (struct cpuinfo_cache) {
+ .size = 64 * 1024,
+ .associativity = 4,
+ .line_size = 64,
+ };
+ *l1d = (struct cpuinfo_cache) {
+ .size = 64 * 1024,
+ .associativity = 4,
+ .line_size = 64,
+ };
+ *l2 = (struct cpuinfo_cache) {
+ .size = min_l2_size_KB * 1024,
+ .associativity = 8,
+ .line_size = 64,
+ .flags = CPUINFO_CACHE_INCLUSIVE,
+ };
+ *l3 = (struct cpuinfo_cache) {
+ .size = min_l3_size_KB * 1024,
+ .associativity = 16,
+ .line_size = 64,
+ };
+ break;
+ }
#if CPUINFO_ARCH_ARM && !defined(__ARM_ARCH_8A__)
case cpuinfo_uarch_scorpion:
/*
@@ -1656,6 +1713,9 @@ uint32_t cpuinfo_arm_compute_max_cache_size(const struct cpuinfo_processor* proc
*/
return 8 * 1024 * 1024;
case cpuinfo_uarch_cortex_a55:
+ case cpuinfo_uarch_neoverse_n1:
+ case cpuinfo_uarch_neoverse_v1:
+ case cpuinfo_uarch_neoverse_n2:
case cpuinfo_uarch_cortex_a75:
case cpuinfo_uarch_cortex_a76:
case cpuinfo_uarch_exynos_m4:
diff --git a/src/arm/linux/aarch32-isa.c b/src/arm/linux/aarch32-isa.c
index df68aa1..d6f6a21 100644
--- a/src/arm/linux/aarch32-isa.c
+++ b/src/arm/linux/aarch32-isa.c
@@ -64,6 +64,8 @@ void cpuinfo_arm_linux_decode_isa_from_proc_cpuinfo(
* - Processors with Exynos M4 cores
* - Processors with Exynos M5 cores
* - Neoverse N1 cores
+ * - Neoverse V1 cores
+ * - Neoverse N2 cores
*/
if (chipset->series == cpuinfo_arm_chipset_series_samsung_exynos && chipset->model == 9810) {
/* Only little cores of Exynos 9810 support FP16 & RDM */
@@ -76,6 +78,8 @@ void cpuinfo_arm_linux_decode_isa_from_proc_cpuinfo(
case UINT32_C(0x4100D0C0): /* Neoverse N1 */
case UINT32_C(0x4100D0D0): /* Cortex-A77 */
case UINT32_C(0x4100D0E0): /* Cortex-A76AE */
+ case UINT32_C(0x4100D400): /* Neoverse V1 */
+ case UINT32_C(0x4100D490): /* Neoverse N2 */
case UINT32_C(0x4800D400): /* Cortex-A76 (HiSilicon) */
case UINT32_C(0x51008020): /* Kryo 385 Gold (Cortex-A75) */
case UINT32_C(0x51008030): /* Kryo 385 Silver (Cortex-A55) */
diff --git a/src/arm/linux/aarch64-isa.c b/src/arm/linux/aarch64-isa.c
index 2000e1a..7b18095 100644
--- a/src/arm/linux/aarch64-isa.c
+++ b/src/arm/linux/aarch64-isa.c
@@ -41,6 +41,8 @@ void cpuinfo_arm64_linux_decode_isa_from_proc_cpuinfo(
* - Processors with Exynos M4 cores
* - Processors with Exynos M5 cores
* - Neoverse N1 cores
+ * - Neoverse V1 cores
+ * - Neoverse N2 cores
*/
if (chipset->series == cpuinfo_arm_chipset_series_samsung_exynos && chipset->model == 9810) {
/* Exynos 9810 reports that it supports FP16 compute, but in fact only little cores do */
@@ -54,6 +56,8 @@ void cpuinfo_arm64_linux_decode_isa_from_proc_cpuinfo(
case UINT32_C(0x4100D0C0): /* Neoverse N1 */
case UINT32_C(0x4100D0D0): /* Cortex-A77 */
case UINT32_C(0x4100D0E0): /* Cortex-A76AE */
+ case UINT32_C(0x4100D400): /* Neoverse V1 */
+ case UINT32_C(0x4100D490): /* Neoverse N2 */
case UINT32_C(0x4800D400): /* Cortex-A76 (HiSilicon) */
case UINT32_C(0x51008020): /* Kryo 385 Gold (Cortex-A75) */
case UINT32_C(0x51008030): /* Kryo 385 Silver (Cortex-A55) */
@@ -89,6 +93,8 @@ void cpuinfo_arm64_linux_decode_isa_from_proc_cpuinfo(
case UINT32_C(0x4100D0C0): /* Neoverse N1 */
case UINT32_C(0x4100D0D0): /* Cortex-A77 */
case UINT32_C(0x4100D0E0): /* Cortex-A76AE */
+ case UINT32_C(0x4100D400): /* Neoverse V1 */
+ case UINT32_C(0x4100D490): /* Neoverse N2 */
case UINT32_C(0x4100D4A0): /* Neoverse E1 */
case UINT32_C(0x4800D400): /* Cortex-A76 (HiSilicon) */
case UINT32_C(0x51008040): /* Kryo 485 Gold (Cortex-A76) */
@@ -124,4 +130,10 @@ void cpuinfo_arm64_linux_decode_isa_from_proc_cpuinfo(
if (features2 & CPUINFO_ARM_LINUX_FEATURE2_SVE2) {
isa->sve2 = true;
}
+ if (features2 & CPUINFO_ARM_LINUX_FEATURE2_BF16) {
+ isa->bf16 = true;
+ }
+ if (features2 & CPUINFO_ARM_LINUX_FEATURE2_SVEBF16) {
+ isa->svebf16 = true;
+ }
}
diff --git a/src/arm/midr.h b/src/arm/midr.h
index 739dc19..6329783 100644
--- a/src/arm/midr.h
+++ b/src/arm/midr.h
@@ -184,9 +184,12 @@ inline static uint32_t midr_score_core(uint32_t midr) {
case UINT32_C(0x51008000): /* Kryo 260 / 280 Gold */
case UINT32_C(0x51002050): /* Kryo Gold */
case UINT32_C(0x4800D400): /* Cortex-A76 (HiSilicon) */
+ case UINT32_C(0x4100D490): /* Neoverse N2 */
case UINT32_C(0x4100D410): /* Cortex-A78 */
+ case UINT32_C(0x4100D400): /* Neoverse V1 */
case UINT32_C(0x4100D0D0): /* Cortex-A77 */
case UINT32_C(0x4100D0E0): /* Cortex-A76AE */
+ case UINT32_C(0x4100D0C0): /* Neoverse-N1 */
case UINT32_C(0x4100D0B0): /* Cortex-A76 */
case UINT32_C(0x4100D0A0): /* Cortex-A75 */
case UINT32_C(0x4100D090): /* Cortex-A73 */
diff --git a/src/arm/uarch.c b/src/arm/uarch.c
index 8b5362b..346e1c1 100644
--- a/src/arm/uarch.c
+++ b/src/arm/uarch.c
@@ -91,6 +91,11 @@ void cpuinfo_arm_decode_vendor_uarch(
case 0xD0E: /* Cortex-A76AE */
*uarch = cpuinfo_uarch_cortex_a76;
break;
+#if CPUINFO_ARCH_ARM64 && !defined(__ANDROID__)
+ case 0xD40:
+ *uarch = cpuinfo_uarch_neoverse_v1;
+ break;
+#endif /* CPUINFO_ARCH_ARM64 && !defined(__ANDROID__) */
case 0xD41: /* Cortex-A78 */
*uarch = cpuinfo_uarch_cortex_a78;
break;
@@ -98,6 +103,9 @@ void cpuinfo_arm_decode_vendor_uarch(
*uarch = cpuinfo_uarch_cortex_x1;
break;
#if CPUINFO_ARCH_ARM64 && !defined(__ANDROID__)
+ case 0xD49:
+ *uarch = cpuinfo_uarch_neoverse_n2;
+ break;
case 0xD4A:
*uarch = cpuinfo_uarch_neoverse_e1;
break;
diff --git a/tools/cpu-info.c b/tools/cpu-info.c
index 30ec633..ff80405 100644
--- a/tools/cpu-info.c
+++ b/tools/cpu-info.c
@@ -187,6 +187,12 @@ static const char* uarch_to_string(enum cpuinfo_uarch uarch) {
return "Cortex-A77";
case cpuinfo_uarch_cortex_a78:
return "Cortex-A78";
+ case cpuinfo_uarch_neoverse_n1:
+ return "Neoverse-N1";
+ case cpuinfo_uarch_neoverse_v1:
+ return "Neoverse-V1";
+ case cpuinfo_uarch_neoverse_n2:
+ return "Neoverse-N2";
case cpuinfo_uarch_cortex_x1:
return "Cortex-X1";
case cpuinfo_uarch_scorpion:
diff --git a/tools/isa-info.c b/tools/isa-info.c
index 92abb57..7320b74 100644
--- a/tools/isa-info.c
+++ b/tools/isa-info.c
@@ -157,12 +157,14 @@ int main(int argc, char** argv) {
printf("\tARM v8.1 atomics: %s\n", cpuinfo_has_arm_atomics() ? "yes" : "no");
printf("\tARM v8.1 SQRDMLxH: %s\n", cpuinfo_has_arm_neon_rdm() ? "yes" : "no");
printf("\tARM v8.2 FP16 arithmetics: %s\n", cpuinfo_has_arm_fp16_arith() ? "yes" : "no");
+ printf("\tARM v8.2 BF16: %s\n", cpuinfo_has_arm_bf16() ? "yes" : "no");
printf("\tARM v8.3 dot product: %s\n", cpuinfo_has_arm_neon_dot() ? "yes" : "no");
printf("\tARM v8.3 JS conversion: %s\n", cpuinfo_has_arm_jscvt() ? "yes" : "no");
printf("\tARM v8.3 complex: %s\n", cpuinfo_has_arm_fcma() ? "yes" : "no");
printf("SIMD extensions:\n");
printf("\tARM SVE: %s\n", cpuinfo_has_arm_sve() ? "yes" : "no");
+ printf("\tARM SVE BF16: %s\n", cpuinfo_has_arm_svebf16() ? "yes" : "no");
printf("\tARM SVE 2: %s\n", cpuinfo_has_arm_sve2() ? "yes" : "no");
printf("Cryptography extensions:\n");