github.com/pytorch/cpuinfo.git
author    gaborkertesz-linaro <91903944+gaborkertesz-linaro@users.noreply.github.com>  2022-07-05 18:52:35 +0300
committer GitHub <noreply@github.com>  2022-07-05 18:52:35 +0300
commit    1baac2bb033666698e0e30074664aa32fa28ce80 (patch)
tree      12820a86a340a5d7fc8077b85176bca408c2bdbf
parent    ab5c79fa45e459dd9a98e53c8878e9e20fb7be7a (diff)
Enable win-arm64 (#82)
This patch implements the required APIs for the new win-arm64 platform by reading topology information via the Windows API. Build config: cmake . -A ARM64
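
For orientation, a minimal consumer sketch (using cpuinfo's existing public API from cpuinfo.h; this snippet is not part of the patch) that exercises the topology this change populates on win-arm64:

    #include <stdio.h>
    #include <cpuinfo.h>

    int main(void) {
        /* On win-arm64, cpuinfo_initialize() now dispatches to cpuinfo_arm_windows_init(). */
        if (!cpuinfo_initialize()) {
            fprintf(stderr, "cpuinfo_initialize() failed\n");
            return 1;
        }
        printf("package: %s\n", cpuinfo_get_package(0)->name);
        printf("%u logical processors, %u cores\n",
            cpuinfo_get_processors_count(),
            cpuinfo_get_cores_count());
        cpuinfo_deinitialize();
        return 0;
    }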
-rw-r--r--  CMakeLists.txt                                 5
-rw-r--r--  README.md                                      4
-rw-r--r--  src/arm/windows/init-by-logical-sys-info.c   885
-rw-r--r--  src/arm/windows/init.c                       253
-rw-r--r--  src/arm/windows/windows-arm-init.h            32
-rw-r--r--  src/cpuinfo/internal-api.h                     6
-rw-r--r--  src/init.c                                     2
7 files changed, 1185 insertions, 2 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 7c28761..1f8fc6c 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -65,7 +65,7 @@ IF(NOT CMAKE_SYSTEM_PROCESSOR)
"cpuinfo will compile, but cpuinfo_initialize() will always fail.")
SET(CPUINFO_SUPPORTED_PLATFORM FALSE)
ENDIF()
-ELSEIF(NOT CPUINFO_TARGET_PROCESSOR MATCHES "^(i[3-6]86|AMD64|x86(_64)?|armv[5-8].*|aarch64|arm64)$")
+ELSEIF(NOT CPUINFO_TARGET_PROCESSOR MATCHES "^(i[3-6]86|AMD64|x86(_64)?|armv[5-8].*|aarch64|arm64|ARM64)$")
MESSAGE(WARNING
"Target processor architecture \"${CPUINFO_TARGET_PROCESSOR}\" is not supported in cpuinfo. "
"cpuinfo will compile, but cpuinfo_initialize() will always fail.")
@@ -171,6 +171,9 @@ IF(CPUINFO_SUPPORTED_PLATFORM)
LIST(APPEND CPUINFO_SRCS
src/arm/android/properties.c)
ENDIF()
+ ELSEIF(CMAKE_SYSTEM_NAME STREQUAL "Windows" AND CPUINFO_TARGET_PROCESSOR STREQUAL "ARM64")
+ LIST(APPEND CPUINFO_SRCS src/arm/windows/init-by-logical-sys-info.c)
+ LIST(APPEND CPUINFO_SRCS src/arm/windows/init.c)
ENDIF()
IF(CMAKE_SYSTEM_NAME STREQUAL "Emscripten")
diff --git a/README.md b/README.md
index d3da46e..7b8984d 100644
--- a/README.md
+++ b/README.md
@@ -256,6 +256,7 @@ LDFLAGS+= $(pkg-config --libs libcpuinfo)
- [x] Windows
- [x] x86
- [x] x86-64
+ - [x] arm64
## Methods
@@ -264,6 +265,7 @@ LDFLAGS+= $(pkg-config --libs libcpuinfo)
- [x] Using `/proc/cpuinfo` on ARM
- [x] Using `ro.chipname`, `ro.board.platform`, `ro.product.board`, `ro.mediatek.platform`, `ro.arch` properties (Android)
- [ ] Using kernel log (`dmesg`) on ARM Linux
+ - [x] Using Windows registry on ARM64 Windows
- Vendor and microarchitecture detection
- [x] Intel-designed x86/x86-64 cores (up to Sunny Cove, Goldmont Plus, and Knights Mill)
- [x] AMD-designed x86/x86-64 cores (up to Puma/Jaguar and Zen 2)
@@ -286,6 +288,7 @@ LDFLAGS+= $(pkg-config --libs libcpuinfo)
- [x] Using `/proc/self/auxv` (Android/ARM)
- [ ] Using instruction probing on ARM (Linux)
- [ ] Using CPUID registers on ARM64 (Linux)
+ - [x] Using IsProcessorFeaturePresent on ARM64 Windows
- Cache detection
- [x] Using CPUID leaf 0x00000002 (x86/x86-64)
- [x] Using CPUID leaf 0x00000004 (non-AMD x86/x86-64)
@@ -297,6 +300,7 @@ LDFLAGS+= $(pkg-config --libs libcpuinfo)
- [x] Using `sysctlbyname` (Mach)
- [x] Using sysfs `topology` directories (ARM/Linux)
- [ ] Using sysfs `cache` directories (Linux)
+ - [x] Using `GetLogicalProcessorInformationEx` on ARM64 Windows
- TLB detection
- [x] Using CPUID leaf 0x00000002 (x86/x86-64)
- [ ] Using CPUID leaves 0x80000005-0x80000006 and 0x80000019 (AMD x86/x86-64)
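The README entries above advertise GetLogicalProcessorInformationEx-based detection. For reference, the standard Win32 query-size-then-read pattern they imply, as a standalone sketch against the documented API (not code from this patch):

    #include <windows.h>
    #include <stdio.h>
    #include <stdlib.h>

    int main(void) {
        DWORD size = 0;
        /* First call fails with ERROR_INSUFFICIENT_BUFFER and reports the required size. */
        GetLogicalProcessorInformationEx(RelationAll, NULL, &size);
        if (GetLastError() != ERROR_INSUFFICIENT_BUFFER) {
            return 1;
        }
        SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX* infos = malloc(size);
        if (infos == NULL ||
            !GetLogicalProcessorInformationEx(RelationAll, infos, &size)) {
            return 1;
        }
        /* Records are variable-size: advance by each record's Size field. */
        for (char* p = (char*) infos; p < (char*) infos + size;
                p += ((SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX*) p)->Size) {
            printf("relationship: %d\n",
                (int) ((SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX*) p)->Relationship);
        }
        free(infos);
        return 0;
    }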
diff --git a/src/arm/windows/init-by-logical-sys-info.c b/src/arm/windows/init-by-logical-sys-info.c
new file mode 100644
index 0000000..f088011
--- /dev/null
+++ b/src/arm/windows/init-by-logical-sys-info.c
@@ -0,0 +1,885 @@
+#include <stdio.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <malloc.h>
+#include <errno.h>
+#include <sys/types.h>
+
+#include <cpuinfo.h>
+#include <cpuinfo/internal-api.h>
+#include <cpuinfo/log.h>
+
+#include "windows-arm-init.h"
+
+#define MAX_NR_OF_CACHES (cpuinfo_cache_level_max - 1)
+
+/* Call chain:
+ * cpu_info_init_by_logical_sys_info
+ * read_packages_for_processors
+ * read_cores_for_processors
+ * read_caches_for_processors
+ * read_all_logical_processor_info_of_relation
+ * parse_relation_processor_info
+ * store_package_info_per_processor
+ * store_core_info_per_processor
+ * parse_relation_cache_info
+ * store_cache_info_per_processor
+ */
+
+static uint32_t count_logical_processors(
+ const uint32_t max_group_count,
+ uint32_t* global_proc_index_per_group);
+
+static uint32_t read_packages_for_processors(
+ struct cpuinfo_processor* processors,
+ const uint32_t number_of_processors,
+ const uint32_t* global_proc_index_per_group,
+ const struct woa_chip_info *chip_info);
+
+static uint32_t read_cores_for_processors(
+ struct cpuinfo_processor* processors,
+ const uint32_t number_of_processors,
+ const uint32_t* global_proc_index_per_group,
+ struct cpuinfo_core* cores,
+ const struct woa_chip_info *chip_info);
+
+static uint32_t read_caches_for_processors(
+ struct cpuinfo_processor *processors,
+ const uint32_t number_of_processors,
+ struct cpuinfo_cache *caches,
+ uint32_t* numbers_of_caches,
+ const uint32_t* global_proc_index_per_group,
+ const struct woa_chip_info *chip_info);
+
+static uint32_t read_all_logical_processor_info_of_relation(
+ LOGICAL_PROCESSOR_RELATIONSHIP info_type,
+ struct cpuinfo_processor* processors,
+ const uint32_t number_of_processors,
+ struct cpuinfo_cache* caches,
+ uint32_t* numbers_of_caches,
+ struct cpuinfo_core* cores,
+ const uint32_t* global_proc_index_per_group,
+ const struct woa_chip_info *chip_info);
+
+static bool parse_relation_processor_info(
+ struct cpuinfo_processor* processors,
+ uint32_t nr_of_processors,
+ const uint32_t* global_proc_index_per_group,
+ PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX info,
+ const uint32_t info_id,
+ struct cpuinfo_core* cores,
+ const struct woa_chip_info *chip_info);
+
+static bool parse_relation_cache_info(
+ struct cpuinfo_processor* processors,
+ struct cpuinfo_cache* caches,
+ uint32_t* numbers_of_caches,
+ const uint32_t* global_proc_index_per_group,
+ PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX info);
+
+static void store_package_info_per_processor(
+ struct cpuinfo_processor* processors,
+ const uint32_t processor_global_index,
+ const uint32_t package_id,
+ const uint32_t group_id,
+ const uint32_t processor_id_in_group);
+
+static void store_core_info_per_processor(
+ struct cpuinfo_processor* processors,
+ const uint32_t processor_global_index,
+ const uint32_t core_id,
+ PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX core_info,
+ struct cpuinfo_core* cores,
+ const struct woa_chip_info *chip_info);
+
+static void store_cache_info_per_processor(
+ struct cpuinfo_processor* processors,
+ const uint32_t processor_global_index,
+ PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX info,
+ struct cpuinfo_cache* current_cache);
+
+static bool connect_packages_cores_clusters_by_processors(
+ struct cpuinfo_processor* processors,
+ const uint32_t nr_of_processors,
+ struct cpuinfo_package* packages,
+ const uint32_t nr_of_packages,
+ struct cpuinfo_cluster* clusters,
+ struct cpuinfo_core* cores,
+ const uint32_t nr_of_cores,
+ const struct woa_chip_info* chip_info,
+ enum cpuinfo_vendor vendor);
+
+static inline uint32_t low_index_from_kaffinity(KAFFINITY kaffinity);
+
+
+bool cpu_info_init_by_logical_sys_info(
+ const struct woa_chip_info *chip_info,
+ const enum cpuinfo_vendor vendor)
+{
+ struct cpuinfo_processor* processors = NULL;
+ struct cpuinfo_package* packages = NULL;
+ struct cpuinfo_cluster* clusters = NULL;
+ struct cpuinfo_core* cores = NULL;
+ struct cpuinfo_cache* caches = NULL;
+ struct cpuinfo_uarch_info* uarchs = NULL;
+
+ uint32_t nr_of_packages = 0;
+ uint32_t nr_of_cores = 0;
+ uint32_t nr_of_all_caches = 0;
+ uint32_t numbers_of_caches[MAX_NR_OF_CACHES] = {0};
+
+ uint32_t nr_of_uarchs = 0;
+ bool result = false;
+
+ HANDLE heap = GetProcessHeap();
+
+ /* 1. Count available logical processor groups and processors */
+ const uint32_t max_group_count = (uint32_t) GetMaximumProcessorGroupCount();
+ cpuinfo_log_debug("detected %"PRIu32" processor group(s)", max_group_count);
+ /* We need to store the absolute processor ID offsets for every group, because
+ * 1. We can't assume every processor group includes the same number of
+ * logical processors.
+ * 2. Every processor group knows its group number and the processor IDs within
+ * the group, but not the global processor IDs.
+ * 3. We need to list every logical processor by its global ID.
+ */
+ uint32_t* global_proc_index_per_group =
+ (uint32_t*) HeapAlloc(heap, 0, max_group_count * sizeof(uint32_t));
+ if (global_proc_index_per_group == NULL) {
+ cpuinfo_log_error(
+ "failed to allocate %zu bytes for descriptions of %"PRIu32" processor groups",
+ max_group_count * sizeof(uint32_t), max_group_count);
+ goto clean_up;
+ }
+
+ uint32_t nr_of_processors =
+ count_logical_processors(max_group_count, global_proc_index_per_group);
+ processors = HeapAlloc(heap, HEAP_ZERO_MEMORY, nr_of_processors * sizeof(struct cpuinfo_processor));
+ if (processors == NULL) {
+ cpuinfo_log_error(
+ "failed to allocate %zu bytes for descriptions of %"PRIu32" logical processors",
+ nr_of_processors * sizeof(struct cpuinfo_processor), nr_of_processors);
+ goto clean_up;
+ }
+
+ /* 2. Read topology information via the Windows API: packages, cores and caches */
+ nr_of_packages = read_packages_for_processors(
+ processors, nr_of_processors,
+ global_proc_index_per_group,
+ chip_info);
+ if (!nr_of_packages) {
+ cpuinfo_log_error("error in reading package information");
+ goto clean_up;
+ }
+ cpuinfo_log_debug("detected %"PRIu32" processor package(s)", nr_of_packages);
+
+ /* We need the EfficiencyClass to parse the uarch from the core information,
+ * but we must iterate once first to count the cores and allocate memory; then
+ * we iterate again to read the data and store it in the cpuinfo_core structures.
+ */
+ nr_of_cores = read_cores_for_processors(
+ processors, nr_of_processors,
+ global_proc_index_per_group, NULL,
+ chip_info);
+ if (!nr_of_cores) {
+ cpuinfo_log_error("error in reading core information");
+ goto clean_up;
+ }
+ cpuinfo_log_debug("detected %"PRIu32" processor core(s)", nr_of_cores);
+
+ /* There is no API to read the number of caches, so we need to iterate twice over the caches:
+ 1. Count all types of caches -> allocate memory.
+ 2. Read the cache data and store it in the allocated memory.
+ */
+ nr_of_all_caches = read_caches_for_processors(
+ processors, nr_of_processors,
+ caches, numbers_of_caches,
+ global_proc_index_per_group, chip_info);
+ if (!nr_of_all_caches) {
+ cpuinfo_log_error("error in reading cache information");
+ goto clean_up;
+ }
+ cpuinfo_log_debug("detected %"PRIu32" processor cache(s)", nr_of_all_caches);
+
+ /* 3. Allocate memory for package, cluster, core and cache structures */
+ packages = HeapAlloc(heap, HEAP_ZERO_MEMORY, nr_of_packages * sizeof(struct cpuinfo_package));
+ if (packages == NULL) {
+ cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" physical packages",
+ nr_of_packages * sizeof(struct cpuinfo_package), nr_of_packages);
+ goto clean_up;
+ }
+
+ /* We don't have cluster information, so we explicitly set clusters equal to cores. */
+ clusters = HeapAlloc(heap, HEAP_ZERO_MEMORY, nr_of_cores * sizeof(struct cpuinfo_cluster));
+ if (clusters == NULL) {
+ cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" core clusters",
+ nr_of_cores * sizeof(struct cpuinfo_cluster), nr_of_cores);
+ goto clean_up;
+ }
+
+ cores = HeapAlloc(heap, HEAP_ZERO_MEMORY, nr_of_cores * sizeof(struct cpuinfo_core));
+ if (cores == NULL) {
+ cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" cores",
+ nr_of_cores * sizeof(struct cpuinfo_core), nr_of_cores);
+ goto clean_up;
+ }
+
+ /* We allocate one contiguous cache array for all caches, then use offsets per cache type. */
+ caches = HeapAlloc(heap, HEAP_ZERO_MEMORY, nr_of_all_caches * sizeof(struct cpuinfo_cache));
+ if (caches == NULL) {
+ cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" caches",
+ nr_of_all_caches * sizeof(struct cpuinfo_cache), nr_of_all_caches);
+ goto clean_up;
+ }
+
+ /* 4. Read the remaining topology information that couldn't be stored during
+ * the first pass, before the structures were counted and allocated.
+ */
+ nr_of_all_caches = read_caches_for_processors(
+ processors, nr_of_processors,
+ caches, numbers_of_caches, global_proc_index_per_group, chip_info);
+ if (!nr_of_all_caches) {
+ cpuinfo_log_error("error in reading cache information");
+ goto clean_up;
+ }
+
+ nr_of_cores = read_cores_for_processors(
+ processors, nr_of_processors,
+ global_proc_index_per_group, cores,
+ chip_info);
+ if (!nr_of_cores) {
+ cpuinfo_log_error("error in reading core information");
+ goto clean_up;
+ }
+
+ /* 5. Now that we have read everything we can from the system, fill the
+ * package, cluster and core structures respectively.
+ */
+ result = connect_packages_cores_clusters_by_processors(
+ processors, nr_of_processors,
+ packages, nr_of_packages,
+ clusters,
+ cores, nr_of_cores,
+ chip_info,
+ vendor);
+ if(!result) {
+ cpuinfo_log_error("error in connecting information");
+ goto clean_up;
+ }
+
+ /* 6. Count and store the uarchs of the cores, assuming cores with the same uarch are adjacent */
+ enum cpuinfo_uarch prev_uarch = cpuinfo_uarch_unknown;
+ for (uint32_t i = 0; i < nr_of_cores; i++) {
+ if (prev_uarch != cores[i].uarch) {
+ nr_of_uarchs++;
+ prev_uarch = cores[i].uarch;
+ }
+ }
+ uarchs = HeapAlloc(heap, HEAP_ZERO_MEMORY, nr_of_uarchs * sizeof(struct cpuinfo_uarch_info));
+ if (uarchs == NULL) {
+ cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" uarchs",
+ nr_of_uarchs * sizeof(struct cpuinfo_uarch_info), nr_of_uarchs);
+ goto clean_up;
+ }
+ prev_uarch = cpuinfo_uarch_unknown;
+ for (uint32_t i = 0, uarch_counter = 0; i < nr_of_cores; i++) {
+ if (prev_uarch != cores[i].uarch) {
+ prev_uarch = cores[i].uarch;
+ uarchs[uarch_counter].uarch = cores[i].uarch;
+ uarchs[uarch_counter].core_count = 1;
+ uarchs[uarch_counter].processor_count = cores[i].processor_count;
+ uarch_counter++;
+ } else if (prev_uarch != cpuinfo_uarch_unknown) {
+ uarchs[uarch_counter - 1].core_count++;
+ uarchs[uarch_counter - 1].processor_count += cores[i].processor_count;
+ }
+ }
+
+ /* 7. Commit changes */
+ cpuinfo_processors = processors;
+ cpuinfo_packages = packages;
+ cpuinfo_clusters = clusters;
+ cpuinfo_cores = cores;
+ cpuinfo_uarchs = uarchs;
+
+ cpuinfo_processors_count = nr_of_processors;
+ cpuinfo_packages_count = nr_of_packages;
+ cpuinfo_clusters_count = nr_of_cores;
+ cpuinfo_cores_count = nr_of_cores;
+ cpuinfo_uarchs_count = nr_of_uarchs;
+
+ for (uint32_t i = 0; i < MAX_NR_OF_CACHES; i++) {
+ cpuinfo_cache_count[i] = numbers_of_caches[i];
+ }
+ cpuinfo_cache[cpuinfo_cache_level_1i] = caches;
+ cpuinfo_cache[cpuinfo_cache_level_1d] = cpuinfo_cache[cpuinfo_cache_level_1i] + cpuinfo_cache_count[cpuinfo_cache_level_1i];
+ cpuinfo_cache[cpuinfo_cache_level_2] = cpuinfo_cache[cpuinfo_cache_level_1d] + cpuinfo_cache_count[cpuinfo_cache_level_1d];
+ cpuinfo_cache[cpuinfo_cache_level_3] = cpuinfo_cache[cpuinfo_cache_level_2] + cpuinfo_cache_count[cpuinfo_cache_level_2];
+ cpuinfo_cache[cpuinfo_cache_level_4] = cpuinfo_cache[cpuinfo_cache_level_3] + cpuinfo_cache_count[cpuinfo_cache_level_3];
+ cpuinfo_max_cache_size = cpuinfo_compute_max_cache_size(&processors[0]);
+
+ result = true;
+ MemoryBarrier();
+
+ processors = NULL;
+ packages = NULL;
+ clusters = NULL;
+ cores = NULL;
+ caches = NULL;
+ uarchs = NULL;
+
+clean_up:
+ /* The propagated pointers shouldn't be freed; they are freed here only in
+ * case of an error and unfinished initialization.
+ */
+ if (processors != NULL) {
+ HeapFree(heap, 0, processors);
+ }
+ if (packages != NULL) {
+ HeapFree(heap, 0, packages);
+ }
+ if (clusters != NULL) {
+ HeapFree(heap, 0, clusters);
+ }
+ if (cores != NULL) {
+ HeapFree(heap, 0, cores);
+ }
+ if (caches != NULL) {
+ HeapFree(heap, 0, caches);
+ }
+ if (uarchs != NULL) {
+ HeapFree(heap, 0, uarchs);
+ }
+
+ /* Free the locally used temporary pointers */
+ HeapFree(heap, 0, global_proc_index_per_group);
+ global_proc_index_per_group = NULL;
+ return result;
+}
+
+static uint32_t count_logical_processors(
+ const uint32_t max_group_count,
+ uint32_t* global_proc_index_per_group)
+{
+ uint32_t nr_of_processors = 0;
+
+ for (uint32_t i = 0; i < max_group_count; i++) {
+ uint32_t nr_of_processors_per_group = GetMaximumProcessorCount((WORD) i);
+ cpuinfo_log_debug("detected %"PRIu32" processor(s) in group %"PRIu32"",
+ nr_of_processors_per_group, i);
+ global_proc_index_per_group[i] = nr_of_processors;
+ nr_of_processors += nr_of_processors_per_group;
+ }
+ return nr_of_processors;
+}
+
+static uint32_t read_packages_for_processors(
+ struct cpuinfo_processor* processors,
+ const uint32_t number_of_processors,
+ const uint32_t* global_proc_index_per_group,
+ const struct woa_chip_info *chip_info)
+{
+ return read_all_logical_processor_info_of_relation(
+ RelationProcessorPackage,
+ processors,
+ number_of_processors,
+ NULL,
+ NULL,
+ NULL,
+ global_proc_index_per_group,
+ chip_info);
+}
+
+static uint32_t read_cores_for_processors(
+ struct cpuinfo_processor* processors,
+ const uint32_t number_of_processors,
+ const uint32_t* global_proc_index_per_group,
+ struct cpuinfo_core* cores,
+ const struct woa_chip_info *chip_info)
+{
+ return read_all_logical_processor_info_of_relation(
+ RelationProcessorCore,
+ processors,
+ number_of_processors,
+ NULL,
+ NULL,
+ cores,
+ global_proc_index_per_group,
+ chip_info);
+}
+
+static uint32_t read_caches_for_processors(
+ struct cpuinfo_processor* processors,
+ const uint32_t number_of_processors,
+ struct cpuinfo_cache* caches,
+ uint32_t* numbers_of_caches,
+ const uint32_t* global_proc_index_per_group,
+ const struct woa_chip_info *chip_info)
+{
+ /* Reset processor start indexes */
+ if (caches) {
+ uint32_t cache_offset = 0;
+ for (uint32_t i = 0; i < MAX_NR_OF_CACHES; i++) {
+ for (uint32_t j = 0; j < numbers_of_caches[i]; j++) {
+ caches[cache_offset + j].processor_start = UINT32_MAX;
+ }
+ cache_offset += numbers_of_caches[i];
+ }
+ }
+
+ return read_all_logical_processor_info_of_relation(
+ RelationCache,
+ processors,
+ number_of_processors,
+ caches,
+ numbers_of_caches,
+ NULL,
+ global_proc_index_per_group,
+ chip_info);
+}
+
+static uint32_t read_all_logical_processor_info_of_relation(
+ LOGICAL_PROCESSOR_RELATIONSHIP info_type,
+ struct cpuinfo_processor* processors,
+ const uint32_t number_of_processors,
+ struct cpuinfo_cache* caches,
+ uint32_t* numbers_of_caches,
+ struct cpuinfo_core* cores,
+ const uint32_t* global_proc_index_per_group,
+ const struct woa_chip_info* chip_info)
+{
+ PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX infos = NULL;
+ uint32_t nr_of_structs = 0;
+ DWORD info_size = 0;
+ bool result = false;
+ HANDLE heap = GetProcessHeap();
+
+ /* 1. Query the size of the information structure first */
+ if (GetLogicalProcessorInformationEx(info_type, NULL, &info_size) == FALSE) {
+ const DWORD last_error = GetLastError();
+ if (last_error != ERROR_INSUFFICIENT_BUFFER) {
+ cpuinfo_log_error(
+ "failed to query size of processor %"PRIu32" information information: error %"PRIu32"",
+ (uint32_t)info_type, (uint32_t) last_error);
+ goto clean_up;
+ }
+ }
+ /* 2. Allocate memory for the information structure */
+ infos = HeapAlloc(heap, 0, info_size);
+ if (infos == NULL) {
+ cpuinfo_log_error("failed to allocate %"PRIu32" bytes for logical processor information",
+ (uint32_t) info_size);
+ goto clean_up;
+ }
+ /* 3. Read the information structure */
+ if (GetLogicalProcessorInformationEx(info_type, infos, &info_size) == FALSE) {
+ cpuinfo_log_error("failed to query processor %"PRIu32" information: error %"PRIu32"",
+ (uint32_t)info_type, (uint32_t) GetLastError());
+ goto clean_up;
+ }
+
+ /* 4. Parse the structure and store relevant data */
+ PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX info_end =
+ (PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX) ((uintptr_t) infos + info_size);
+ for (PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX info = infos;
+ info < info_end;
+ info = (PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX) ((uintptr_t) info + info->Size))
+ {
+ if (info->Relationship != info_type) {
+ cpuinfo_log_warning(
+ "unexpected processor info type (%"PRIu32") for processor information",
+ (uint32_t) info->Relationship);
+ continue;
+ }
+
+ const uint32_t info_id = nr_of_structs++;
+
+ switch(info_type) {
+ case RelationProcessorPackage:
+ result = parse_relation_processor_info(
+ processors,
+ number_of_processors,
+ global_proc_index_per_group,
+ info,
+ info_id,
+ cores,
+ chip_info);
+ break;
+ case RelationProcessorCore:
+ result = parse_relation_processor_info(
+ processors,
+ number_of_processors,
+ global_proc_index_per_group,
+ info,
+ info_id,
+ cores,
+ chip_info);
+ break;
+ case RelationCache:
+ result = parse_relation_cache_info(
+ processors,
+ caches,
+ numbers_of_caches,
+ global_proc_index_per_group,
+ info);
+ break;
+ default:
+ cpuinfo_log_error(
+ "unexpected processor info type (%"PRIu32") for processor information",
+ (uint32_t) info->Relationship);
+ result = false;
+ break;
+ }
+ if (!result) {
+ nr_of_structs = 0;
+ goto clean_up;
+ }
+ }
+clean_up:
+ /* 5. Release dynamically allocated info structure. */
+ HeapFree(heap, 0, infos);
+ infos = NULL;
+ return nr_of_structs;
+}
+
+static bool parse_relation_processor_info(
+ struct cpuinfo_processor* processors,
+ uint32_t nr_of_processors,
+ const uint32_t* global_proc_index_per_group,
+ PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX info,
+ const uint32_t info_id,
+ struct cpuinfo_core* cores,
+ const struct woa_chip_info *chip_info)
+{
+ for (uint32_t i = 0; i < info->Processor.GroupCount; i++) {
+ const uint32_t group_id = info->Processor.GroupMask[i].Group;
+ /* Bitmask representing processors in this group belonging to this package */
+ KAFFINITY group_processors_mask = info->Processor.GroupMask[i].Mask;
+ while (group_processors_mask != 0) {
+ const uint32_t processor_id_in_group =
+ low_index_from_kaffinity(group_processors_mask);
+ const uint32_t processor_global_index =
+ global_proc_index_per_group[group_id] + processor_id_in_group;
+
+ if(processor_global_index >= nr_of_processors) {
+ cpuinfo_log_error("unexpected processor index %"PRIu32"",
+ processor_global_index);
+ return false;
+ }
+
+ switch(info->Relationship) {
+ case RelationProcessorPackage:
+ store_package_info_per_processor(
+ processors, processor_global_index, info_id,
+ group_id, processor_id_in_group);
+ break;
+ case RelationProcessorCore:
+ store_core_info_per_processor(
+ processors, processor_global_index,
+ info_id, info,
+ cores, chip_info);
+ break;
+ default:
+ cpuinfo_log_error(
+ "unexpected processor info type (%"PRIu32") for processor information",
+ (uint32_t) info->Relationship);
+ break;
+ }
+ /* Clear the lowest set bit in the affinity mask. */
+ group_processors_mask &= (group_processors_mask - 1);
+ }
+ }
+ return true;
+}
+
+static bool parse_relation_cache_info(
+ struct cpuinfo_processor* processors,
+ struct cpuinfo_cache* caches,
+ uint32_t* numbers_of_caches,
+ const uint32_t* global_proc_index_per_group,
+ PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX info)
+{
+ static uint32_t l1i_counter = 0;
+ static uint32_t l1d_counter = 0;
+ static uint32_t l2_counter = 0;
+ static uint32_t l3_counter = 0;
+
+ /* Count cache types for allocation at first. */
+ if (caches == NULL) {
+ switch(info->Cache.Level) {
+ case 1:
+ switch (info->Cache.Type) {
+ case CacheInstruction:
+ numbers_of_caches[cpuinfo_cache_level_1i]++;
+ break;
+ case CacheData:
+ numbers_of_caches[cpuinfo_cache_level_1d]++;
+ break;
+ case CacheUnified:
+ break;
+ case CacheTrace:
+ break;
+ default:
+ break;
+ }
+ break;
+ case 2:
+ numbers_of_caches[cpuinfo_cache_level_2]++;
+ break;
+ case 3:
+ numbers_of_caches[cpuinfo_cache_level_3]++;
+ break;
+ }
+ return true;
+ }
+ struct cpuinfo_cache* l1i_base = caches;
+ struct cpuinfo_cache* l1d_base = l1i_base + numbers_of_caches[cpuinfo_cache_level_1i];
+ struct cpuinfo_cache* l2_base = l1d_base + numbers_of_caches[cpuinfo_cache_level_1d];
+ struct cpuinfo_cache* l3_base = l2_base + numbers_of_caches[cpuinfo_cache_level_2];
+
+ cpuinfo_log_debug(
+ "info->Cache.GroupCount:%"PRIu32", info->Cache.GroupMask:%"PRIu32","
+ "info->Cache.Level:%"PRIu32", info->Cache.Associativity:%"PRIu32","
+ "info->Cache.LineSize:%"PRIu32","
+ "info->Cache.CacheSize:%"PRIu32", info->Cache.Type:%"PRIu32"",
+ info->Cache.GroupCount, (unsigned int)info->Cache.GroupMask.Mask,
+ info->Cache.Level, info->Cache.Associativity, info->Cache.LineSize,
+ info->Cache.CacheSize, info->Cache.Type);
+
+ struct cpuinfo_cache* current_cache = NULL;
+ switch (info->Cache.Level) {
+ case 1:
+ switch (info->Cache.Type) {
+ case CacheInstruction:
+ current_cache = l1i_base + l1i_counter;
+ l1i_counter++;
+ break;
+ case CacheData:
+ current_cache = l1d_base + l1d_counter;
+ l1d_counter++;
+ break;
+ case CacheUnified:
+ break;
+ case CacheTrace:
+ break;
+ default:
+ break;
+ }
+ break;
+ case 2:
+ current_cache = l2_base + l2_counter;
+ l2_counter++;
+ break;
+ case 3:
+ current_cache = l3_base + l3_counter;
+ l3_counter++;
+ break;
+ }
+ current_cache->size = info->Cache.CacheSize;
+ current_cache->line_size = info->Cache.LineSize;
+ current_cache->associativity = info->Cache.Associativity;
+ /* We don't have partition and set information of caches on Windows,
+ * so we set partitions to 1 and calculate the expected sets.
+ */
+ current_cache->partitions = 1;
+ current_cache->sets =
+ current_cache->size / current_cache->line_size / current_cache->associativity;
+ if (info->Cache.Type == CacheUnified) {
+ current_cache->flags = CPUINFO_CACHE_UNIFIED;
+ }
+
+ for (uint32_t i = 0; i <= info->Cache.GroupCount; i++) {
+ /* A GroupCount of zero is valid; the group mask can still have bits set. */
+ const uint32_t group_id = info->Cache.GroupMasks[i].Group;
+ /* Bitmask representing processors in this group belonging to this package */
+ KAFFINITY group_processors_mask = info->Cache.GroupMasks[i].Mask;
+ while (group_processors_mask != 0) {
+ const uint32_t processor_id_in_group =
+ low_index_from_kaffinity(group_processors_mask);
+ const uint32_t processor_global_index =
+ global_proc_index_per_group[group_id] + processor_id_in_group;
+
+ store_cache_info_per_processor(
+ processors, processor_global_index,
+ info, current_cache);
+
+ /* Clear the lowest set bit in the affinity mask. */
+ group_processors_mask &= (group_processors_mask - 1);
+ }
+ }
+ return true;
+}
+
+static void store_package_info_per_processor(
+ struct cpuinfo_processor* processors,
+ const uint32_t processor_global_index,
+ const uint32_t package_id,
+ const uint32_t group_id,
+ const uint32_t processor_id_in_group)
+{
+ processors[processor_global_index].windows_group_id =
+ (uint16_t) group_id;
+ processors[processor_global_index].windows_processor_id =
+ (uint16_t) processor_id_in_group;
+
+ /* We're only counting the packages at this point and haven't allocated memory
+ * for cpuinfo_packages yet, so we store the package pointer's offset for now.
+ */
+ processors[processor_global_index].package =
+ (const struct cpuinfo_package*) NULL + package_id;
+}
+
+static void store_core_info_per_processor(
+ struct cpuinfo_processor* processors,
+ const uint32_t processor_global_index,
+ const uint32_t core_id,
+ PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX core_info,
+ struct cpuinfo_core* cores,
+ const struct woa_chip_info *chip_info)
+{
+ if (cores) {
+ processors[processor_global_index].core = cores + core_id;
+ cores[core_id].core_id = core_id;
+ get_core_uarch_for_efficiency(
+ chip_info->chip_name, core_info->Processor.EfficiencyClass,
+ &(cores[core_id].uarch), &(cores[core_id].frequency));
+
+ /* We don't have cluster information, so we treat clusters as 1:1 with
+ * cores. Store the cluster ID as an offset for now; as soon as the
+ * cluster base address is known, we'll convert it to an absolute address.
+ */
+ processors[processor_global_index].cluster =
+ (const struct cpuinfo_cluster*) NULL + core_id;
+ }
+}
+
+static void store_cache_info_per_processor(
+ struct cpuinfo_processor* processors,
+ const uint32_t processor_global_index,
+ PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX info,
+ struct cpuinfo_cache* current_cache)
+{
+ if (current_cache->processor_start > processor_global_index) {
+ current_cache->processor_start = processor_global_index;
+ }
+ current_cache->processor_count++;
+
+ switch(info->Cache.Level) {
+ case 1:
+ switch (info->Cache.Type) {
+ case CacheInstruction:
+ processors[processor_global_index].cache.l1i = current_cache;
+ break;
+ case CacheData:
+ processors[processor_global_index].cache.l1d = current_cache;
+ break;
+ case CacheUnified:
+ break;
+ case CacheTrace:
+ break;
+ default:
+ break;
+ }
+ break;
+ case 2:
+ processors[processor_global_index].cache.l2 = current_cache;
+ break;
+ case 3:
+ processors[processor_global_index].cache.l3 = current_cache;
+ break;
+ }
+}
+
+static bool connect_packages_cores_clusters_by_processors(
+ struct cpuinfo_processor* processors,
+ const uint32_t nr_of_processors,
+ struct cpuinfo_package* packages,
+ const uint32_t nr_of_packages,
+ struct cpuinfo_cluster* clusters,
+ struct cpuinfo_core* cores,
+ const uint32_t nr_of_cores,
+ const struct woa_chip_info* chip_info,
+ enum cpuinfo_vendor vendor)
+{
+ /* Adjust core and package pointers for all logical processors. */
+ for (uint32_t i = nr_of_processors; i != 0; i--) {
+ const uint32_t processor_id = i - 1;
+ struct cpuinfo_processor* processor = processors + processor_id;
+
+ struct cpuinfo_core* core = (struct cpuinfo_core*)processor->core;
+
+ /* We stored pointer offsets before the memory for packages and clusters
+ * was allocated, so now add those offsets to the base addresses.
+ */
+ struct cpuinfo_package* package =
+ (struct cpuinfo_package*) ((uintptr_t) packages + (uintptr_t) processor->package);
+ if (package < packages ||
+ package >= (packages + nr_of_packages)) {
+ cpuinfo_log_error("invalid package indexing");
+ return false;
+ }
+ processor->package = package;
+
+ struct cpuinfo_cluster* cluster =
+ (struct cpuinfo_cluster*) ((uintptr_t) clusters + (uintptr_t) processor->cluster);
+ if (cluster < clusters ||
+ cluster >= (clusters + nr_of_cores)) {
+ cpuinfo_log_error("invalid cluster indexing");
+ return false;
+ }
+ processor->cluster = cluster;
+
+ if (chip_info) {
+ strncpy_s(package->name, CPUINFO_PACKAGE_NAME_MAX, chip_info->chip_name_string,
+ strnlen(chip_info->chip_name_string, CPUINFO_PACKAGE_NAME_MAX));
+ }
+
+ /* Set start indexes and counts per package / cluster / core - going backwards */
+
+ /* This can be overwritten by lower-index processors on the same package. */
+ package->processor_start = processor_id;
+ package->processor_count++;
+
+ /* This can be overwritten by lower-index processors on the same cluster. */
+ cluster->processor_start = processor_id;
+ cluster->processor_count++;
+
+ /* This can be overwritten by lower-index processors on the same core. */
+ core->processor_start = processor_id;
+ core->processor_count++;
+ }
+ /* Fill cores */
+ for (uint32_t i = nr_of_cores; i != 0; i--) {
+ const uint32_t global_core_id = i - 1;
+ struct cpuinfo_core* core = cores + global_core_id;
+ const struct cpuinfo_processor* processor = processors + core->processor_start;
+ struct cpuinfo_package* package = (struct cpuinfo_package*) processor->package;
+ struct cpuinfo_cluster* cluster = (struct cpuinfo_cluster*) processor->cluster;
+
+ core->package = package;
+ core->cluster = cluster;
+ core->vendor = vendor;
+
+ /* This can be overwritten by lower-index cores on the same cluster/package. */
+ cluster->core_start = global_core_id;
+ cluster->core_count++;
+ package->core_start = global_core_id;
+ package->core_count++;
+ package->cluster_start = global_core_id;
+ package->cluster_count = package->core_count;
+
+ cluster->package = package;
+ cluster->vendor = cores[cluster->core_start].vendor;
+ cluster->uarch = cores[cluster->core_start].uarch;
+ cluster->frequency = cores[cluster->core_start].frequency;
+ }
+ return true;
+}
+
+static inline uint32_t low_index_from_kaffinity(KAFFINITY kaffinity) {
+ unsigned long index;
+ _BitScanForward64(&index, (unsigned __int64) kaffinity);
+ return (uint32_t) index;
+}
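
The store_*_info_per_processor helpers above park array indexes in pointer fields before the target arrays exist, and connect_packages_cores_clusters_by_processors later rebases them. A condensed, self-contained illustration of that offset-from-NULL trick (hypothetical struct names; note that pointer arithmetic on NULL is technically undefined in ISO C, which this file tolerates on Windows' flat address space):

    #include <stdint.h>
    #include <stdlib.h>

    struct package { uint32_t core_count; };
    struct proc { const struct package* package; };

    static void rebase_example(struct proc* procs, uint32_t nr_of_procs) {
        /* Pass 1: the package array doesn't exist yet; store the package ID
         * (two processors per package here) as an offset from NULL. */
        for (uint32_t i = 0; i < nr_of_procs; i++) {
            procs[i].package = (const struct package*) NULL + (i / 2);
        }
        /* Pass 2: once the real array is allocated, rebase the offsets. */
        struct package* packages = calloc(nr_of_procs / 2, sizeof(struct package));
        if (packages == NULL) {
            return;
        }
        for (uint32_t i = 0; i < nr_of_procs; i++) {
            procs[i].package = (const struct package*)
                ((uintptr_t) packages + (uintptr_t) procs[i].package);
        }
        /* procs[2].package and procs[3].package now point at packages[1]. (freeing omitted) */
    }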
diff --git a/src/arm/windows/init.c b/src/arm/windows/init.c
new file mode 100644
index 0000000..8effc15
--- /dev/null
+++ b/src/arm/windows/init.c
@@ -0,0 +1,253 @@
+#include <stdio.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <errno.h>
+
+#include <cpuinfo.h>
+#include <cpuinfo/internal-api.h>
+#include <cpuinfo/log.h>
+
+#include "windows-arm-init.h"
+
+/* Efficiency class = 0 means little core, while 1 means big core for now */
+#define MAX_WOA_VALID_EFFICIENCY_CLASSES 2
+#define VENDOR_NAME_MAX CPUINFO_PACKAGE_NAME_MAX
+
+struct cpuinfo_arm_isa cpuinfo_isa;
+
+static void set_cpuinfo_isa_fields(void);
+static bool get_system_info_from_registry(
+ struct woa_chip_info** chip_info,
+ enum cpuinfo_vendor* vendor);
+
+struct vendor_info {
+ char vendor_name[VENDOR_NAME_MAX];
+ enum cpuinfo_vendor vendor;
+};
+
+/* Please add new vendor here! */
+static struct vendor_info vendors[] = {
+ {
+ "Qualcomm",
+ cpuinfo_vendor_qualcomm
+ }
+};
+
+/* Please add new SoC/chip info here! */
+static struct woa_chip_info woa_chips[] = {
+ /* Microsoft SQ1 Kryo 495 4 + 4 cores (3 GHz + 1.80 GHz) */
+ {
+ "Microsoft SQ1",
+ woa_chip_name_microsoft_sq_1,
+ {
+ {
+ cpuinfo_uarch_cortex_a55,
+ 1800000000,
+ },
+ {
+ cpuinfo_uarch_cortex_a76,
+ 3000000000,
+ }
+ }
+ },
+ /* Microsoft SQ2 Kryo 495 4 + 4 cores (3.15 GHz + 2.42 GHz) */
+ {
+ "Microsoft SQ2",
+ woa_chip_name_microsoft_sq_2,
+ {
+ {
+ cpuinfo_uarch_cortex_a55,
+ 2420000000,
+ },
+ {
+ cpuinfo_uarch_cortex_a76,
+ 3150000000
+ }
+ }
+ }
+};
+
+BOOL CALLBACK cpuinfo_arm_windows_init(
+ PINIT_ONCE init_once, PVOID parameter, PVOID* context)
+{
+ struct woa_chip_info *chip_info = NULL;
+ enum cpuinfo_vendor vendor = cpuinfo_vendor_unknown;
+ bool result = false;
+
+ set_cpuinfo_isa_fields();
+ result = get_system_info_from_registry(&chip_info, &vendor);
+ result &= cpu_info_init_by_logical_sys_info(chip_info, vendor);
+ cpuinfo_is_initialized = result;
+ return ((result == true) ? TRUE : FALSE);
+}
+
+bool get_core_uarch_for_efficiency(
+ enum woa_chip_name chip, BYTE EfficiencyClass,
+ enum cpuinfo_uarch* uarch, uint64_t* frequency)
+{
+ /* For the currently supported WoA chips, the efficiency class selects the
+ * pre-defined little or big core.
+ * Logic for any additional supported SoCs should be implemented here.
+ */
+ if (uarch && frequency && chip < woa_chip_name_last &&
+ EfficiencyClass < MAX_WOA_VALID_EFFICIENCY_CLASSES) {
+ *uarch = woa_chips[chip].uarchs[EfficiencyClass].uarch;
+ *frequency = woa_chips[chip].uarchs[EfficiencyClass].frequency;
+ return true;
+ }
+ return false;
+}
+
+/* Static helper functions */
+
+static bool read_registry(
+ LPCTSTR subkey,
+ LPCTSTR value,
+ char** textBuffer)
+{
+ DWORD keyType = 0;
+ DWORD dataSize = 0;
+ const DWORD flags = RRF_RT_REG_SZ; /* Only read strings (REG_SZ) */
+ LSTATUS result = 0;
+ HANDLE heap = GetProcessHeap();
+
+ result = RegGetValue(
+ HKEY_LOCAL_MACHINE,
+ subkey,
+ value,
+ flags,
+ &keyType,
+ NULL, /* Request buffer size */
+ &dataSize);
+ if (result != 0 || dataSize == 0) {
+ cpuinfo_log_error("Registry entry size read error");
+ return false;
+ }
+
+ if (*textBuffer) {
+ HeapFree(heap, 0, *textBuffer);
+ }
+ *textBuffer = HeapAlloc(heap, HEAP_ZERO_MEMORY, dataSize);
+ if (*textBuffer == NULL) {
+ cpuinfo_log_error("Registry textbuffer allocation error");
+ return false;
+ }
+
+ result = RegGetValue(
+ HKEY_LOCAL_MACHINE,
+ subkey,
+ value,
+ flags,
+ NULL,
+ *textBuffer, /* Write string in this destination buffer */
+ &dataSize);
+ if (result != 0) {
+ cpuinfo_log_error("Registry read error");
+ return false;
+ }
+ return true;
+}
+
+static bool get_system_info_from_registry(
+ struct woa_chip_info** chip_info,
+ enum cpuinfo_vendor* vendor)
+{
+ bool result = false;
+ char* textBuffer = NULL;
+ LPCTSTR cpu0_subkey =
+ (LPCTSTR)"HARDWARE\\DESCRIPTION\\System\\CentralProcessor\\0";
+ LPCTSTR chip_name_value = (LPCTSTR)"ProcessorNameString";
+ LPCTSTR vendor_name_value = (LPCTSTR)"VendorIdentifier";
+
+ *chip_info = NULL;
+ *vendor = cpuinfo_vendor_unknown;
+ HANDLE heap = GetProcessHeap();
+
+ /* 1. Read processor model name from registry and find in the hard-coded list. */
+ if (!read_registry(cpu0_subkey, chip_name_value, &textBuffer)) {
+ cpuinfo_log_error("Registry read error");
+ goto cleanup;
+ }
+ for (uint32_t i = 0; i < (uint32_t) woa_chip_name_last; i++) {
+ size_t compare_length = strnlen(woa_chips[i].chip_name_string, CPUINFO_PACKAGE_NAME_MAX);
+ int compare_result = strncmp(textBuffer, woa_chips[i].chip_name_string, compare_length);
+ if (compare_result == 0) {
+ *chip_info = woa_chips+i;
+ break;
+ }
+ }
+ if (*chip_info == NULL) {
+ cpuinfo_log_error("Unknown chip model name.\n Please add new Windows on Arm SoC/chip support!");
+ goto cleanup;
+ }
+ cpuinfo_log_debug("detected chip model name: %s", (**chip_info).chip_name_string);
+
+ /* 2. Read vendor/manufacturer name from registry. */
+ if (!read_registry(cpu0_subkey, vendor_name_value, &textBuffer)) {
+ cpuinfo_log_error("Registry read error");
+ goto cleanup;
+ }
+
+ for (uint32_t i = 0; i < (sizeof(vendors) / sizeof(struct vendor_info)); i++) {
+ if (strncmp(textBuffer, vendors[i].vendor_name,
+ strlen(vendors[i].vendor_name)) == 0) {
+ *vendor = vendors[i].vendor;
+ result = true;
+ break;
+ }
+ }
+ if (*vendor == cpuinfo_vendor_unknown) {
+ cpuinfo_log_error("Unexpected vendor: %s", textBuffer);
+ }
+
+cleanup:
+ HeapFree(heap, 0, textBuffer);
+ textBuffer = NULL;
+ return result;
+}
+
+static void set_cpuinfo_isa_fields(void)
+{
+ bool armv8 = IsProcessorFeaturePresent(PF_ARM_V8_INSTRUCTIONS_AVAILABLE);
+ bool crypto = IsProcessorFeaturePresent(PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE);
+ bool load_store_atomic = IsProcessorFeaturePresent(PF_ARM_64BIT_LOADSTORE_ATOMIC);
+ bool float_multiply_accumulate = IsProcessorFeaturePresent(PF_ARM_FMAC_INSTRUCTIONS_AVAILABLE);
+ bool crc32 = IsProcessorFeaturePresent(PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE);
+ bool float_emulated = IsProcessorFeaturePresent(PF_FLOATING_POINT_EMULATED);
+
+ /* Read all Arm-related Windows features for debug purposes, even those we
+ * can't yet map to an Arm ISA feature.
+ */
+#if CPUINFO_LOG_DEBUG_PARSERS
+ bool divide = IsProcessorFeaturePresent(PF_ARM_DIVIDE_INSTRUCTION_AVAILABLE);
+ bool ext_cache = IsProcessorFeaturePresent(PF_ARM_EXTERNAL_CACHE_AVAILABLE);
+ bool vfp_registers = IsProcessorFeaturePresent(PF_ARM_VFP_32_REGISTERS_AVAILABLE);
+ bool arm_v81 = IsProcessorFeaturePresent(PF_ARM_V81_ATOMIC_INSTRUCTIONS_AVAILABLE);
+
+ cpuinfo_log_debug("divide present: %d", divide);
+ cpuinfo_log_debug("ext_cache present: %d", ext_cache);
+ cpuinfo_log_debug("vfp_registers present: %d", vfp_registers);
+ cpuinfo_log_debug("arm_v81 present: %d", arm_v81);
+#endif
+
+ cpuinfo_log_debug("armv8 present: %d", armv8);
+ cpuinfo_log_debug("crypto present: %d", crypto);
+ cpuinfo_log_debug("load_store_atomic present: %d", load_store_atomic);
+ cpuinfo_log_debug("float_multiply_accumulate present: %d", float_multiply_accumulate);
+ cpuinfo_log_debug("crc32 present: %d", crc32);
+ cpuinfo_log_debug("float_emulated: %d", float_emulated);
+
+#if CPUINFO_ARCH_ARM
+ cpuinfo_isa.armv8 = armv8;
+#endif
+#if CPUINFO_ARCH_ARM64
+ cpuinfo_isa.atomics = load_store_atomic;
+#endif
+ cpuinfo_isa.crc32 = crc32;
+ /* Windows API reports all or nothing for cryptographic instructions. */
+ cpuinfo_isa.aes = crypto;
+ cpuinfo_isa.sha1 = crypto;
+ cpuinfo_isa.sha2 = crypto;
+ cpuinfo_isa.pmull = crypto;
+ cpuinfo_isa.fp16arith = !float_emulated && float_multiply_accumulate;
+}
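
To illustrate how the efficiency class indexes the per-chip table above, a small hypothetical driver (the expected values are taken from this patch's woa_chips table):

    #include <windows.h>
    #include <stdint.h>
    #include <cpuinfo.h>
    #include "windows-arm-init.h"

    static void check_big_core_of_sq1(void) {
        enum cpuinfo_uarch uarch;
        uint64_t frequency;
        /* EfficiencyClass 1 = big core; on Microsoft SQ1 that is the Cortex-A76. */
        if (get_core_uarch_for_efficiency(
                woa_chip_name_microsoft_sq_1, 1, &uarch, &frequency)) {
            /* Here uarch == cpuinfo_uarch_cortex_a76 and frequency == 3000000000. */
        }
    }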
diff --git a/src/arm/windows/windows-arm-init.h b/src/arm/windows/windows-arm-init.h
new file mode 100644
index 0000000..76cc51e
--- /dev/null
+++ b/src/arm/windows/windows-arm-init.h
@@ -0,0 +1,32 @@
+#pragma once
+
+/* List of known and supported Windows on Arm SoCs/chips. */
+enum woa_chip_name {
+ woa_chip_name_microsoft_sq_1 = 0,
+ woa_chip_name_microsoft_sq_2 = 1,
+ woa_chip_name_unknown = 2,
+ woa_chip_name_last = woa_chip_name_unknown
+};
+
+/* Topology information hard-coded by SoC/chip name */
+struct core_info_by_chip_name {
+ enum cpuinfo_uarch uarch;
+ uint64_t frequency; /* Hz */
+};
+
+/* SoC/chip info that currently can't be read via the logical system
+ * information API, but is available from the registry.
+ */
+struct woa_chip_info {
+ char* chip_name_string;
+ enum woa_chip_name chip_name;
+ struct core_info_by_chip_name uarchs[woa_chip_name_last];
+};
+
+bool get_core_uarch_for_efficiency(
+ enum woa_chip_name chip, BYTE EfficiencyClass,
+ enum cpuinfo_uarch* uarch, uint64_t* frequency);
+
+bool cpu_info_init_by_logical_sys_info(
+ const struct woa_chip_info *chip_info,
+ enum cpuinfo_vendor vendor);
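
Per the "Please add new vendor/SoC here!" markers in init.c, supporting another chip means extending this enum and the woa_chips table. A hypothetical entry (the chip name and frequencies below are placeholders, not vetted data):

    /* windows-arm-init.h: insert the new name before woa_chip_name_unknown. */
    enum woa_chip_name {
        woa_chip_name_microsoft_sq_1 = 0,
        woa_chip_name_microsoft_sq_2 = 1,
        woa_chip_name_example_soc = 2,   /* hypothetical */
        woa_chip_name_unknown = 3,
        woa_chip_name_last = woa_chip_name_unknown
    };

    /* init.c: matching woa_chips[] entry, little core (EfficiencyClass 0) first. */
    {
        "Example SoC",   /* prefix of the registry ProcessorNameString */
        woa_chip_name_example_soc,
        {
            { cpuinfo_uarch_cortex_a55, 2000000000, },
            { cpuinfo_uarch_cortex_a76, 2800000000, },
        }
    },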
diff --git a/src/cpuinfo/internal-api.h b/src/cpuinfo/internal-api.h
index 9c23d7c..c04620e 100644
--- a/src/cpuinfo/internal-api.h
+++ b/src/cpuinfo/internal-api.h
@@ -51,7 +51,11 @@ extern CPUINFO_INTERNAL uint32_t cpuinfo_max_cache_size;
CPUINFO_PRIVATE void cpuinfo_x86_mach_init(void);
CPUINFO_PRIVATE void cpuinfo_x86_linux_init(void);
#if defined(_WIN32) || defined(__CYGWIN__)
- CPUINFO_PRIVATE BOOL CALLBACK cpuinfo_x86_windows_init(PINIT_ONCE init_once, PVOID parameter, PVOID* context);
+ #if CPUINFO_ARCH_ARM64
+ CPUINFO_PRIVATE BOOL CALLBACK cpuinfo_arm_windows_init(PINIT_ONCE init_once, PVOID parameter, PVOID* context);
+ #else
+ CPUINFO_PRIVATE BOOL CALLBACK cpuinfo_x86_windows_init(PINIT_ONCE init_once, PVOID parameter, PVOID* context);
+ #endif
#endif
CPUINFO_PRIVATE void cpuinfo_arm_mach_init(void);
CPUINFO_PRIVATE void cpuinfo_arm_linux_init(void);
diff --git a/src/init.c b/src/init.c
index d61e7be..ed37c07 100644
--- a/src/init.c
+++ b/src/init.c
@@ -37,6 +37,8 @@ bool CPUINFO_ABI cpuinfo_initialize(void) {
pthread_once(&init_guard, &cpuinfo_arm_linux_init);
#elif defined(__MACH__) && defined(__APPLE__)
pthread_once(&init_guard, &cpuinfo_arm_mach_init);
+ #elif defined(_WIN32)
+ InitOnceExecuteOnce(&init_guard, &cpuinfo_arm_windows_init, NULL, NULL);
#else
cpuinfo_log_error("operating system is not supported in cpuinfo");
#endif
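
The dispatch above relies on Win32 one-time initialization. A standalone sketch of that pattern (documented API, not patch code):

    #include <windows.h>
    #include <stdio.h>

    static INIT_ONCE init_guard = INIT_ONCE_STATIC_INIT;

    static BOOL CALLBACK init_once_cb(PINIT_ONCE once, PVOID param, PVOID* ctx) {
        puts("runs exactly once, even with concurrent callers");
        return TRUE; /* returning FALSE marks the initialization as failed */
    }

    int main(void) {
        InitOnceExecuteOnce(&init_guard, init_once_cb, NULL, NULL);
        InitOnceExecuteOnce(&init_guard, init_once_cb, NULL, NULL); /* callback not run again */
        return 0;
    }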