diff options
author | Neale Ferguson <neale@sinenomine.net> | 2022-01-25 20:29:02 +0300 |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-01-25 20:29:02 +0300 |
commit | 04859508c54a7ac25547bc5bcf4a019f0517aa0b (patch) | |
tree | 5cf0e785602867159040b5b2ce0bcabb7edf596f | |
parent | 95229aaa2ec642449aea4eba44fc717ca74786ab (diff) |
Determine any memory/CPU limitations from sysfs cgroup (#21280)
Add capability to interrogate cgroup limitations when determining CPU and memory limits
This code has been adapted from coreCLR. It has been converted from C++ to C but keeps the same naming conventions, in case a unified mechanism that can be shared between both runtimes is developed later. The code has been tested on Ubuntu 20.04 and CentOS 7 with cgroupv1 and cgroupv2.
This code is required when running mono in a container: the limits mono currently discovers are those of the host machine, not those of the container, which may have lower quotas.
* mono/utils/Makefile.am
- Add new file to build
* mono/utils/memfuncs.c
- Call `mono_get_restricted_memory_limit()` or `mono_get_memory_avail()`
* mono/utils/memfuncs.h
- Add prototypes for the new APIs
* mono/utils/mono-cgroup.c
- Code adapted from coreCLR to interrogate sysfs to determine any limitations on memory or CPU
* mono/utils/mono-proclib.c
- Add call to `mono_get_cpu_limit()`
* mono/utils/mono-proclib.h
- Add prototype for the new API
-rw-r--r-- | configure.ac | 3 | ||||
-rw-r--r-- | mono/metadata/icall.c | 2 | ||||
-rw-r--r-- | mono/metadata/threadpool-worker-default.c | 4 | ||||
-rw-r--r-- | mono/metadata/threadpool.c | 4 | ||||
-rw-r--r-- | mono/sgen/sgen-marksweep.c | 4 | ||||
-rw-r--r-- | mono/utils/Makefile.am | 1 | ||||
-rw-r--r-- | mono/utils/memfuncs.c | 45 | ||||
-rw-r--r-- | mono/utils/memfuncs.h | 5 | ||||
-rw-r--r-- | mono/utils/mono-cgroup.c | 964 | ||||
-rw-r--r-- | mono/utils/mono-proclib-windows.c | 10 | ||||
-rw-r--r-- | mono/utils/mono-proclib.c | 45 | ||||
-rw-r--r-- | mono/utils/mono-proclib.h | 3 |
12 files changed, 1078 insertions, 12 deletions
diff --git a/configure.ac b/configure.ac index badba3705ba..d17d849a225 100644 --- a/configure.ac +++ b/configure.ac @@ -852,6 +852,8 @@ if test x$platform_android = xyes; then if test x$android_unified_headers = xyes; then AC_DEFINE(ANDROID_UNIFIED_HEADERS, 1, [Whether Android NDK unified headers are used]) fi +else + AC_CHECK_HEADER(linux/cgroupstats.h, [AC_DEFINE(HAVE_CGROUP_SUPPORT, 1, Define to 1 if you have the <linux/cgroupstats.h> header file.)]) fi # Android # not 64 bit clean in cross-compile @@ -7095,6 +7097,7 @@ echo " libgdiplus: $libgdiplus_msg zlib: $zlib_msg BTLS: $enable_btls$btls_platform_string + CGROUP: $ac_cv_header_linux_cgroupstats_h jemalloc: $with_jemalloc (always use: $with_jemalloc_always) crash reporting: $crash_reporting (private crashes: $with_crash_privacy) $disabled diff --git a/mono/metadata/icall.c b/mono/metadata/icall.c index 786c1099a07..08304a3c90d 100644 --- a/mono/metadata/icall.c +++ b/mono/metadata/icall.c @@ -9182,7 +9182,7 @@ ves_icall_System_Threading_Thread_YieldInternal (void) gint32 ves_icall_System_Environment_get_ProcessorCount (void) { - return mono_cpu_count (); + return mono_cpu_limit (); } #if defined(ENABLE_MONODROID) diff --git a/mono/metadata/threadpool-worker-default.c b/mono/metadata/threadpool-worker-default.c index 044b0cb7d8f..76382ebf2c9 100644 --- a/mono/metadata/threadpool-worker-default.c +++ b/mono/metadata/threadpool-worker-default.c @@ -281,7 +281,7 @@ mono_threadpool_worker_init (MonoThreadPoolWorkerCallback callback) else threads_per_cpu = CLAMP (atoi (threads_per_cpu_env), 1, 50); - threads_count = mono_cpu_count () * threads_per_cpu; + threads_count = mono_cpu_limit () * threads_per_cpu; worker.limit_worker_min = threads_count; @@ -1206,7 +1206,7 @@ mono_threadpool_worker_set_max (gint32 value) { gint32 cpu_count; - cpu_count = mono_cpu_count (); + cpu_count = mono_cpu_limit (); if (value < worker.limit_worker_min || value < cpu_count) return FALSE; diff --git a/mono/metadata/threadpool.c 
b/mono/metadata/threadpool.c index cc669e9e132..dc7bd6070f5 100644 --- a/mono/metadata/threadpool.c +++ b/mono/metadata/threadpool.c @@ -145,7 +145,7 @@ initialize (void) threadpool.domains = g_ptr_array_new (); mono_coop_mutex_init (&threadpool.domains_lock); - threadpool.limit_io_min = mono_cpu_count (); + threadpool.limit_io_min = mono_cpu_limit (); threadpool.limit_io_max = CLAMP (threadpool.limit_io_min * 100, MIN (threadpool.limit_io_min, 200), MAX (threadpool.limit_io_min, 200)); mono_threadpool_worker_init (worker_callback); @@ -712,7 +712,7 @@ ves_icall_System_Threading_ThreadPool_SetMaxThreadsNative (gint32 worker_threads worker_threads = MIN (worker_threads, MAX_POSSIBLE_THREADS); completion_port_threads = MIN (completion_port_threads, MAX_POSSIBLE_THREADS); - gint cpu_count = mono_cpu_count (); + gint cpu_count = mono_cpu_limit (); if (completion_port_threads < threadpool.limit_io_min || completion_port_threads < cpu_count) return FALSE; diff --git a/mono/sgen/sgen-marksweep.c b/mono/sgen/sgen-marksweep.c index f767431c169..29ffa892326 100644 --- a/mono/sgen/sgen-marksweep.c +++ b/mono/sgen/sgen-marksweep.c @@ -2860,7 +2860,7 @@ sgen_marksweep_init_internal (SgenMajorCollector *collector, gboolean is_concurr sgen_register_fixed_internal_mem_type (INTERNAL_MEM_MS_BLOCK_INFO, SIZEOF_MS_BLOCK_INFO); - if (mono_cpu_count () <= 1) + if (mono_cpu_limit () <= 1) is_parallel = FALSE; num_block_obj_sizes = ms_calculate_block_obj_sizes (MS_BLOCK_OBJ_SIZE_FACTOR, NULL); @@ -3026,7 +3026,7 @@ sgen_marksweep_init_internal (SgenMajorCollector *collector, gboolean is_concurr #ifndef DISABLE_SGEN_MAJOR_MARKSWEEP_CONC if (is_concurrent && is_parallel) - sgen_workers_create_context (GENERATION_OLD, mono_cpu_count ()); + sgen_workers_create_context (GENERATION_OLD, mono_cpu_limit ()); else if (is_concurrent) sgen_workers_create_context (GENERATION_OLD, 1); diff --git a/mono/utils/Makefile.am b/mono/utils/Makefile.am index 3a737bd43bb..f463ba2574b 100644 --- 
a/mono/utils/Makefile.am +++ b/mono/utils/Makefile.am @@ -53,6 +53,7 @@ else unix_sources = \ dlmalloc.h \ dlmalloc.c \ + mono-cgroup.c \ os-event-unix.c platform_sources = $(unix_sources) diff --git a/mono/utils/memfuncs.c b/mono/utils/memfuncs.c index 4b30d1e0d09..53f7e3d7508 100644 --- a/mono/utils/memfuncs.c +++ b/mono/utils/memfuncs.c @@ -27,6 +27,7 @@ #include <config.h> #include <glib.h> #include <string.h> +#include <errno.h> #if defined (__APPLE__) #include <mach/message.h> @@ -271,7 +272,7 @@ mono_determine_physical_ram_size (void) return (guint64)value; #elif defined (HAVE_SYSCONF) - guint64 page_size = 0, num_pages = 0; + guint64 page_size = 0, num_pages = 0, memsize; /* sysconf works on most *NIX operating systems, if your system doesn't have it or if it * reports invalid values, please add your OS specific code below. */ @@ -288,7 +289,44 @@ mono_determine_physical_ram_size (void) return 134217728; } - return page_size * num_pages; +#if defined(_SC_AVPHYS_PAGES) + memsize = sysconf(_SC_AVPHYS_PAGES) * page_size; +#else + memsize = page_size * num_pages; /* Calculate physical memory size */ +#endif + +#if HAVE_CGROUP_SUPPORT + gint64 restricted_limit = mono_get_restricted_memory_limit(); /* Check for any cgroup limit */ + if (restricted_limit != 0) { + gchar *heapHardLimit = getenv("DOTNET_GCHeapHardLimit"); /* See if user has set a limit */ + if (heapHardLimit == NULL) + heapHardLimit = getenv("COMPlus_GCHeapHardLimit"); /* Check old envvar name */ + errno = 0; + if (heapHardLimit != NULL) { + guint64 gcLimit = strtoull(heapHardLimit, NULL, 16); + if ((errno == 0) && (gcLimit != 0)) + restricted_limit = (restricted_limit < gcLimit ? restricted_limit : (gint64) gcLimit); + } else { + gchar *heapHardLimitPct = getenv("DOTNET_GCHeapHardLimitPercent"); /* User % limit? 
*/ + if (heapHardLimitPct == NULL) + heapHardLimitPct = getenv("COMPlus_GCHeapHardLimitPercent"); /* Check old envvar name */ + if (heapHardLimitPct != NULL) { + int gcLimit = strtoll(heapHardLimitPct, NULL, 16); + if ((gcLimit > 0) && (gcLimit <= 100)) + restricted_limit = (gcLimit * restricted_limit) / 100; + else + restricted_limit = (3 * restricted_limit) / 4; /* Use 75% limit of container */ + } else { + restricted_limit = (3 * restricted_limit) / 4; /* Use 75% limit of container */ + } + } + return (restricted_limit < 209715200 ? 209715200 : /* Use at least 20MB */ + (restricted_limit < memsize ? restricted_limit : memsize)); + } + +#endif + + return memsize; #else return 134217728; #endif @@ -341,6 +379,9 @@ mono_determine_physical_ram_available_size (void) host_page_size (host, &page_size); return (guint64) vmstat.free_count * page_size; +#elif HAVE_CGROUP_SUPPORT + return (mono_get_memory_avail()); + #elif defined (HAVE_SYSCONF) guint64 page_size = 0, num_pages = 0; diff --git a/mono/utils/memfuncs.h b/mono/utils/memfuncs.h index bc8c578bcb2..5af5356f6ca 100644 --- a/mono/utils/memfuncs.h +++ b/mono/utils/memfuncs.h @@ -23,5 +23,10 @@ void mono_gc_memmove_atomic (void *dest, const void *src, size_t size); void mono_gc_memmove_aligned (void *dest, const void *src, size_t size); guint64 mono_determine_physical_ram_size (void); guint64 mono_determine_physical_ram_available_size (void); +#if HAVE_CGROUP_SUPPORT +size_t mono_get_restricted_memory_limit(void); +gboolean mono_get_memory_used(size_t *); +size_t mono_get_memory_avail(void); +#endif #endif diff --git a/mono/utils/mono-cgroup.c b/mono/utils/mono-cgroup.c new file mode 100644 index 00000000000..dd22ffbb812 --- /dev/null +++ b/mono/utils/mono-cgroup.c @@ -0,0 +1,964 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
+ +/*++ + +Module Name: + + mono-cgroup.c + +Abstract: + Read the memory limit for the current process + + Adapted from runtime src/coreclr/gc/unix/cgroup.cpp + - commit 28ec20194010c2a3d06f2217998cfcb8e8b8fb5e +--*/ +#ifdef __FreeBSD__ +#define _WITH_GETLINE +#endif + +#include <config.h> +#include <mono/utils/mono-compiler.h> + +#if HAVE_CGROUP_SUPPORT + +#include <unistd.h> +#include <stdlib.h> +#include <stdint.h> +#include <glib.h> +#include <stdio.h> +#include <string.h> +#include <sys/resource.h> +#if defined(__APPLE__) || defined(__FreeBSD__) +#include <sys/param.h> +#include <sys/mount.h> +#else +#include <sys/vfs.h> +#endif +#include <errno.h> +#include <limits.h> + +#include <utils/mono-logger-internals.h> + +#ifndef SIZE_T_MAX +# define SIZE_T_MAX (~(size_t)0) +#endif + +#define CGROUP2_SUPER_MAGIC 0x63677270 +#define TMPFS_MAGIC 0x01021994 + +#define PROC_MOUNTINFO_FILENAME "/proc/self/mountinfo" +#define PROC_CGROUP_FILENAME "/proc/self/cgroup" +#define PROC_STATM_FILENAME "/proc/self/statm" +#define CGROUP1_MEMORY_LIMIT_FILENAME "/memory.limit_in_bytes" +#define CGROUP2_MEMORY_LIMIT_FILENAME "/memory.max" +#define CGROUP_MEMORY_STAT_FILENAME "/memory.stat" +#define CGROUP1_CFS_QUOTA_FILENAME "/cpu.cfs_quota_us" +#define CGROUP1_CFS_PERIOD_FILENAME "/cpu.cfs_period_us" +#define CGROUP2_CPU_MAX_FILENAME "/cpu.max" + +static void initialize(void); +static gboolean readMemoryValueFromFile(const char *, size_t *); +static gboolean getPhysicalMemoryLimit(size_t *); +static gboolean getPhysicalMemoryUsage(size_t *); +static int findCGroupVersion(void); +static gboolean isCGroup1MemorySubsystem(const char *); +static gboolean isCGroup1CpuSubsystem(const char *); +static char *findCGroupPath(gboolean (*is_subsystem)(const char *)); +static void findHierarchyMount(gboolean (*is_subsystem)(const char *), char **, char **); +static char *findCGroupPathForSubsystem(gboolean (*is_subsystem)(const char *)); +static gboolean getCGroupMemoryLimit(size_t *, const char 
*); +static gboolean getCGroupMemoryUsage(size_t *); +static size_t getPhysicalMemoryTotal(size_t); +static long long readCpuCGroupValue(const char *); +static void computeCpuLimit(long long, long long, guint32 *); + +size_t mono_get_restricted_memory_limit(void); +gboolean mono_get_memory_used(size_t *); +size_t mono_get_memory_avail(void); +gboolean mono_get_cpu_limit(guint *); +static gboolean readLongLongValueFromFile(const char *, long long *); + +// the cgroup version number or 0 to indicate cgroups are not found or not enabled +static int s_cgroup_version = -1; + +static char *s_memory_cgroup_path = NULL; +static char *s_cpu_cgroup_path = NULL; + +static const char *s_mem_stat_key_names[4]; +static size_t s_mem_stat_key_lengths[4]; +static size_t s_mem_stat_n_keys = 0; +static long pageSize; + +/** + * @brief Initialize variables used by the calculation routines. + * + */ +static void +initialize() +{ + s_cgroup_version = findCGroupVersion(); + s_memory_cgroup_path = findCGroupPath(s_cgroup_version == 1 ? &isCGroup1MemorySubsystem : NULL); + s_cpu_cgroup_path = findCGroupPath(s_cgroup_version == 1 ? 
&isCGroup1CpuSubsystem : NULL); + + if (s_cgroup_version == 0) + return; + + if (s_cgroup_version == 1) { + s_mem_stat_n_keys = 4; + s_mem_stat_key_names[0] = "total_inactive_anon "; + s_mem_stat_key_names[1] = "total_active_anon "; + s_mem_stat_key_names[2] = "total_dirty "; + s_mem_stat_key_names[3] = "total_unevictable "; + } else { + s_mem_stat_n_keys = 3; + s_mem_stat_key_names[0] = "anon "; + s_mem_stat_key_names[1] = "file_dirty "; + s_mem_stat_key_names[2] = "unevictable "; + } + + for (size_t i = 0; i < s_mem_stat_n_keys; i++) + s_mem_stat_key_lengths[i] = strlen(s_mem_stat_key_names[i]); + + pageSize = sysconf(_SC_PAGE_SIZE); +} + +/** + * + * @brief Read a value from a specified /sys/fs/cgroup/memory file + * + * @param[in] filename - name of file containing value + * @param[out] val - pointer to the result area + * @returns True or False depending if value was found + * + */ +static gboolean +readMemoryValueFromFile(const char* filename, size_t* val) +{ + gboolean result = FALSE; + char *line = NULL; + size_t lineLen = 0; + char *endptr = NULL; + FILE *file = NULL; + + if (val != NULL) { + file = fopen(filename, "r"); + if (file != NULL) { + if (getline(&line, &lineLen, file) != -1) { + errno = 0; + *val = strtoull(line, &endptr, 0); + result = TRUE; + } + } + } + + if (file) + fclose(file); + free(line); + return result; +} + +/** + * + * @brief Interrogate the cgroup memory values to determine if there's + * a limit on physical memory. 
+ * + * @param[out] val - pointer to the result area + * @returns True or False depending if a limit was found + * + */ +static gboolean +getPhysicalMemoryLimit(size_t *val) +{ + if (s_cgroup_version == 0) + return FALSE; + else if (s_cgroup_version == 1) + return getCGroupMemoryLimit(val, CGROUP1_MEMORY_LIMIT_FILENAME); + else if (s_cgroup_version == 2) + return getCGroupMemoryLimit(val, CGROUP2_MEMORY_LIMIT_FILENAME); + else { + mono_trace (G_LOG_LEVEL_DEBUG, MONO_TRACE_CONFIG, + "Unknown cgroup version."); + return FALSE; + } +} + +/** + * + * @brief Interrogate the cgroup memory values to determine how much + * memory is in use. + * + * @param[out] val - pointer to the result area + * @returns True or False depending if a usage value was found + * + */ +static gboolean +getPhysicalMemoryUsage(size_t *val) +{ + if (s_cgroup_version == 0) + return FALSE; + else if (s_cgroup_version == 1) + return getCGroupMemoryUsage(val); + else if (s_cgroup_version == 2) + return getCGroupMemoryUsage(val); + else { + mono_trace (G_LOG_LEVEL_DEBUG, MONO_TRACE_CONFIG, + "Unknown cgroup version."); + return FALSE; + } +} + +/** + * + * @brief Inspect the /sys/fs/cgroup hierachy to determine what version of + * group we are using + * + * @returns cgroup version + * + */ +static int +findCGroupVersion() +{ + // It is possible to have both cgroup v1 and v2 enabled on a system. + // Most non-bleeding-edge Linux distributions fall in this group. We + // look at the file system type of /sys/fs/cgroup to determine which + // one is the default. For more details, see: + // https://systemd.io/CGROUP_DELEGATION/#three-different-tree-setups- + // We dont care about the difference between the "legacy" and "hybrid" + // modes because both of those involve cgroup v1 controllers managing + // resources. 
+ + + struct statfs stats; + int result = statfs("/sys/fs/cgroup", &stats); + if (result != 0) + return 0; + + switch (stats.f_type) { + case TMPFS_MAGIC: return 1; + case CGROUP2_SUPER_MAGIC: return 2; + default: return 0; + } +} + +/** + * + * @brief Check if we've found the memory component of /sys/fs/cgroup + * + * @param[in] strTok - Token for comparison + * @returns True if token matches "memory" + * + */ +static gboolean +isCGroup1MemorySubsystem(const char *strTok) +{ + return strcmp("memory", strTok) == 0; +} + +/** + * + * @brief Check if we've found the CPU component of /sys/fs/cgroup + * + * @param[in] strTok - Token for comparison + * @returns True if token matches "cpu" + * + */ +static gboolean +isCGroup1CpuSubsystem(const char *strTok) +{ + return strcmp("cpu", strTok) == 0; +} + +/** + * + * @brief Navigate the /sys/fs/cgroup to try and find the correct cgroup path + * + * @param[in] is_subsystem - Function used to compare tokens + * @returns Path to cgroup + * + */ +static char * +findCGroupPath(gboolean (*is_subsystem)(const char *)) +{ + char *cgroup_path = NULL; + char *hierarchy_mount = NULL; + char *hierarchy_root = NULL; + char *cgroup_path_relative_to_mount = NULL; + size_t common_path_prefix_len; + + findHierarchyMount(is_subsystem, &hierarchy_mount, &hierarchy_root); + if (hierarchy_mount != NULL && hierarchy_root != NULL) { + + cgroup_path_relative_to_mount = findCGroupPathForSubsystem(is_subsystem); + if (cgroup_path_relative_to_mount != NULL) { + + cgroup_path = (char*)malloc(strlen(hierarchy_mount) + strlen(cgroup_path_relative_to_mount) + 1); + if (cgroup_path != NULL) { + + strcpy(cgroup_path, hierarchy_mount); + // For a host cgroup, we need to append the relative path. + // The root and cgroup path can share a common prefix of the path that should not be appended. 
+ // Example 1 (docker): + // hierarchy_mount: /sys/fs/cgroup/cpu + // hierarchy_root: /docker/87ee2de57e51bc75175a4d2e81b71d162811b179d549d6601ed70b58cad83578 + // cgroup_path_relative_to_mount: /docker/87ee2de57e51bc75175a4d2e81b71d162811b179d549d6601ed70b58cad83578/my_named_cgroup + // append do the cgroup_path: /my_named_cgroup + // final cgroup_path: /sys/fs/cgroup/cpu/my_named_cgroup + // + // Example 2 (out of docker) + // hierarchy_mount: /sys/fs/cgroup/cpu + // hierarchy_root: / + // cgroup_path_relative_to_mount: /my_named_cgroup + // append do the cgroup_path: /my_named_cgroup + // final cgroup_path: /sys/fs/cgroup/cpu/my_named_cgroup + common_path_prefix_len = strlen(hierarchy_root); + if ((common_path_prefix_len == 1) || + (strncmp(hierarchy_root, cgroup_path_relative_to_mount, common_path_prefix_len) != 0)) + common_path_prefix_len = 0; + + g_assert((cgroup_path_relative_to_mount[common_path_prefix_len] == '/') || + (cgroup_path_relative_to_mount[common_path_prefix_len] == '\0')); + + strcat(cgroup_path, cgroup_path_relative_to_mount + common_path_prefix_len); + } + } + } + + free(hierarchy_mount); + free(hierarchy_root); + free(cgroup_path_relative_to_mount); + return cgroup_path; +} + +/** + * + * @brief Check the /proc filesystem to determine the root and mount + * path of /sys/fs/cgroup data + * + * @param[in] is_subsystem - Comparison function + * @param[out] pmountpath - + * @param[out] pmountroot - + * + */ +static void +findHierarchyMount(gboolean (*is_subsystem)(const char *), char** pmountpath, char** pmountroot) +{ + char *line = NULL; + size_t lineLen = 0, maxLineLen = 0; + char *filesystemType = NULL; + char *options = NULL; + char *mountpath = NULL; + char *mountroot = NULL; + + FILE *mountinfofile = fopen(PROC_MOUNTINFO_FILENAME, "r"); + if (mountinfofile == NULL) + goto done; + + while (getline(&line, &lineLen, mountinfofile) != -1) { + if (filesystemType == NULL || lineLen > maxLineLen) { + free(filesystemType); + filesystemType = 
NULL; + free(options); + options = NULL; + filesystemType = (char*)malloc(lineLen+1); + if (filesystemType == NULL) + goto done; + options = (char*)malloc(lineLen+1); + if (options == NULL) + goto done; + maxLineLen = lineLen; + } + + char *separatorChar = strstr(line, " - "); + + // See man page of proc to get format for /proc/self/mountinfo file + int sscanfRet = sscanf(separatorChar, + " - %s %*s %s", + filesystemType, + options); + if (sscanfRet != 2) { + mono_trace (G_LOG_LEVEL_DEBUG, MONO_TRACE_CONFIG, "Failed to parse mount info file contents with sscanf."); + goto done; + } + + if (strncmp(filesystemType, "cgroup", 6) == 0) { + gboolean isSubsystemMatch = is_subsystem == NULL; + if (!isSubsystemMatch) { + char *context = NULL; + char *strTok = strtok_r(options, ",", &context); + while (!isSubsystemMatch && strTok != NULL) + { + isSubsystemMatch = is_subsystem(strTok); + strTok = strtok_r(NULL, ",", &context); + } + } + if (isSubsystemMatch) { + mountpath = (char*)malloc(lineLen+1); + if (mountpath == NULL) + goto done; + mountroot = (char*)malloc(lineLen+1); + if (mountroot == NULL) + goto done; + + sscanfRet = sscanf(line, + "%*s %*s %*s %s %s ", + mountroot, + mountpath); + if (sscanfRet != 2) + mono_trace (G_LOG_LEVEL_DEBUG, MONO_TRACE_CONFIG, + "Failed to parse mount info file contents with sscanf."); + + // assign the output arguments and clear the locals so we don't free them. 
+ *pmountpath = mountpath; + *pmountroot = mountroot; + mountpath = mountroot = NULL; + } + } + } +done: + free(mountpath); + free(mountroot); + free(filesystemType); + free(options); + free(line); + if (mountinfofile) + fclose(mountinfofile); +} + +/** + * + * @brief + * Check the /proc filesystem to determine the root and mount path + * of /sys/fs/cgroup data + * + * @param[in] is_subsystem - Comparison function + * @returns cgroup path for the memory subsystem + * + */ +static char * +findCGroupPathForSubsystem(gboolean (*is_subsystem)(const char *)) +{ + char *line = NULL; + size_t lineLen = 0; + size_t maxLineLen = 0; + char *subsystem_list = NULL; + char *cgroup_path = NULL; + gboolean result = FALSE; + + FILE *cgroupfile = fopen(PROC_CGROUP_FILENAME, "r"); + if (cgroupfile == NULL) + goto done; + + while (!result && getline(&line, &lineLen, cgroupfile) != -1) { + if (subsystem_list == NULL || lineLen > maxLineLen) { + free(subsystem_list); + subsystem_list = NULL; + free(cgroup_path); + cgroup_path = NULL; + subsystem_list = (char*)malloc(lineLen+1); + if (subsystem_list == NULL) + goto done; + cgroup_path = (char*)malloc(lineLen+1); + if (cgroup_path == NULL) + goto done; + maxLineLen = lineLen; + } + + if (s_cgroup_version == 1) { + // See man page of proc to get format for /proc/self/cgroup file + int sscanfRet = sscanf(line, + "%*[^:]:%[^:]:%s", + subsystem_list, + cgroup_path); + if (sscanfRet != 2) { + mono_trace (G_LOG_LEVEL_DEBUG, MONO_TRACE_CONFIG, + "Failed to parse cgroup info file contents with sscanf."); + goto done; + } + + char* context = NULL; + char* strTok = strtok_r(subsystem_list, ",", &context); + while (strTok != NULL) { + if (is_subsystem(strTok)) { + result = TRUE; + break; + } + strTok = strtok_r(NULL, ",", &context); + } + } else if (s_cgroup_version == 2) { + // See https://www.kernel.org/doc/Documentation/cgroup-v2.txt + // Look for a "0::/some/path" + int sscanfRet = sscanf(line, + "0::%s", + cgroup_path); + if (sscanfRet == 1) + 
{ + result = TRUE; + } + } else { + mono_trace (G_LOG_LEVEL_DEBUG, MONO_TRACE_CONFIG, + "Unknown cgroup version in mountinfo."); + goto done; + } + } +done: + free(subsystem_list); + if (!result) { + free(cgroup_path); + cgroup_path = NULL; + } + free(line); + if (cgroupfile) + fclose(cgroupfile); + return cgroup_path; +} + +/** + * + * @brief Extract memory limit from specified /sys/fs/cgroup/memory file + * + * @param[out] val - Memory limit + * @param[in] filename - name of file from which to extract limit + * @returns True if value found + * + */ +static gboolean +getCGroupMemoryLimit(size_t *val, const char *filename) +{ + if (s_memory_cgroup_path == NULL) + return FALSE; + + char* mem_limit_filename = NULL; + if (asprintf(&mem_limit_filename, "%s%s", s_memory_cgroup_path, filename) < 0) + return FALSE; + + gboolean result = readMemoryValueFromFile(mem_limit_filename, val); + free(mem_limit_filename); + return result; +} + +/** + * + * @brief Extract memory usage from /sys/fs/cgroup/memory.stat file + * + * @param[out] val - Memory limit + * @returns True if value found + * + */ +static gboolean +getCGroupMemoryUsage(size_t *val) +{ + if (s_memory_cgroup_path == NULL) + return FALSE; + + char *stat_filename = NULL; + if (asprintf(&stat_filename, "%s%s", s_memory_cgroup_path, CGROUP_MEMORY_STAT_FILENAME) < 0) + return FALSE; + + FILE *stat_file = fopen(stat_filename, "r"); + free(stat_filename); + if (stat_file == NULL) + return FALSE; + + char *line = NULL; + size_t lineLen = 0; + size_t readValues = 0; + char *endptr; + + *val = 0; + while (getline(&line, &lineLen, stat_file) != -1 && readValues < s_mem_stat_n_keys) { + for (size_t i = 0; i < s_mem_stat_n_keys; i++) { + if (strncmp(line, s_mem_stat_key_names[i], s_mem_stat_key_lengths[i]) == 0) { + errno = 0; + const char *startptr = line + s_mem_stat_key_lengths[i]; + *val += strtoll(startptr, &endptr, 10); + if (endptr != startptr && errno == 0) + readValues++; + + break; + } + } + } + + fclose(stat_file); 
+ free(line); + + if (readValues == s_mem_stat_n_keys) + return TRUE; + + return FALSE; +} + +/** + * + * @brief Determine if there are any limits on memory and return the value + * + * @returns Physical memory limit + * + * Zero represents no limit. + */ +size_t +mono_get_restricted_memory_limit() +{ + size_t physical_memory_limit = 0; + + if (s_cgroup_version == -1) + initialize(); + + if (s_cgroup_version == 0) + return 0; + + if (!getPhysicalMemoryLimit(&physical_memory_limit)) + return 0; + + // If there's no memory limit specified on the container this + // actually returns 0x7FFFFFFFFFFFF000 (2^63-1 rounded down to + // 4k which is a common page size). So we know we are not + // running in a memory restricted environment. + if (physical_memory_limit > 0x7FFFFFFF00000000) + return 0; + + return (getPhysicalMemoryTotal(physical_memory_limit)); +} + +/** + * + * @brief Check the input limit against any system limits or actual memory on system + * + * @param[in] physical_memory_limit - The max memory on the system + * @returns Physical memory total + * + */ +static size_t +getPhysicalMemoryTotal(size_t physical_memory_limit) +{ + struct rlimit curr_rlimit; + size_t rlimit_soft_limit = (size_t)RLIM_INFINITY; + if (getrlimit(RLIMIT_AS, &curr_rlimit) == 0) + rlimit_soft_limit = curr_rlimit.rlim_cur; + physical_memory_limit = (physical_memory_limit < rlimit_soft_limit) ? + physical_memory_limit : rlimit_soft_limit; + + // Ensure that limit is not greater than real memory size + long pages = sysconf(_SC_PHYS_PAGES); + if (pages != -1) { + if (pageSize != -1) { + physical_memory_limit = (physical_memory_limit < (size_t)pages * pageSize) ? 
+ physical_memory_limit : (size_t)pages * pageSize; + } + } + + if (physical_memory_limit > ULONG_MAX) { + // It is observed in practice when the memory is unrestricted, Linux control + // group returns a physical limit that is bigger than the address space + return ULONG_MAX; + } else + return (size_t)physical_memory_limit; +} + +/** + * + * @brief Determine the amount of memory in use + * + * @param[out] val - pointer to the memory usage value + * @returns True if we are able to determine usage + * + */ +gboolean +mono_get_memory_used(size_t *val) +{ + gboolean result = FALSE; + size_t linelen; + char *line = NULL; + + if (val == NULL) + return FALSE; + + // Linux uses cgroup usage to trigger oom kills. + if (getPhysicalMemoryUsage(val)) + return TRUE; + + // process resident set size. + FILE* file = fopen(PROC_STATM_FILENAME, "r"); + if (file != NULL && getline(&line, &linelen, file) != -1) { + char* context = NULL; + char* strTok = strtok_r(line, " ", &context); + strTok = strtok_r(NULL, " ", &context); + + errno = 0; + *val = strtoull(strTok, NULL, 0); + if (errno == 0) { + if (pageSize != -1) { + *val = *val * pageSize; + result = TRUE; + } + } + } + + if (file) + fclose(file); + free(line); + return result; +} + +/** + * + * @brief Determine the amount of memory available by examininig any + * limits and checking what memory is in use. + * + * @returns Amount of memory available + * + */ +size_t +mono_get_memory_avail() +{ + size_t max, used, avail, sysAvail; +#ifdef _SC_AVPHYS_PAGES // If this isn't defined then we don't get called + + max = mono_get_restricted_memory_limit(); + + if (max == 0) + max = getPhysicalMemoryTotal(ULONG_MAX); + + if (mono_get_memory_used(&used)) + avail = max - used; + else + avail = max; + + sysAvail = sysconf(_SC_AVPHYS_PAGES) * pageSize; + return (avail < sysAvail ? 
avail : sysAvail); +#else + return (0); +#endif +} + +/** + * + * @brief Return any limits on CPU use + * + * @returns Number of CPU usable + * + */ +static gboolean +getCGroup1CpuLimit(guint32 *val) +{ + long long quota; + long long period; + + quota = readCpuCGroupValue(CGROUP1_CFS_QUOTA_FILENAME); + if (quota <= 0) + return FALSE; + + period = readCpuCGroupValue(CGROUP1_CFS_PERIOD_FILENAME); + if (period <= 0) + return FALSE; + + computeCpuLimit(period, quota, val); + + return TRUE; +} + +/** + * + * @brief Return any limits on CPU use + * + * @returns Number of CPU usable + * + */ +static gboolean +getCGroup2CpuLimit(guint32 *val) +{ + char *filename = NULL; + FILE *file = NULL; + char *endptr = NULL; + char *max_quota_string = NULL; + char *period_string = NULL; + char *context = NULL; + char *line = NULL; + size_t lineLen = 0; + + long long quota = 0; + long long period = 0; + + + gboolean result = FALSE; + + if (s_cpu_cgroup_path == NULL) + return FALSE; + + if (asprintf(&filename, "%s%s", s_cpu_cgroup_path, CGROUP2_CPU_MAX_FILENAME) < 0) + return FALSE; + + file = fopen(filename, "r"); + if (file == NULL) + goto done; + + if (getline(&line, &lineLen, file) == -1) + goto done; + + // The expected format is: + // $MAX $PERIOD + // Where "$MAX" may be the string literal "max" + + max_quota_string = strtok_r(line, " ", &context); + if (max_quota_string == NULL) + { + mono_trace (G_LOG_LEVEL_DEBUG, MONO_TRACE_CONFIG, + "Unable to parse " CGROUP2_CPU_MAX_FILENAME " file contents."); + goto done; + } + period_string = strtok_r(NULL, " ", &context); + if (period_string == NULL) + { + mono_trace (G_LOG_LEVEL_DEBUG, MONO_TRACE_CONFIG, + "Unable to parse " CGROUP2_CPU_MAX_FILENAME " file contents."); + goto done; + } + + // "max" means no cpu limit + if (strcmp("max", max_quota_string) == 0) + goto done; + + errno = 0; + quota = strtoll(max_quota_string, &endptr, 10); + if (max_quota_string == endptr || errno != 0) + goto done; + + period = strtoll(period_string, 
&endptr, 10);
+	if (period_string == endptr || errno != 0)
+		goto done;
+
+	computeCpuLimit(period, quota, val);
+	result = TRUE;
+
+done:
+	if (file)
+		fclose(file);
+	free(filename);
+	free(line);
+
+	return result;
+}
+
+/**
+ *
+ * @brief Compute the CPU limit based on the CGroup data
+ *
+ * @param[in] period - Value the quota is divided by
+ * @param[in] quota - Limit found in sysfs
+ * @param[out] val - Number of CPU usable
+ */
+static void
+computeCpuLimit(long long period, long long quota, uint32_t *val)
+{
+	// Cannot have less than 1 CPU
+	if (quota <= period) {
+		*val = 1;
+		return;
+	}
+
+	// Calculate cpu count based on quota and round it up. A non-positive period
+	// saturates instead of being divided by: negative double -> uint32_t is undefined
+	double cpu_count = (period > 0) ? (double) quota / period + 0.999999999 : (double) UINT32_MAX;
+	*val = (cpu_count < UINT32_MAX) ? (uint32_t)cpu_count : UINT32_MAX;
+}
+
+/**
+ *
+ * @brief Read the CGroup CPU data from sysfs
+ *
+ * @param[in] subsystemFilename - Name appended to s_cpu_cgroup_path to form the file to read
+ * @returns CPU CGroup value, or -1 if there is no CPU cgroup path or the read fails
+ *
+ */
+static long long
+readCpuCGroupValue(const char *subsystemFilename)
+{
+	char *filename = NULL;
+	gboolean result = FALSE;
+	long long val = -1;
+
+	if (s_cpu_cgroup_path == NULL)
+		return -1;
+
+	if (asprintf(&filename, "%s%s", s_cpu_cgroup_path, subsystemFilename) < 0)
+		return -1;
+
+	result = readLongLongValueFromFile(filename, &val);
+	free(filename);
+	if (!result)
+		return -1;
+
+	return val;
+}
+
+/**
+ *
+ * @brief Read a long long value from a file
+ *
+ * @param[in] filename - sysfs File containing data
+ * @param[out] val - Value read; only meaningful when TRUE is returned
+ * @returns Success indicator
+ *
+ */
+static gboolean
+readLongLongValueFromFile(const char *filename, long long *val)
+{
+	gboolean result = FALSE;
+	char *line = NULL;
+	size_t lineLen = 0;
+	char *endptr = NULL;
+
+	if (filename == NULL || val == NULL)
+		return FALSE;
+
+	FILE *file = fopen(filename, "r");
+	if (file == NULL)
+		return FALSE;
+
+	if (getline(&line, &lineLen, file) != -1) {
+		errno = 0;
+		*val = strtoll(line, &endptr, 10);
+		if (line != endptr && errno == 0)
+			result = TRUE;
+	}
+
+	fclose(file);
+	free(line);
+	return result;
+}
+
+/**
+ * @brief Interrogate the cgroup CPU values to determine if there's a limit on CPUs
+ * @param[out] val - pointer to the result area, written only when a limit was found
+ * @returns True or False depending if a limit was found
+ */
+gboolean
+mono_get_cpu_limit(int *val)
+{
+	guint32 cpus = 0;
+	gboolean found = FALSE;
+
+	if (val == NULL)
+		return FALSE;
+	if (s_cgroup_version == -1)
+		initialize();
+
+	if (s_cgroup_version == 1)
+		found = getCGroup1CpuLimit(&cpus);
+	else if (s_cgroup_version == 2)
+		found = getCGroup2CpuLimit(&cpus);
+	else if (s_cgroup_version != 0)
+		mono_trace (G_LOG_LEVEL_DEBUG, MONO_TRACE_CONFIG,
+			"Unknown cgroup version.");
+
+	/* computeCpuLimit() can report up to UINT32_MAX; clamp so the int result stays positive */
+	if (found)
+		*val = (cpus < INT32_MAX) ? (int)cpus : INT32_MAX;
+	return found;
+}
+#else
+
+MONO_EMPTY_SOURCE_FILE (mono_cgroup);
+
+#endif
diff --git a/mono/utils/mono-proclib-windows.c b/mono/utils/mono-proclib-windows.c
index cc56ecbef1f..6b379c02c78 100644
--- a/mono/utils/mono-proclib-windows.c
+++ b/mono/utils/mono-proclib-windows.c
@@ -32,6 +32,16 @@ mono_cpu_count (void)
 	return info.dwNumberOfProcessors;
 }
 
+/**
+ * mono_cpu_limit:
+ * \returns the number of processors available to this process
+ */
+int
+mono_cpu_limit (void)
+{
+	return mono_cpu_count();
+}
+
 /*
  * This function returns the cpu usage in percentage,
  * normalized on the number of cores.
diff --git a/mono/utils/mono-proclib.c b/mono/utils/mono-proclib.c
index 64c583da9ee..912aa6e5cc8 100644
--- a/mono/utils/mono-proclib.c
+++ b/mono/utils/mono-proclib.c
@@ -838,9 +838,11 @@ mono_cpu_count (void)
 	 * [5] https://github.com/dotnet/coreclr/blob/7058273693db2555f127ce16e6b0c5b40fb04867/src/pal/src/misc/sysinfo.cpp#L148
 	 */
 
+
 #if defined (_SC_NPROCESSORS_CONF) && defined (HAVE_SYSCONF)
 	{
-		int count = sysconf (_SC_NPROCESSORS_CONF);
+		int count;
+		count = sysconf (_SC_NPROCESSORS_CONF);
 		if (count > 0)
 			return count;
 	}
@@ -879,6 +881,43 @@ mono_cpu_count (void)
 	/* FIXME: warn */
 	return 1;
 }
+
+/**
+ * mono_cpu_limit:
+ * \returns the number of processors available to this process: a positive
+ * DOTNET_PROCESSOR_COUNT if set, else mono_cpu_count() capped by any cgroup limit
+ */
+int
+mono_cpu_limit (void)
+{
+	static int limit = -1; /* Value will be cached for future calls */
+	if (limit == -1) {
+		/* Work on locals and store the cache once, so it never holds a rejected
+		 * or half-computed value that a later call could pick up and return. */
+		const char *dotnetProcCnt = getenv("DOTNET_PROCESSOR_COUNT");
+		long requested = 0;
+		int cpus = 0;
+		int count = 0;
+
+		if (dotnetProcCnt != NULL) {
+			errno = 0;
+			requested = strtol(dotnetProcCnt, NULL, 0);
+			/* Positive, in range, and unchanged by the narrowing to int */
+			if (errno == 0 && requested > 0 && requested == (long)(int)requested)
+				cpus = (int)requested;
+		}
+		if (cpus == 0) {
+			cpus = mono_cpu_count();
+#if HAVE_CGROUP_SUPPORT
+			/* A non-positive cgroup answer is unusable (and -1 means "not cached") */
+			if (mono_get_cpu_limit(&count) && count > 0 && count < cpus)
+				cpus = count;
+#endif
+		}
+		limit = cpus;
+	}
+	return limit;
+}
 #endif /* !HOST_WIN32 */
 
 static void
@@ -892,7 +931,7 @@ get_cpu_times (int cpu_id, gint64 *user, gint64 *systemt, gint64 *irq, gint64 *s
 	if (!f)
 		return;
 	if (cpu_id < 0)
-		uhz *= mono_cpu_count ();
+		uhz *= mono_cpu_limit ();
 	while ((s = fgets (buf, sizeof (buf), f))) {
 		char *data = NULL;
 		if (cpu_id < 0 && strncmp (s, "cpu", 3) == 0 && g_ascii_isspace (s [3])) {
@@ -1134,7 +1173,7 @@ mono_cpu_usage (MonoCpuUsageState *prev)
 	user_time = resource_usage.ru_utime.tv_sec * 1000 * 1000 * 10 + resource_usage.ru_utime.tv_usec * 10;
 
 	cpu_busy_time = (user_time - (prev ? prev->user_time : 0)) + (kernel_time - (prev ? prev->kernel_time : 0));
-	cpu_total_time = (current_time - (prev ? prev->current_time : 0)) * mono_cpu_count ();
+	cpu_total_time = (current_time - (prev ? prev->current_time : 0)) * mono_cpu_limit ();
 
 	if (prev) {
 		prev->kernel_time = kernel_time;
diff --git a/mono/utils/mono-proclib.h b/mono/utils/mono-proclib.h
index 01c93526c12..496973e0cd0 100644
--- a/mono/utils/mono-proclib.h
+++ b/mono/utils/mono-proclib.h
@@ -73,9 +73,12 @@ gint64 mono_process_get_data_with_error (gpointer pid, MonoProcessData data,
 int mono_process_current_pid (void);
 
 MONO_API int mono_cpu_count (void);
+MONO_API int mono_cpu_limit (void);
 gint64 mono_cpu_get_data (int cpu_id, MonoCpuData data, MonoProcessError *error);
 gint32 mono_cpu_usage (MonoCpuUsageState *prev);
 
+gboolean mono_get_cpu_limit(int *limit);
+
 int mono_atexit (void (*func)(void));
 
 #ifndef HOST_WIN32
|