diff options
Diffstat (limited to 'src/System.Private.CoreLib/shared/System/Buffers/TlsOverPerCoreLockedStacksArrayPool.cs')
-rw-r--r-- | src/System.Private.CoreLib/shared/System/Buffers/TlsOverPerCoreLockedStacksArrayPool.cs | 216 |
1 files changed, 210 insertions, 6 deletions
diff --git a/src/System.Private.CoreLib/shared/System/Buffers/TlsOverPerCoreLockedStacksArrayPool.cs b/src/System.Private.CoreLib/shared/System/Buffers/TlsOverPerCoreLockedStacksArrayPool.cs index 64c5cebe8..96efb0d52 100644 --- a/src/System.Private.CoreLib/shared/System/Buffers/TlsOverPerCoreLockedStacksArrayPool.cs +++ b/src/System.Private.CoreLib/shared/System/Buffers/TlsOverPerCoreLockedStacksArrayPool.cs @@ -2,9 +2,12 @@ // The .NET Foundation licenses this file to you under the MIT license. // See the LICENSE file in the project root for more information. -using Microsoft.Win32; +using System; +using System.Collections.Generic; using System.Runtime.CompilerServices; using System.Threading; +using Internal.Runtime.Augments; +using Internal.Runtime.CompilerServices; namespace System.Buffers { @@ -22,7 +25,6 @@ namespace System.Buffers { // TODO: #7747: "Investigate optimizing ArrayPool heuristics" // - Explore caching in TLS more than one array per size per thread, and moving stale buffers to the global queue. - // - Explore dumping stale buffers from the global queue, similar to PinnableBufferCache (maybe merging them). // - Explore changing the size of each per-core bucket, potentially dynamically or based on other factors like array size. // - Explore changing number of buckets and what sizes of arrays are cached. // - Investigate whether false sharing is causing any issues, in particular on LockedStack's count and the contents of its array. @@ -46,6 +48,15 @@ namespace System.Buffers [ThreadStatic] private static T[][] t_tlsBuckets; + private int _callbackCreated; + + private readonly static bool s_trimBuffers = GetTrimBuffers(); + + /// <summary> + /// Used to keep track of all thread local buckets for trimming if needed + /// </summary> + private static readonly ConditionalWeakTable<T[][], object> s_allTlsBuckets = s_trimBuffers ? new ConditionalWeakTable<T[][], object>() : null; + /// <summary>Initialize the pool.</summary> public TlsOverPerCoreLockedStacksArrayPool() { @@ -180,15 +191,24 @@ namespace System.Buffers { t_tlsBuckets = tlsBuckets = new T[NumBuckets][]; tlsBuckets[bucketIndex] = array; + if (s_trimBuffers) + { + s_allTlsBuckets.Add(tlsBuckets, null); + if (Interlocked.Exchange(ref _callbackCreated, 1) != 1) + { + Gen2GcCallback.Register(Gen2GcCallbackFunc, this); + } + } } else { T[] prev = tlsBuckets[bucketIndex]; tlsBuckets[bucketIndex] = array; + if (prev != null) { - PerCoreLockedStacks bucket = _buckets[bucketIndex] ?? CreatePerCoreLockedStacks(bucketIndex); - bucket.TryPush(prev); + PerCoreLockedStacks stackBucket = _buckets[bucketIndex] ?? CreatePerCoreLockedStacks(bucketIndex); + stackBucket.TryPush(prev); } } } @@ -201,6 +221,103 @@ namespace System.Buffers } } + public bool Trim() + { + int milliseconds = Environment.TickCount; + MemoryPressure pressure = GetMemoryPressure(); + + ArrayPoolEventSource log = ArrayPoolEventSource.Log; + if (log.IsEnabled()) + log.BufferTrimPoll(milliseconds, (int)pressure); + + foreach (PerCoreLockedStacks bucket in _buckets) + { + bucket?.Trim((uint)milliseconds, Id, pressure, _bucketArraySizes); + } + + if (pressure == MemoryPressure.High) + { + // Under high pressure, release all thread locals + if (log.IsEnabled()) + { + foreach (KeyValuePair<T[][], object> tlsBuckets in s_allTlsBuckets) + { + T[][] buckets = tlsBuckets.Key; + for (int i = 0; i < buckets.Length; i++) + { + T[] buffer = Interlocked.Exchange(ref buckets[i], null); + if (buffer != null) + { + // As we don't want to take a perf hit in the rent path it + // is possible that a buffer could be rented as we "free" it. + log.BufferTrimmed(buffer.GetHashCode(), buffer.Length, Id); + } + } + } + } + else + { + foreach (KeyValuePair<T[][], object> tlsBuckets in s_allTlsBuckets) + { + T[][] buckets = tlsBuckets.Key; + Array.Clear(buckets, 0, buckets.Length); + } + } + } + + return true; + } + + /// <summary> + /// This is the static function that is called from the gen2 GC callback. + /// The input object is the instance we want the callback on. + /// </summary> + /// <remarks> + /// The reason that we make this function static and take the instance as a parameter is that + /// we would otherwise root the instance to the Gen2GcCallback object, leaking the instance even when + /// the application no longer needs it. + /// </remarks> + private static bool Gen2GcCallbackFunc(object target) + { + return ((TlsOverPerCoreLockedStacksArrayPool<T>)(target)).Trim(); + } + + private enum MemoryPressure + { + Low, + Medium, + High + } + + private static MemoryPressure GetMemoryPressure() + { + const double HighPressureThreshold = .90; // Percent of GC memory pressure threshold we consider "high" + const double MediumPressureThreshold = .70; // Percent of GC memory pressure threshold we consider "medium" + + GC.GetMemoryInfo(out uint threshold, out _, out uint lastLoad, out _, out _); + if (lastLoad >= threshold * HighPressureThreshold) + { + return MemoryPressure.High; + } + else if (lastLoad >= threshold * MediumPressureThreshold) + { + return MemoryPressure.Medium; + } + return MemoryPressure.Low; + } + + private static bool GetTrimBuffers() + { + // Environment uses ArrayPool, so we have to hit the API directly. +#if !CORECLR + // P/Invokes are different for CoreCLR/RT- for RT we'll not allow + // enabling/disabling for now. + return true; +#else + return CLRConfig.GetBoolValueWithFallbacks("System.Buffers.ArrayPool.TrimShared", "DOTNET_SYSTEM_BUFFERS_ARRAYPOOL_TRIMSHARED", defaultValue: true); +#endif + } + /// <summary> /// Stores a set of stacks of arrays, with one stack per core. /// </summary> @@ -228,7 +345,7 @@ namespace System.Buffers // Try to push on to the associated stack first. If that fails, // round-robin through the other stacks. LockedStack[] stacks = _perCoreStacks; - int index = Environment.CurrentExecutionId % stacks.Length; + int index = RuntimeThread.GetCurrentProcessorId() % stacks.Length; for (int i = 0; i < stacks.Length; i++) { if (stacks[index].TryPush(array)) return; @@ -244,7 +361,7 @@ namespace System.Buffers // round-robin through the other stacks. T[] arr; LockedStack[] stacks = _perCoreStacks; - int index = Environment.CurrentExecutionId % stacks.Length; + int index = RuntimeThread.GetCurrentProcessorId() % stacks.Length; for (int i = 0; i < stacks.Length; i++) { if ((arr = stacks[index].TryPop()) != null) return arr; @@ -252,6 +369,16 @@ namespace System.Buffers } return null; } + + public bool Trim(uint tickCount, int id, MemoryPressure pressure, int[] bucketSizes) + { + LockedStack[] stacks = _perCoreStacks; + for (int i = 0; i < stacks.Length; i++) + { + stacks[i].Trim(tickCount, id, pressure, bucketSizes[i]); + } + return true; + } } /// <summary>Provides a simple stack of arrays, protected by a lock.</summary> @@ -259,6 +386,7 @@ namespace System.Buffers { private readonly T[][] _arrays = new T[MaxBuffersPerArraySizePerCore][]; private int _count; + private uint _firstStackItemMS; [MethodImpl(MethodImplOptions.AggressiveInlining)] public bool TryPush(T[] array) @@ -267,6 +395,12 @@ namespace System.Buffers Monitor.Enter(this); if (_count < MaxBuffersPerArraySizePerCore) { + if (s_trimBuffers && _count == 0) + { + // Stash the time the bottom of the stack was filled + _firstStackItemMS = (uint)Environment.TickCount; + } + _arrays[_count++] = array; enqueued = true; } @@ -287,6 +421,76 @@ namespace System.Buffers Monitor.Exit(this); return arr; } + + public void Trim(uint tickCount, int id, MemoryPressure pressure, int bucketSize) + { + const uint StackTrimAfterMS = 60 * 1000; // Trim after 60 seconds for low/moderate pressure + const uint StackHighTrimAfterMS = 10 * 1000; // Trim after 10 seconds for high pressure + const uint StackRefreshMS = StackTrimAfterMS / 4; // Time bump after trimming (1/4 trim time) + const int StackLowTrimCount = 1; // Trim one item when pressure is low + const int StackMediumTrimCount = 2; // Trim two items when pressure is moderate + const int StackHighTrimCount = MaxBuffersPerArraySizePerCore; // Trim all items when pressure is high + const int StackLargeBucket = 16384; // If the bucket is larger than this we'll trim an extra when under high pressure + const int StackModerateTypeSize = 16; // If T is larger than this we'll trim an extra when under high pressure + const int StackLargeTypeSize = 32; // If T is larger than this we'll trim an extra (additional) when under high pressure + + if (_count == 0) + return; + uint trimTicks = pressure == MemoryPressure.High ? StackHighTrimAfterMS : StackTrimAfterMS; + + lock (this) + { + if (_count > 0 && _firstStackItemMS > tickCount || (tickCount - _firstStackItemMS) > trimTicks) + { + // We've wrapped the tick count or elapsed enough time since the + // first item went into the stack. Drop the top item so it can + // be collected and make the stack look a little newer. + + ArrayPoolEventSource log = ArrayPoolEventSource.Log; + int trimCount = StackLowTrimCount; + switch (pressure) + { + case MemoryPressure.High: + trimCount = StackHighTrimCount; + + // When pressure is high, aggressively trim larger arrays. + if (bucketSize > StackLargeBucket) + { + trimCount++; + } + if (Unsafe.SizeOf<T>() > StackModerateTypeSize) + { + trimCount++; + } + if (Unsafe.SizeOf<T>() > StackLargeTypeSize) + { + trimCount++; + } + break; + case MemoryPressure.Medium: + trimCount = StackMediumTrimCount; + break; + } + + while (_count > 0 && trimCount-- > 0) + { + T[] array = _arrays[--_count]; + _arrays[_count] = null; + + if (log.IsEnabled()) + { + log.BufferTrimmed(array.GetHashCode(), array.Length, id); + } + } + + if (_count > 0 && _firstStackItemMS < uint.MaxValue - StackRefreshMS) + { + // Give the remaining items a bit more time + _firstStackItemMS += StackRefreshMS; + } + } + } + } } } } |