Welcome to the mirror list, hosted at ThFree Co, Russian Federation.

github.com/dotnet/runtime.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKoundinya Veluri <kouvel@users.noreply.github.com>2021-07-13 18:59:20 +0300
committerGitHub <noreply@github.com>2021-07-13 18:59:20 +0300
commit4ff3762a2e08245a5f005beffeb42cca5c4b6c10 (patch)
treee2f3e18cee966f687bad55934a347f68b1641ae2 /src/coreclr/vm
parentb2a670b5421af99edad62f8cb5ae172cd8d05030 (diff)
Update spin-wait pause/yield normalization (#55295)
Update spin-wait pause/yield normalization - Modified the measurement to use much less time and to remeasure periodically to reduce CPU usage during startup - Each measurement does a low-microsecond-level measurement of pause/yield times - A small amount of history of recent measurements is retained and, for now, the lowest of those measurements is used for normalization - Measurements are done lazily, and another measurement is taken at most once every few seconds - Added a profiling event that includes info about a measurement and the established value from recent measurements that is used for normalization
Diffstat (limited to 'src/coreclr/vm')
-rw-r--r--src/coreclr/vm/CMakeLists.txt2
-rw-r--r--src/coreclr/vm/ClrEtwAll.man27
-rw-r--r--src/coreclr/vm/ClrEtwAllMeta.lst8
-rw-r--r--src/coreclr/vm/comsynchronizable.cpp17
-rw-r--r--src/coreclr/vm/comsynchronizable.h2
-rw-r--r--src/coreclr/vm/ecalllist.h2
-rw-r--r--src/coreclr/vm/eventtrace.cpp6
-rw-r--r--src/coreclr/vm/finalizerthread.cpp5
-rw-r--r--src/coreclr/vm/threads.cpp9
-rw-r--r--src/coreclr/vm/yieldprocessornormalized.cpp305
10 files changed, 301 insertions, 82 deletions
diff --git a/src/coreclr/vm/CMakeLists.txt b/src/coreclr/vm/CMakeLists.txt
index 9c2cb3df0b7..f31e5a3ca12 100644
--- a/src/coreclr/vm/CMakeLists.txt
+++ b/src/coreclr/vm/CMakeLists.txt
@@ -136,7 +136,6 @@ set(VM_SOURCES_DAC_AND_WKS_COMMON
versionresilienthashcode.cpp
virtualcallstub.cpp
win32threadpool.cpp
- yieldprocessornormalized.cpp
zapsig.cpp
)
@@ -389,6 +388,7 @@ set(VM_SOURCES_WKS
threadsuspend.cpp
typeparse.cpp
weakreferencenative.cpp
+ yieldprocessornormalized.cpp
${VM_SOURCES_GDBJIT}
)
diff --git a/src/coreclr/vm/ClrEtwAll.man b/src/coreclr/vm/ClrEtwAll.man
index 45895f16fce..0eed049c17b 100644
--- a/src/coreclr/vm/ClrEtwAll.man
+++ b/src/coreclr/vm/ClrEtwAll.man
@@ -438,7 +438,13 @@
<opcode name="Profiler" message="$(string.RuntimePublisher.ProfilerOpcodeMessage)" symbol="CLR_PROFILER_OPCODE" value="11"/>
</opcodes>
</task>
- <!--Next available ID is 37-->
+ <task name="YieldProcessorMeasurement" symbol="CLR_YIELD_PROCESSOR_MEASUREMENT_TASK"
+ value="37" eventGUID="{B4AFC324-DECE-4B02-86DC-AAB8F22BC1B1}"
+ message="$(string.RuntimePublisher.YieldProcessorMeasurementTaskMessage)">
+ <opcodes>
+ </opcodes>
+ </task>
+ <!--Next available ID is 38-->
</tasks>
<!--Maps-->
<maps>
@@ -2916,6 +2922,19 @@
</Settings>
</UserData>
</template>
+
+ <template tid="YieldProcessorMeasurement">
+ <data name="ClrInstanceID" inType="win:UInt16"/>
+ <data name="NsPerYield" inType="win:Double"/>
+ <data name="EstablishedNsPerYield" inType="win:Double"/>
+ <UserData>
+ <Settings xmlns="myNs">
+ <ClrInstanceID> %1 </ClrInstanceID>
+ <NsPerYield> %2 </NsPerYield>
+ <EstablishedNsPerYield> %3 </EstablishedNsPerYield>
+ </Settings>
+ </UserData>
+ </template>
</templates>
<events>
@@ -3313,6 +3332,10 @@
keywords ="ThreadingKeyword" opcode="Wait"
task="ThreadPoolWorkerThread"
symbol="ThreadPoolWorkerThreadWait" message="$(string.RuntimePublisher.ThreadPoolWorkerThreadEventMessage)"/>
+
+ <event value="58" version="0" level="win:Informational" template="YieldProcessorMeasurement"
+ keywords="ThreadingKeyword" task="YieldProcessorMeasurement" opcode="win:Info"
+ symbol="YieldProcessorMeasurement" message="$(string.RuntimePublisher.YieldProcessorMeasurementEventMessage)"/>
<!-- CLR private ThreadPool events -->
<event value="60" version="0" level="win:Verbose" template="ThreadPoolWorkingThreadCount"
@@ -8049,6 +8072,7 @@
<string id="RuntimePublisher.WorkerThreadRetirementRetireThreadEventMessage" value="WorkerThreadCount=%1;%nRetiredWorkerThreads=%2" />
<string id="RuntimePublisher.WorkerThreadRetirementUnretireThreadEventMessage" value="WorkerThreadCount=%1;%nRetiredWorkerThreads=%2" />
<string id="RuntimePublisher.ThreadPoolWorkerThreadEventMessage" value="WorkerThreadCount=%1;%nRetiredWorkerThreadCount=%2;%nClrInstanceID=%3" />
+ <string id="RuntimePublisher.YieldProcessorMeasurementEventMessage" value="ClrInstanceID=%1;%nNsPerYield=%2;%nEstablishedNsPerYield=%3" />
<string id="RuntimePublisher.ThreadPoolWorkerThreadAdjustmentSampleEventMessage" value="Throughput=%1;%nClrInstanceID=%2" />
<string id="RuntimePublisher.ThreadPoolWorkerThreadAdjustmentAdjustmentEventMessage" value="AverageThroughput=%1;%nNewWorkerThreadCount=%2;%nReason=%3;%nClrInstanceID=%4" />
<string id="RuntimePublisher.ThreadPoolWorkerThreadAdjustmentStatsEventMessage" value="Duration=%1;%nThroughput=%2;%nThreadWave=%3;%nThroughputWave=%4;%nThroughputErrorEstimate=%5;%nAverageThroughputErrorEstimate=%6;%nThroughputRatio=%7;%nConfidence=%8;%nNewControlSetting=%9;%nNewThreadWaveMagnitude=%10;%nClrInstanceID=%11" />
@@ -8334,6 +8358,7 @@
<string id="RuntimePublisher.JitInstrumentationDataTaskMessage" value="JitInstrumentationData" />
<string id="RuntimePublisher.ExecutionCheckpointTaskMessage" value="ExecutionCheckpoint" />
<string id="RuntimePublisher.ProfilerTaskMessage" value="Profiler" />
+ <string id="RuntimePublisher.YieldProcessorMeasurementTaskMessage" value="YieldProcessorMeasurement" />
<string id="RundownPublisher.EEStartupTaskMessage" value="Runtime" />
<string id="RundownPublisher.MethodTaskMessage" value="Method" />
diff --git a/src/coreclr/vm/ClrEtwAllMeta.lst b/src/coreclr/vm/ClrEtwAllMeta.lst
index 4ac4fe405d9..9c5738ef43d 100644
--- a/src/coreclr/vm/ClrEtwAllMeta.lst
+++ b/src/coreclr/vm/ClrEtwAllMeta.lst
@@ -134,9 +134,9 @@ nomac:GarbageCollection:::GCJoin_V2
nostack:Type:::BulkType
-###################
-# Threadpool events
-###################
+#################################
+# Threading and Threadpool events
+#################################
nomac:WorkerThreadCreation:::WorkerThreadCreate
noclrinstanceid:WorkerThreadCreation:::WorkerThreadCreate
nomac:WorkerThreadCreation:::WorkerThreadTerminate
@@ -170,6 +170,8 @@ nomac:ThreadPoolWorkerThreadAdjustment:::ThreadPoolWorkerThreadAdjustmentSample
nostack:ThreadPoolWorkerThreadAdjustment:::ThreadPoolWorkerThreadAdjustmentSample
nomac:ThreadPoolWorkerThreadAdjustment:::ThreadPoolWorkerThreadAdjustmentAdjustment
nostack:ThreadPoolWorkerThreadAdjustment:::ThreadPoolWorkerThreadAdjustmentAdjustment
+nomac:YieldProcessorMeasurement:::YieldProcessorMeasurement
+nostack:YieldProcessorMeasurement:::YieldProcessorMeasurement
##################
# Exception events
diff --git a/src/coreclr/vm/comsynchronizable.cpp b/src/coreclr/vm/comsynchronizable.cpp
index 39f00d06741..15a33c711e7 100644
--- a/src/coreclr/vm/comsynchronizable.cpp
+++ b/src/coreclr/vm/comsynchronizable.cpp
@@ -1089,22 +1089,13 @@ FCIMPL1(void, ThreadNative::SetIsThreadpoolThread, ThreadBaseObject* thread)
}
FCIMPLEND
-INT32 QCALLTYPE ThreadNative::GetOptimalMaxSpinWaitsPerSpinIteration()
+FCIMPL0(INT32, ThreadNative::GetOptimalMaxSpinWaitsPerSpinIteration)
{
- QCALL_CONTRACT;
-
- INT32 optimalMaxNormalizedYieldsPerSpinIteration;
-
- BEGIN_QCALL;
-
- // RuntimeThread calls this function only once lazily and caches the result, so ensure initialization
- EnsureYieldProcessorNormalizedInitialized();
- optimalMaxNormalizedYieldsPerSpinIteration = g_optimalMaxNormalizedYieldsPerSpinIteration;
-
- END_QCALL;
+ FCALL_CONTRACT;
- return optimalMaxNormalizedYieldsPerSpinIteration;
+ return (INT32)YieldProcessorNormalization::GetOptimalMaxNormalizedYieldsPerSpinIteration();
}
+FCIMPLEND
FCIMPL1(void, ThreadNative::SpinWait, int iterations)
{
diff --git a/src/coreclr/vm/comsynchronizable.h b/src/coreclr/vm/comsynchronizable.h
index e9968201b8b..cfab18d9010 100644
--- a/src/coreclr/vm/comsynchronizable.h
+++ b/src/coreclr/vm/comsynchronizable.h
@@ -86,7 +86,7 @@ public:
UINT64 QCALLTYPE GetProcessDefaultStackSize();
static FCDECL1(INT32, GetManagedThreadId, ThreadBaseObject* th);
- static INT32 QCALLTYPE GetOptimalMaxSpinWaitsPerSpinIteration();
+ static FCDECL0(INT32, GetOptimalMaxSpinWaitsPerSpinIteration);
static FCDECL1(void, SpinWait, int iterations);
static BOOL QCALLTYPE YieldThread();
static FCDECL0(Object*, GetCurrentThread);
diff --git a/src/coreclr/vm/ecalllist.h b/src/coreclr/vm/ecalllist.h
index f77dc75c80b..ea3f65d7291 100644
--- a/src/coreclr/vm/ecalllist.h
+++ b/src/coreclr/vm/ecalllist.h
@@ -602,7 +602,7 @@ FCFuncStart(gThreadFuncs)
#endif // FEATURE_COMINTEROP
FCFuncElement("Interrupt", ThreadNative::Interrupt)
FCFuncElement("Join", ThreadNative::Join)
- QCFuncElement("GetOptimalMaxSpinWaitsPerSpinIterationInternal", ThreadNative::GetOptimalMaxSpinWaitsPerSpinIteration)
+ FCFuncElement("get_OptimalMaxSpinWaitsPerSpinIteration", ThreadNative::GetOptimalMaxSpinWaitsPerSpinIteration)
FCFuncElement("GetCurrentProcessorNumber", ThreadNative::GetCurrentProcessorNumber)
FCFuncEnd()
diff --git a/src/coreclr/vm/eventtrace.cpp b/src/coreclr/vm/eventtrace.cpp
index ac7be2a9439..aded74deda6 100644
--- a/src/coreclr/vm/eventtrace.cpp
+++ b/src/coreclr/vm/eventtrace.cpp
@@ -4417,6 +4417,12 @@ VOID EtwCallbackCommon(
{
ETW::TypeSystemLog::OnKeywordsChanged();
}
+
+ if (g_fEEStarted && !g_fEEShutDown)
+ {
+ // Emit the YieldProcessor measured values at the beginning of the trace
+ YieldProcessorNormalization::FireMeasurementEvents();
+ }
}
// Individual callbacks for each EventPipe provider.
diff --git a/src/coreclr/vm/finalizerthread.cpp b/src/coreclr/vm/finalizerthread.cpp
index 1e4dbf913c8..e8370315e66 100644
--- a/src/coreclr/vm/finalizerthread.cpp
+++ b/src/coreclr/vm/finalizerthread.cpp
@@ -379,11 +379,6 @@ DWORD WINAPI FinalizerThread::FinalizerThreadStart(void *args)
{
GetFinalizerThread()->SetBackground(TRUE);
- {
- GCX_PREEMP();
- EnsureYieldProcessorNormalizedInitialized();
- }
-
while (!fQuitFinalizer)
{
// This will apply any policy for swallowing exceptions during normal
diff --git a/src/coreclr/vm/threads.cpp b/src/coreclr/vm/threads.cpp
index 2c55f8770b0..c6485b86d59 100644
--- a/src/coreclr/vm/threads.cpp
+++ b/src/coreclr/vm/threads.cpp
@@ -1150,8 +1150,6 @@ void InitThreadManager()
}
CONTRACTL_END;
- InitializeYieldProcessorNormalizedCrst();
-
// All patched helpers should fit into one page.
// If you hit this assert on retail build, there is most likely problem with BBT script.
_ASSERTE_ALL_BUILDS("clr/src/VM/threads.cpp", (BYTE*)JIT_PatchedCodeLast - (BYTE*)JIT_PatchedCodeStart > (ptrdiff_t)0);
@@ -7194,6 +7192,7 @@ BOOL Thread::HaveExtraWorkForFinalizer()
|| Thread::CleanupNeededForFinalizedThread()
|| (m_DetachCount > 0)
|| SystemDomain::System()->RequireAppDomainCleanup()
+ || YieldProcessorNormalization::IsMeasurementScheduled()
|| ThreadStore::s_pThreadStore->ShouldTriggerGCForDeadThreads();
}
@@ -7240,6 +7239,12 @@ void Thread::DoExtraWorkForFinalizer()
// If there were any TimerInfos waiting to be released, they'll get flushed now
ThreadpoolMgr::FlushQueueOfTimerInfos();
+ if (YieldProcessorNormalization::IsMeasurementScheduled())
+ {
+ GCX_PREEMP();
+ YieldProcessorNormalization::PerformMeasurement();
+ }
+
ThreadStore::s_pThreadStore->TriggerGCForDeadThreadsIfNecessary();
}
diff --git a/src/coreclr/vm/yieldprocessornormalized.cpp b/src/coreclr/vm/yieldprocessornormalized.cpp
index 91547923310..2c51e73b678 100644
--- a/src/coreclr/vm/yieldprocessornormalized.cpp
+++ b/src/coreclr/vm/yieldprocessornormalized.cpp
@@ -2,17 +2,33 @@
// The .NET Foundation licenses this file to you under the MIT license.
#include "common.h"
+#include "yieldprocessornormalized.h"
-static Volatile<bool> s_isYieldProcessorNormalizedInitialized = false;
-static CrstStatic s_initializeYieldProcessorNormalizedCrst;
+#ifndef CROSSGEN_COMPILE
-void InitializeYieldProcessorNormalizedCrst()
+#include "finalizerthread.h"
+
+enum class NormalizationState : UINT8
{
- WRAPPER_NO_CONTRACT;
- s_initializeYieldProcessorNormalizedCrst.Init(CrstLeafLock);
-}
+ Uninitialized,
+ Initialized,
+ Failed
+};
+
+static const int NsPerYieldMeasurementCount = 8;
+static const unsigned int MeasurementPeriodMs = 4000;
+
+static const unsigned int NsPerS = 1000 * 1000 * 1000;
+
+static NormalizationState s_normalizationState = NormalizationState::Uninitialized;
+static unsigned int s_previousNormalizationTimeMs;
+
+static UINT64 s_performanceCounterTicksPerS;
+static double s_nsPerYieldMeasurements[NsPerYieldMeasurementCount];
+static int s_nextMeasurementIndex;
+static double s_establishedNsPerYield = YieldProcessorNormalization::TargetNsPerNormalizedYield;
-static void InitializeYieldProcessorNormalized()
+static unsigned int DetermineMeasureDurationUs()
{
CONTRACTL
{
@@ -22,92 +38,271 @@ static void InitializeYieldProcessorNormalized()
}
CONTRACTL_END;
- CrstHolder lock(&s_initializeYieldProcessorNormalizedCrst);
+ _ASSERTE(s_normalizationState != NormalizationState::Failed);
- if (s_isYieldProcessorNormalizedInitialized)
+ // On some systems, querying the high performance counter has relatively significant overhead. Increase the measure duration
+ // if the overhead seems high relative to the measure duration.
+ unsigned int measureDurationUs = 1;
+ LARGE_INTEGER li;
+ QueryPerformanceCounter(&li);
+ UINT64 startTicks = li.QuadPart;
+ QueryPerformanceCounter(&li);
+ UINT64 elapsedTicks = li.QuadPart - startTicks;
+ if (elapsedTicks >= s_performanceCounterTicksPerS * measureDurationUs * (1000 / 4) / NsPerS) // elapsed >= 1/4 of the measure duration
{
- return;
+ measureDurationUs *= 4;
+ }
+ return measureDurationUs;
+}
+
+static double MeasureNsPerYield(unsigned int measureDurationUs)
+{
+ CONTRACTL
+ {
+ NOTHROW;
+ GC_NOTRIGGER;
+ MODE_PREEMPTIVE;
}
+ CONTRACTL_END;
- // Intel pre-Skylake processor: measured typically 14-17 cycles per yield
- // Intel post-Skylake processor: measured typically 125-150 cycles per yield
- const int MeasureDurationMs = 10;
- const int NsPerSecond = 1000 * 1000 * 1000;
+ _ASSERTE(s_normalizationState != NormalizationState::Failed);
+
+ int yieldCount = (int)(measureDurationUs * 1000 / s_establishedNsPerYield) + 1;
+ UINT64 ticksPerS = s_performanceCounterTicksPerS;
+ UINT64 measureDurationTicks = ticksPerS * measureDurationUs / (1000 * 1000);
LARGE_INTEGER li;
- if (!QueryPerformanceFrequency(&li) || (ULONGLONG)li.QuadPart < 1000 / MeasureDurationMs)
+ QueryPerformanceCounter(&li);
+ UINT64 startTicks = li.QuadPart;
+
+ for (int i = 0; i < yieldCount; ++i)
{
- // High precision clock not available or clock resolution is too low, resort to defaults
- s_isYieldProcessorNormalizedInitialized = true;
- return;
+ System_YieldProcessor();
}
- ULONGLONG ticksPerSecond = li.QuadPart;
- // Measure the nanosecond delay per yield
- ULONGLONG measureDurationTicks = ticksPerSecond / (1000 / MeasureDurationMs);
- unsigned int yieldCount = 0;
QueryPerformanceCounter(&li);
- ULONGLONG startTicks = li.QuadPart;
- ULONGLONG elapsedTicks;
- do
- {
- // On some systems, querying the high performance counter has relatively significant overhead. Do enough yields to mask
- // the timing overhead. Assuming one yield has a delay of MinNsPerNormalizedYield, 1000 yields would have a delay in the
- // low microsecond range.
- for (int i = 0; i < 1000; ++i)
+ UINT64 elapsedTicks = li.QuadPart - startTicks;
+ while (elapsedTicks < measureDurationTicks)
+ {
+ int nextYieldCount =
+ Max(4,
+ elapsedTicks == 0
+ ? yieldCount / 4
+ : (int)(yieldCount * (measureDurationTicks - elapsedTicks) / (double)elapsedTicks) + 1);
+ for (int i = 0; i < nextYieldCount; ++i)
{
System_YieldProcessor();
}
- yieldCount += 1000;
QueryPerformanceCounter(&li);
- ULONGLONG nowTicks = li.QuadPart;
- elapsedTicks = nowTicks - startTicks;
- } while (elapsedTicks < measureDurationTicks);
- double nsPerYield = (double)elapsedTicks * NsPerSecond / ((double)yieldCount * ticksPerSecond);
- if (nsPerYield < 1)
+ elapsedTicks = li.QuadPart - startTicks;
+ yieldCount += nextYieldCount;
+ }
+
+ // Limit the minimum to a reasonable value considering that on some systems a yield may be implemented as a no-op
+ const double MinNsPerYield = 0.1;
+
+ // Measured values higher than this don't affect values calculated for normalization, and it's very unlikely for a yield to
+ // really take this long. Limit the maximum to keep the recorded values reasonable.
+ const double MaxNsPerYield = YieldProcessorNormalization::TargetMaxNsPerSpinIteration / 1.5 + 1;
+
+ return Max(MinNsPerYield, Min((double)elapsedTicks * NsPerS / ((double)yieldCount * ticksPerS), MaxNsPerYield));
+}
+
+void YieldProcessorNormalization::PerformMeasurement()
+{
+ CONTRACTL
+ {
+ NOTHROW;
+ GC_NOTRIGGER;
+ MODE_PREEMPTIVE;
+ }
+ CONTRACTL_END;
+
+ _ASSERTE(s_isMeasurementScheduled);
+
+ double latestNsPerYield;
+ if (s_normalizationState == NormalizationState::Initialized)
{
- nsPerYield = 1;
+ if (GetTickCount() - s_previousNormalizationTimeMs < MeasurementPeriodMs)
+ {
+ return;
+ }
+
+ int nextMeasurementIndex = s_nextMeasurementIndex;
+ latestNsPerYield = MeasureNsPerYield(DetermineMeasureDurationUs());
+ AtomicStore(&s_nsPerYieldMeasurements[nextMeasurementIndex], latestNsPerYield);
+ if (++nextMeasurementIndex >= NsPerYieldMeasurementCount)
+ {
+ nextMeasurementIndex = 0;
+ }
+ s_nextMeasurementIndex = nextMeasurementIndex;
}
+ else if (s_normalizationState == NormalizationState::Uninitialized)
+ {
+ LARGE_INTEGER li;
+ if (!QueryPerformanceFrequency(&li) || li.QuadPart < 1000 * 1000)
+ {
+ // High precision clock not available or clock resolution is too low, resort to defaults
+ s_normalizationState = NormalizationState::Failed;
+ return;
+ }
+ s_performanceCounterTicksPerS = li.QuadPart;
+
+ unsigned int measureDurationUs = DetermineMeasureDurationUs();
+ for (int i = 0; i < NsPerYieldMeasurementCount; ++i)
+ {
+ latestNsPerYield = MeasureNsPerYield(measureDurationUs);
+ AtomicStore(&s_nsPerYieldMeasurements[i], latestNsPerYield);
+ if (i == 0 || latestNsPerYield < s_establishedNsPerYield)
+ {
+ AtomicStore(&s_establishedNsPerYield, latestNsPerYield);
+ }
- // Calculate the number of yields required to span the duration of a normalized yield. Since nsPerYield is at least 1, this
- // value is naturally limited to MinNsPerNormalizedYield.
- int yieldsPerNormalizedYield = (int)(MinNsPerNormalizedYield / nsPerYield + 0.5);
- if (yieldsPerNormalizedYield < 1)
+ if (i < NsPerYieldMeasurementCount - 1)
+ {
+ FireEtwYieldProcessorMeasurement(GetClrInstanceId(), latestNsPerYield, s_establishedNsPerYield);
+ }
+ }
+ }
+ else
{
- yieldsPerNormalizedYield = 1;
+ _ASSERTE(s_normalizationState == NormalizationState::Failed);
+ return;
}
- _ASSERTE(yieldsPerNormalizedYield <= (int)MinNsPerNormalizedYield);
+
+ double establishedNsPerYield = s_nsPerYieldMeasurements[0];
+ for (int i = 1; i < NsPerYieldMeasurementCount; ++i)
+ {
+ double nsPerYield = s_nsPerYieldMeasurements[i];
+ if (nsPerYield < establishedNsPerYield)
+ {
+ establishedNsPerYield = nsPerYield;
+ }
+ }
+ if (establishedNsPerYield != s_establishedNsPerYield)
+ {
+ AtomicStore(&s_establishedNsPerYield, establishedNsPerYield);
+ }
+
+ FireEtwYieldProcessorMeasurement(GetClrInstanceId(), latestNsPerYield, s_establishedNsPerYield);
+
+ // Calculate the number of yields required to span the duration of a normalized yield
+ unsigned int yieldsPerNormalizedYield = Max(1u, (unsigned int)(TargetNsPerNormalizedYield / establishedNsPerYield + 0.5));
+ _ASSERTE(yieldsPerNormalizedYield <= MaxYieldsPerNormalizedYield);
+ s_yieldsPerNormalizedYield = yieldsPerNormalizedYield;
// Calculate the maximum number of yields that would be optimal for a late spin iteration. Typically, we would not want to
// spend excessive amounts of time (thousands of cycles) doing only YieldProcessor, as SwitchToThread/Sleep would do a
// better job of allowing other work to run.
- int optimalMaxNormalizedYieldsPerSpinIteration =
- (int)(NsPerOptimalMaxSpinIterationDuration / (yieldsPerNormalizedYield * nsPerYield) + 0.5);
- if (optimalMaxNormalizedYieldsPerSpinIteration < 1)
+ s_optimalMaxNormalizedYieldsPerSpinIteration =
+ Max(1u, (unsigned int)(TargetMaxNsPerSpinIteration / (yieldsPerNormalizedYield * establishedNsPerYield) + 0.5));
+ _ASSERTE(s_optimalMaxNormalizedYieldsPerSpinIteration <= MaxOptimalMaxNormalizedYieldsPerSpinIteration);
+
+ GCHeapUtilities::GetGCHeap()->SetYieldProcessorScalingFactor((float)yieldsPerNormalizedYield);
+
+ s_previousNormalizationTimeMs = GetTickCount();
+ s_normalizationState = NormalizationState::Initialized;
+ s_isMeasurementScheduled = false;
+}
+
+#endif // !CROSSGEN_COMPILE
+
+void YieldProcessorNormalization::ScheduleMeasurementIfNecessary()
+{
+ CONTRACTL
{
- optimalMaxNormalizedYieldsPerSpinIteration = 1;
+ NOTHROW;
+ GC_NOTRIGGER;
+ MODE_ANY;
}
+ CONTRACTL_END;
- g_yieldsPerNormalizedYield = yieldsPerNormalizedYield;
- g_optimalMaxNormalizedYieldsPerSpinIteration = optimalMaxNormalizedYieldsPerSpinIteration;
- s_isYieldProcessorNormalizedInitialized = true;
+#ifndef CROSSGEN_COMPILE
+ NormalizationState normalizationState = VolatileLoadWithoutBarrier(&s_normalizationState);
+ if (normalizationState == NormalizationState::Initialized)
+ {
+ if (GetTickCount() - s_previousNormalizationTimeMs < MeasurementPeriodMs)
+ {
+ return;
+ }
+ }
+ else if (normalizationState == NormalizationState::Uninitialized)
+ {
+ }
+ else
+ {
+ _ASSERTE(normalizationState == NormalizationState::Failed);
+ return;
+ }
- GCHeapUtilities::GetGCHeap()->SetYieldProcessorScalingFactor((float)yieldsPerNormalizedYield);
+ // !g_fEEStarted is required for FinalizerThread::EnableFinalization() below
+ if (s_isMeasurementScheduled || !g_fEEStarted)
+ {
+ return;
+ }
+
+ s_isMeasurementScheduled = true;
+ FinalizerThread::EnableFinalization();
+#endif // !CROSSGEN_COMPILE
}
-void EnsureYieldProcessorNormalizedInitialized()
+#ifndef CROSSGEN_COMPILE
+
+void YieldProcessorNormalization::FireMeasurementEvents()
{
CONTRACTL
{
NOTHROW;
GC_NOTRIGGER;
- MODE_PREEMPTIVE;
+ MODE_ANY;
}
CONTRACTL_END;
- if (!s_isYieldProcessorNormalizedInitialized)
+ if (!EventEnabledYieldProcessorMeasurement())
{
- InitializeYieldProcessorNormalized();
+ return;
}
+
+ // This function may be called at any time to fire events about recorded measurements. There is no synchronization for the
+ // recorded information, so try to enumerate the array with some care.
+ double establishedNsPerYield = AtomicLoad(&s_establishedNsPerYield);
+ int nextIndex = VolatileLoadWithoutBarrier(&s_nextMeasurementIndex);
+ for (int i = 0; i < NsPerYieldMeasurementCount; ++i)
+ {
+ double nsPerYield = AtomicLoad(&s_nsPerYieldMeasurements[nextIndex]);
+ if (nsPerYield != 0) // the array may not be fully initialized yet
+ {
+ FireEtwYieldProcessorMeasurement(GetClrInstanceId(), nsPerYield, establishedNsPerYield);
+ }
+
+ if (++nextIndex >= NsPerYieldMeasurementCount)
+ {
+ nextIndex = 0;
+ }
+ }
+}
+
+double YieldProcessorNormalization::AtomicLoad(double *valueRef)
+{
+ WRAPPER_NO_CONTRACT;
+
+#ifdef TARGET_64BIT
+ return VolatileLoadWithoutBarrier(valueRef);
+#else
+ return InterlockedCompareExchangeT(valueRef, 0.0, 0.0);
+#endif
}
+
+void YieldProcessorNormalization::AtomicStore(double *valueRef, double value)
+{
+ WRAPPER_NO_CONTRACT;
+
+#ifdef TARGET_64BIT
+ *valueRef = value;
+#else
+ InterlockedExchangeT(valueRef, value);
+#endif
+}
+
+#endif // !CROSSGEN_COMPILE