Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/dotnet/runtime.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDavid Wrighton <davidwr@microsoft.com>2021-01-14 01:40:56 +0300
committerGitHub <noreply@github.com>2021-01-14 01:40:56 +0300
commit6ded57b3d2c66c73a0a2674ec820c7a8137d7590 (patch)
tree18e60baa0d29da33efdd86489f83f884feecfa0e /src/coreclr/jit/flowgraph.cpp
parentfd744a8f819771609ba38f8279cfc747cf1cdd3b (diff)
Phase 1 of refactoring pgo data pipeline (#46638)
Phase 1 of replacing existing infrastructure around handling of pgo data with more flexible schema based approach. The schema based approach allows the JIT to define the form of data needed for instrumentation. - The schema associates 4 32bit integers with each data collection point (ILOffset, InstrumentationKind, Count, and Other) - Rich meaning is attached to InstrumentationKind, and Count - InstrumentationKind defines the size and layout of individual instrumentation data items - Count allows a single schema item to be repeated - ILOffset and Other are not processed in any specific way by the infrastructure Changes part of this phase - PgoManager holds arbitrary amount of pgo data instead of a slab - Aware of collectible assemblies - Match with pgo data utilizes hash of IL body in addition to IL size information for greater accuracy in match - JIT no longer uses block count apis, and instead uses schema based apis - JIT now explicitly defines the shape of data collected for both basic block and type probes - The rest of the system handles that without deep knowledge of what those formats are - Text file format for pgo data updated - Existing IBC infrastructure adjusted to speak in terms of schema concept - Uncompressed and binary encoded implementation of Pgo schema handling - Update SuperPMI to handle new apis Future Changes for static Pgo - Move Pgo type handle histogram processing into JIT - Extract Pgo data from process using Event infrastructure - Triggers for controlling Pgo data extraction - Instrumented Pgo processing as part of dotnet-pgo tool - Pgo data flow in crossgen2
Diffstat (limited to 'src/coreclr/jit/flowgraph.cpp')
-rw-r--r--src/coreclr/jit/flowgraph.cpp341
1 files changed, 196 insertions, 145 deletions
diff --git a/src/coreclr/jit/flowgraph.cpp b/src/coreclr/jit/flowgraph.cpp
index efb3305a620..9314d48f4a0 100644
--- a/src/coreclr/jit/flowgraph.cpp
+++ b/src/coreclr/jit/flowgraph.cpp
@@ -176,9 +176,10 @@ void Compiler::fgInit()
fgPreviousCandidateSIMDFieldAsgStmt = nullptr;
#endif
- fgHasSwitch = false;
- fgBlockCounts = nullptr;
-
+ fgHasSwitch = false;
+ fgPgoSchema = nullptr;
+ fgPgoData = nullptr;
+ fgPgoSchemaCount = 0;
fgPredListSortVector = nullptr;
}
@@ -217,7 +218,7 @@ bool Compiler::fgHaveProfileData()
return false;
}
- return (fgBlockCounts != nullptr);
+ return (fgPgoSchema != nullptr);
}
//------------------------------------------------------------------------
@@ -370,11 +371,12 @@ bool Compiler::fgGetProfileWeightForBasicBlock(IL_OFFSET offset, BasicBlock::wei
return false;
}
- for (UINT32 i = 0; i < fgBlockCountsCount; i++)
+ for (UINT32 i = 0; i < fgPgoSchemaCount; i++)
{
- if (fgBlockCounts[i].ILOffset == offset)
+ if ((fgPgoSchema[i].InstrumentationKind == ICorJitInfo::PgoInstrumentationKind::BasicBlockIntCount) &&
+ ((IL_OFFSET)fgPgoSchema[i].ILOffset == offset))
{
- *weightWB = (BasicBlock::weight_t)fgBlockCounts[i].ExecutionCount;
+ *weightWB = (BasicBlock::weight_t) * (uint32_t*)(fgPgoData + fgPgoSchema[i].Offset);
return true;
}
}
@@ -383,6 +385,38 @@ bool Compiler::fgGetProfileWeightForBasicBlock(IL_OFFSET offset, BasicBlock::wei
return true;
}
+template <class TFunctor>
+class ClassProbeVisitor final : public GenTreeVisitor<ClassProbeVisitor<TFunctor>>
+{
+public:
+ enum
+ {
+ DoPreOrder = true
+ };
+
+ TFunctor& m_functor;
+ Compiler* m_compiler;
+
+ ClassProbeVisitor(Compiler* compiler, TFunctor& functor)
+ : GenTreeVisitor<ClassProbeVisitor>(compiler), m_functor(functor), m_compiler(compiler)
+ {
+ }
+ Compiler::fgWalkResult PreOrderVisit(GenTree** use, GenTree* user)
+ {
+ GenTree* const node = *use;
+ if (node->IsCall())
+ {
+ GenTreeCall* const call = node->AsCall();
+ if (call->IsVirtual() && (call->gtCallType != CT_INDIRECT))
+ {
+ m_functor(m_compiler, call);
+ }
+ }
+
+ return Compiler::WALK_CONTINUE;
+ }
+};
+
//------------------------------------------------------------------------
// fgInstrumentMethod: add instrumentation probes to the method
//
@@ -407,6 +441,7 @@ bool Compiler::fgGetProfileWeightForBasicBlock(IL_OFFSET offset, BasicBlock::wei
void Compiler::fgInstrumentMethod()
{
noway_assert(!compIsForInlining());
+ jitstd::vector<ICorJitInfo::PgoInstrumentationSchema> schema(getAllocator());
// Count the number of basic blocks in the method
// that will get block count probes.
@@ -415,10 +450,80 @@ void Compiler::fgInstrumentMethod()
BasicBlock* block;
for (block = fgFirstBB; (block != nullptr); block = block->bbNext)
{
- if (!(block->bbFlags & BBF_IMPORTED) || (block->bbFlags & BBF_INTERNAL))
+ // We don't want to profile any un-imported blocks
+ //
+ if ((block->bbFlags & BBF_IMPORTED) == 0)
+ {
+ continue;
+ }
+
+ if ((block->bbFlags & BBF_HAS_CLASS_PROFILE) != 0)
+ {
+ class BuildClassProbeSchemaGen
+ {
+ jitstd::vector<ICorJitInfo::PgoInstrumentationSchema>* m_schema;
+
+ public:
+ BuildClassProbeSchemaGen(jitstd::vector<ICorJitInfo::PgoInstrumentationSchema>* schema)
+ : m_schema(schema)
+ {
+ }
+ void operator()(Compiler* compiler, GenTreeCall* call)
+ {
+ ICorJitInfo::PgoInstrumentationSchema schemaElem;
+ schemaElem.Count = 1;
+ schemaElem.Other = ICorJitInfo::ClassProfile::CLASS_FLAG;
+ if (call->IsVirtualStub())
+ {
+ schemaElem.Other |= ICorJitInfo::ClassProfile::INTERFACE_FLAG;
+ }
+ else
+ {
+ assert(call->IsVirtualVtable());
+ }
+
+ schemaElem.InstrumentationKind = ICorJitInfo::PgoInstrumentationKind::TypeHandleHistogramCount;
+ schemaElem.ILOffset = jitGetILoffs(call->gtClassProfileCandidateInfo->ilOffset);
+ schemaElem.Offset = 0;
+
+ m_schema->push_back(schemaElem);
+
+ // Re-using ILOffset and Other fields from schema item for TypeHandleHistogramCount
+ schemaElem.InstrumentationKind = ICorJitInfo::PgoInstrumentationKind::TypeHandleHistogramTypeHandle;
+ schemaElem.Count = ICorJitInfo::ClassProfile::SIZE;
+ m_schema->push_back(schemaElem);
+ }
+ };
+ // Scan the statements and identify the class probes
+ //
+ BuildClassProbeSchemaGen schemaGen(&schema);
+ ClassProbeVisitor<BuildClassProbeSchemaGen> visitor(this, schemaGen);
+ for (Statement* stmt : block->Statements())
+ {
+ visitor.WalkTree(stmt->GetRootNodePointer(), nullptr);
+ }
+ }
+
+ if (block->bbFlags & BBF_INTERNAL)
{
continue;
}
+
+ // Assign the current block's IL offset into the profile data
+ // (make sure IL offset is sane)
+ //
+ IL_OFFSET offset = block->bbCodeOffs;
+ assert((int)offset >= 0);
+
+ ICorJitInfo::PgoInstrumentationSchema schemaElem;
+ schemaElem.Count = 1;
+ schemaElem.Other = 0;
+ schemaElem.InstrumentationKind = ICorJitInfo::PgoInstrumentationKind::BasicBlockIntCount;
+ schemaElem.ILOffset = offset;
+ schemaElem.Offset = 0;
+
+ schema.push_back(schemaElem);
+
countOfBlocks++;
}
@@ -426,6 +531,7 @@ void Compiler::fgInstrumentMethod()
// when importing.
//
int countOfCalls = info.compClassProbeCount;
+ assert(((countOfCalls * 2) + countOfBlocks) == (int)schema.size());
// Optionally bail out, if there are less than three blocks and no call sites to profile.
// One block is common. We don't expect to see zero or two blocks here.
@@ -444,16 +550,10 @@ void Compiler::fgInstrumentMethod()
// Allocate the profile buffer
//
- // Allocation is in multiples of ICorJitInfo::BlockCounts. For each profile table we need
- // some multiple of these.
- //
- const unsigned entriesPerCall = sizeof(ICorJitInfo::ClassProfile) / sizeof(ICorJitInfo::BlockCounts);
- assert(entriesPerCall * sizeof(ICorJitInfo::BlockCounts) == sizeof(ICorJitInfo::ClassProfile));
-
- const unsigned totalEntries = countOfBlocks + entriesPerCall * countOfCalls;
- ICorJitInfo::BlockCounts* profileBlockCountsStart = nullptr;
+ BYTE* profileMemory;
- HRESULT res = info.compCompHnd->allocMethodBlockCounts(totalEntries, &profileBlockCountsStart);
+ HRESULT res = info.compCompHnd->allocPgoInstrumentationBySchema(info.compMethodHnd, schema.data(),
+ (UINT32)schema.size(), &profileMemory);
// We may not be able to instrument, if so we'll set this false.
// We can't just early exit, because we have to clean up calls that we might have profiled.
@@ -472,9 +572,6 @@ void Compiler::fgInstrumentMethod()
}
}
- ICorJitInfo::BlockCounts* profileBlockCountsEnd = &profileBlockCountsStart[countOfBlocks];
- ICorJitInfo::BlockCounts* profileEnd = &profileBlockCountsStart[totalEntries];
-
// For each BasicBlock (non-Internal)
// 1. Assign the blocks bbCodeOffs to the ILOffset field of this blocks profile data.
// 2. Add an operation that increments the ExecutionCount field at the beginning of the block.
@@ -482,7 +579,10 @@ void Compiler::fgInstrumentMethod()
// Each (non-Internal) block has it own BlockCounts tuple [ILOffset, ExecutionCount]
// To start we initialize our current one with the first one that we allocated
//
- ICorJitInfo::BlockCounts* currentBlockCounts = profileBlockCountsStart;
+ int currentSchemaIndex = 0;
+
+ // Hold the address of the first blocks ExecutionCount
+ size_t addrOfFirstExecutionCount = 0;
for (block = fgFirstBB; (block != nullptr); block = block->bbNext)
{
@@ -508,130 +608,93 @@ void Compiler::fgInstrumentMethod()
//
JITDUMP("Scanning for calls to profile in " FMT_BB "\n", block->bbNum);
- class ClassProbeVisitor final : public GenTreeVisitor<ClassProbeVisitor>
+ class ClassProbeInserter
{
- public:
- enum
- {
- DoPreOrder = true
- };
-
- int m_count;
- ICorJitInfo::ClassProfile* m_tableBase;
- bool m_instrument;
+ jitstd::vector<ICorJitInfo::PgoInstrumentationSchema>* m_schema;
+ BYTE* m_profileMemory;
+ int* m_currentSchemaIndex;
+ bool m_instrument;
- ClassProbeVisitor(Compiler* compiler, ICorJitInfo::ClassProfile* tableBase, bool instrument)
- : GenTreeVisitor<ClassProbeVisitor>(compiler)
- , m_count(0)
- , m_tableBase(tableBase)
+ public:
+ int m_count = 0;
+
+ ClassProbeInserter(jitstd::vector<ICorJitInfo::PgoInstrumentationSchema>* schema,
+ BYTE* profileMemory,
+ int* pCurrentSchemaIndex,
+ bool instrument)
+ : m_schema(schema)
+ , m_profileMemory(profileMemory)
+ , m_currentSchemaIndex(pCurrentSchemaIndex)
, m_instrument(instrument)
{
}
- Compiler::fgWalkResult PreOrderVisit(GenTree** use, GenTree* user)
+ void operator()(Compiler* compiler, GenTreeCall* call)
{
- GenTree* const node = *use;
- if (node->IsCall())
+ JITDUMP("Found call [%06u] with probe index %d and ilOffset 0x%X\n", compiler->dspTreeID(call),
+ call->gtClassProfileCandidateInfo->probeIndex,
+ call->gtClassProfileCandidateInfo->ilOffset);
+
+ m_count++;
+ if (m_instrument)
{
- GenTreeCall* const call = node->AsCall();
- if (call->IsVirtual() && (call->gtCallType != CT_INDIRECT))
- {
- JITDUMP("Found call [%06u] with probe index %d and ilOffset 0x%X\n",
- m_compiler->dspTreeID(call), call->gtClassProfileCandidateInfo->probeIndex,
- call->gtClassProfileCandidateInfo->ilOffset);
+ // We transform the call from (CALLVIRT obj, ... args ...) to
+ // to
+ // (CALLVIRT
+ // (COMMA
+ // (ASG tmp, obj)
+ // (COMMA
+ // (CALL probe_fn tmp, &probeEntry)
+ // tmp)))
+ // ... args ...)
+ //
- m_count++;
+ assert(call->gtCallThisArg->GetNode()->TypeGet() == TYP_REF);
- if (m_instrument)
- {
- // We transform the call from (CALLVIRT obj, ... args ...) to
- // to
- // (CALLVIRT
- // (COMMA
- // (ASG tmp, obj)
- // (COMMA
- // (CALL probe_fn tmp, &probeEntry)
- // tmp)))
- // ... args ...)
- //
-
- assert(call->gtCallThisArg->GetNode()->TypeGet() == TYP_REF);
-
- // Figure out where the table is located.
- //
- ICorJitInfo::ClassProfile* classProfile =
- &m_tableBase[call->gtClassProfileCandidateInfo->probeIndex];
-
- // Grab a temp to hold the 'this' object as it will be used three times
- //
- unsigned const tmpNum = m_compiler->lvaGrabTemp(true DEBUGARG("class profile tmp"));
- m_compiler->lvaTable[tmpNum].lvType = TYP_REF;
-
- // Generate the IR...
- //
- GenTree* const classProfileNode =
- m_compiler->gtNewIconNode((ssize_t)classProfile, TYP_I_IMPL);
- GenTree* const tmpNode = m_compiler->gtNewLclvNode(tmpNum, TYP_REF);
- GenTreeCall::Use* const args = m_compiler->gtNewCallArgs(tmpNode, classProfileNode);
- GenTree* const helperCallNode =
- m_compiler->gtNewHelperCallNode(CORINFO_HELP_CLASSPROFILE, TYP_VOID, args);
- GenTree* const tmpNode2 = m_compiler->gtNewLclvNode(tmpNum, TYP_REF);
- GenTree* const callCommaNode =
- m_compiler->gtNewOperNode(GT_COMMA, TYP_REF, helperCallNode, tmpNode2);
- GenTree* const tmpNode3 = m_compiler->gtNewLclvNode(tmpNum, TYP_REF);
- GenTree* const asgNode = m_compiler->gtNewOperNode(GT_ASG, TYP_REF, tmpNode3,
- call->gtCallThisArg->GetNode());
- GenTree* const asgCommaNode =
- m_compiler->gtNewOperNode(GT_COMMA, TYP_REF, asgNode, callCommaNode);
-
- // Update the call
- //
- call->gtCallThisArg->SetNode(asgCommaNode);
-
- JITDUMP("Modified call is now\n");
- DISPTREE(call);
-
- // Initialize the class table
- //
- // Hack: we use two high bits of the offset to indicate that this record
- // is the start of a class profile, and what kind of call is being profiled.
- //
- IL_OFFSET offset = jitGetILoffs(call->gtClassProfileCandidateInfo->ilOffset);
- assert((offset & (ICorJitInfo::ClassProfile::CLASS_FLAG |
- ICorJitInfo::ClassProfile::INTERFACE_FLAG)) == 0);
-
- offset |= ICorJitInfo::ClassProfile::CLASS_FLAG;
-
- if (call->IsVirtualStub())
- {
- offset |= ICorJitInfo::ClassProfile::INTERFACE_FLAG;
- }
- else
- {
- assert(call->IsVirtualVtable());
- }
+ // Figure out where the table is located.
+ //
+ BYTE* classProfile = (*m_schema)[*m_currentSchemaIndex].Offset + m_profileMemory;
+ *m_currentSchemaIndex += 2; // There are 2 schema entries per class probe
- classProfile->ILOffset = offset;
- classProfile->Count = 0;
+ // Grab a temp to hold the 'this' object as it will be used three times
+ //
+ unsigned const tmpNum = compiler->lvaGrabTemp(true DEBUGARG("class profile tmp"));
+ compiler->lvaTable[tmpNum].lvType = TYP_REF;
- for (int i = 0; i < ICorJitInfo::ClassProfile::SIZE; i++)
- {
- classProfile->ClassTable[i] = NO_CLASS_HANDLE;
- }
- }
+ // Generate the IR...
+ //
+ GenTree* const classProfileNode =
+ compiler->gtNewIconNode((ssize_t)classProfile, TYP_I_IMPL);
+ GenTree* const tmpNode = compiler->gtNewLclvNode(tmpNum, TYP_REF);
+ GenTreeCall::Use* const args = compiler->gtNewCallArgs(tmpNode, classProfileNode);
+ GenTree* const helperCallNode =
+ compiler->gtNewHelperCallNode(CORINFO_HELP_CLASSPROFILE, TYP_VOID, args);
+ GenTree* const tmpNode2 = compiler->gtNewLclvNode(tmpNum, TYP_REF);
+ GenTree* const callCommaNode =
+ compiler->gtNewOperNode(GT_COMMA, TYP_REF, helperCallNode, tmpNode2);
+ GenTree* const tmpNode3 = compiler->gtNewLclvNode(tmpNum, TYP_REF);
+ GenTree* const asgNode =
+ compiler->gtNewOperNode(GT_ASG, TYP_REF, tmpNode3, call->gtCallThisArg->GetNode());
+ GenTree* const asgCommaNode =
+ compiler->gtNewOperNode(GT_COMMA, TYP_REF, asgNode, callCommaNode);
+
+ // Update the call
+ //
+ call->gtCallThisArg->SetNode(asgCommaNode);
- // Restore the stub address on call, whether instrumenting or not.
- //
- call->gtStubCallStubAddr = call->gtClassProfileCandidateInfo->stubAddr;
- }
+ JITDUMP("Modified call is now\n");
+ DISPTREE(call);
}
- return Compiler::WALK_CONTINUE;
+ // Restore the stub address on the call, whether instrumenting or not.
+ //
+ call->gtStubCallStubAddr = call->gtClassProfileCandidateInfo->stubAddr;
}
};
// Scan the statements and add class probes
//
- ClassProbeVisitor visitor(this, (ICorJitInfo::ClassProfile*)profileBlockCountsEnd, instrument);
+ ClassProbeInserter insertProbes(&schema, profileMemory, &currentSchemaIndex, instrument);
+ ClassProbeVisitor<ClassProbeInserter> visitor(this, insertProbes);
for (Statement* stmt : block->Statements())
{
visitor.WalkTree(stmt->GetRootNodePointer(), nullptr);
@@ -639,8 +702,8 @@ void Compiler::fgInstrumentMethod()
// Bookkeeping
//
- assert(visitor.m_count <= countOfCalls);
- countOfCalls -= visitor.m_count;
+ assert(insertProbes.m_count <= countOfCalls);
+ countOfCalls -= insertProbes.m_count;
JITDUMP("\n%d calls remain to be visited\n", countOfCalls);
}
else
@@ -664,16 +727,11 @@ void Compiler::fgInstrumentMethod()
if (instrument)
{
- // Assign the current block's IL offset into the profile data
- // (make sure IL offset is sane)
- //
- IL_OFFSET offset = block->bbCodeOffs;
- assert((int)offset >= 0);
-
- currentBlockCounts->ILOffset = offset;
- currentBlockCounts->ExecutionCount = 0;
-
- size_t addrOfCurrentExecutionCount = (size_t)&currentBlockCounts->ExecutionCount;
+ assert(block->bbCodeOffs == (IL_OFFSET)schema[currentSchemaIndex].ILOffset);
+ size_t addrOfCurrentExecutionCount = (size_t)(schema[currentSchemaIndex].Offset + profileMemory);
+ if (addrOfFirstExecutionCount == 0)
+ addrOfFirstExecutionCount = addrOfCurrentExecutionCount;
+ currentSchemaIndex++;
// Read Basic-Block count value
GenTree* valueNode =
@@ -687,9 +745,6 @@ void Compiler::fgInstrumentMethod()
GenTree* asgNode = gtNewAssignNode(lhsNode, rhsNode);
fgNewStmtAtBeg(block, asgNode);
-
- // Advance to the next BlockCounts tuple [ILOffset, ExecutionCount]
- currentBlockCounts++;
}
}
@@ -702,7 +757,6 @@ void Compiler::fgInstrumentMethod()
//
noway_assert(countOfBlocks == 0);
noway_assert(countOfCalls == 0);
- assert(currentBlockCounts == profileBlockCountsEnd);
// When prejitting, add the method entry callback node
if (opts.jitFlags->IsSet(JitFlags::JIT_FLAG_PREJIT))
@@ -733,9 +787,6 @@ void Compiler::fgInstrumentMethod()
GenTreeCall::Use* args = gtNewCallArgs(arg);
GenTree* call = gtNewHelperCallNode(CORINFO_HELP_BBT_FCN_ENTER, TYP_VOID, args);
- // Get the address of the first blocks ExecutionCount
- size_t addrOfFirstExecutionCount = (size_t)&profileBlockCountsStart->ExecutionCount;
-
// Read Basic-Block count value
GenTree* valueNode = gtNewIndOfIconHandleNode(TYP_INT, addrOfFirstExecutionCount, GTF_ICON_BBC_PTR, false);