Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/dotnet/runtime.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBruce Forstall <brucefo@microsoft.com>2021-04-24 04:08:13 +0300
committerGitHub <noreply@github.com>2021-04-24 04:08:13 +0300
commit51a116d186da5495e7861bb95f8407944c1339c3 (patch)
tree0b5630288ca2da9ad51d6283e1c09e57e585ed01
parent86c113260fd7a998f01b394ef8916039dcc62ec8 (diff)
Don't recompute preds lists during loop cloning (#51757)
Instead, add and modify the appropriate preds when the mechanical cloning is performed. This will preserve existing profile data on the edges. Contributes to #49030. No x86/x64 SPMI asm diffs.
-rw-r--r--src/coreclr/jit/compiler.h7
-rw-r--r--src/coreclr/jit/fgdiagnostic.cpp49
-rw-r--r--src/coreclr/jit/fgflow.cpp10
-rw-r--r--src/coreclr/jit/fgopt.cpp7
-rw-r--r--src/coreclr/jit/flowgraph.cpp5
-rw-r--r--src/coreclr/jit/jithashtable.h2
-rw-r--r--src/coreclr/jit/optimizer.cpp198
7 files changed, 208 insertions, 70 deletions
diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h
index 29e300d12e9..ee2abdb945f 100644
--- a/src/coreclr/jit/compiler.h
+++ b/src/coreclr/jit/compiler.h
@@ -5268,7 +5268,7 @@ protected:
// When the flow graph changes, we need to update the block numbers, predecessor lists, reachability sets, and
// dominators.
- void fgUpdateChangedFlowGraph(bool computeDoms = true);
+ void fgUpdateChangedFlowGraph(const bool computePreds = true, const bool computeDoms = true);
public:
// Compute the predecessors of the blocks in the control flow graph.
@@ -6534,8 +6534,9 @@ protected:
void optUpdateLoopHead(unsigned loopInd, BasicBlock* from, BasicBlock* to);
// Updates the successors of "blk": if "blk2" is a successor of "blk", and there is a mapping for "blk2->blk3" in
- // "redirectMap", change "blk" so that "blk3" is this successor. Note that the predecessor lists are not updated.
- void optRedirectBlock(BasicBlock* blk, BlockToBlockMap* redirectMap);
+ // "redirectMap", change "blk" so that "blk3" is this successor. If `addPreds` is true, add predecessor edges
+ // for the block based on its new target, whether redirected or not.
+ void optRedirectBlock(BasicBlock* blk, BlockToBlockMap* redirectMap, const bool addPreds = false);
// Marks the containsCall information to "lnum" and any parent loops.
void AddContainsCallAllContainingLoops(unsigned lnum);
diff --git a/src/coreclr/jit/fgdiagnostic.cpp b/src/coreclr/jit/fgdiagnostic.cpp
index 180d4cfc469..057e0d312f1 100644
--- a/src/coreclr/jit/fgdiagnostic.cpp
+++ b/src/coreclr/jit/fgdiagnostic.cpp
@@ -707,11 +707,24 @@ bool Compiler::fgDumpFlowGraph(Phases phase)
fprintf(fgxFile, ">");
}
+ // In some cases, we want to change the display based on whether an edge is lexically backwards, forwards,
+ // or lexical successor. Also, for the region tree, using the lexical order is useful for determining where
+ // to insert in the tree, to determine nesting. We'd like to use the bbNum to do this. However, we don't
+ // want to renumber the blocks. So, create a mapping of bbNum to ordinal, and compare block order by
+ // comparing the mapped ordinals instead.
+
+ unsigned blockOrdinal = 0;
+ unsigned* blkMap = new (this, CMK_DebugOnly) unsigned[fgBBNumMax + 1];
+ memset(blkMap, 0, sizeof(unsigned) * (fgBBNumMax + 1));
+ for (BasicBlock* block = fgFirstBB; block != nullptr; block = block->bbNext)
+ {
+ blkMap[block->bbNum] = blockOrdinal++;
+ }
+
static const char* kindImage[] = {"EHFINALLYRET", "EHFILTERRET", "EHCATCHRET", "THROW", "RETURN", "NONE",
"ALWAYS", "LEAVE", "CALLFINALLY", "COND", "SWITCH"};
BasicBlock* block;
- unsigned blockOrdinal;
for (block = fgFirstBB, blockOrdinal = 1; block != nullptr; block = block->bbNext, blockOrdinal++)
{
if (createDotFile)
@@ -840,13 +853,13 @@ bool Compiler::fgDumpFlowGraph(Phases phase)
const char* sep = "";
- if (bSource->bbNum > bTarget->bbNum)
+ if (blkMap[bSource->bbNum] > blkMap[bTarget->bbNum])
{
// Lexical backedge
fprintf(fgxFile, " [color=green");
sep = ", ";
}
- else if ((bSource->bbNum + 1) == bTarget->bbNum)
+ else if ((blkMap[bSource->bbNum] + 1) == blkMap[bTarget->bbNum])
{
// Lexical successor
fprintf(fgxFile, " [color=blue, weight=20");
@@ -953,12 +966,12 @@ bool Compiler::fgDumpFlowGraph(Phases phase)
{
BasicBlock* const bTarget = bSource->GetSucc(i);
fprintf(fgxFile, " " FMT_BB " -> " FMT_BB, bSource->bbNum, bTarget->bbNum);
- if (bSource->bbNum > bTarget->bbNum)
+ if (blkMap[bSource->bbNum] > blkMap[bTarget->bbNum])
{
// Lexical backedge
fprintf(fgxFile, " [color=green]\n");
}
- else if ((bSource->bbNum + 1) == bTarget->bbNum)
+ else if ((blkMap[bSource->bbNum] + 1) == blkMap[bTarget->bbNum])
{
// Lexical successor
fprintf(fgxFile, " [color=blue]\n");
@@ -1027,24 +1040,11 @@ bool Compiler::fgDumpFlowGraph(Phases phase)
};
public:
- RegionGraph(Compiler* comp) : m_comp(comp), m_rgnRoot(nullptr)
+ RegionGraph(Compiler* comp, unsigned* blkMap) : m_comp(comp), m_rgnRoot(nullptr), m_blkMap(blkMap)
{
// Create a root region that encompasses the whole function.
- // We don't want to renumber the blocks, but it's useful to have a sequential number
- // representing the lexical block order so we know where to insert a block range
- // in the region tree. To do this, create a mapping of bbNum to ordinal, and compare
- // block order by comparing the mapped ordinals.
-
m_rgnRoot =
new (m_comp, CMK_DebugOnly) Region(RegionType::Root, "Root", comp->fgFirstBB, comp->fgLastBB);
-
- unsigned bbOrdinal = 0;
- m_blkMap = new (m_comp, CMK_DebugOnly) unsigned[comp->fgBBNumMax + 1];
- memset(m_blkMap, 0, sizeof(unsigned) * (comp->fgBBNumMax + 1));
- for (BasicBlock* block = comp->fgFirstBB; block != nullptr; block = block->bbNext)
- {
- m_blkMap[block->bbNum] = bbOrdinal++;
- }
}
//------------------------------------------------------------------------
@@ -1353,7 +1353,7 @@ bool Compiler::fgDumpFlowGraph(Phases phase)
// Define the region graph object. We'll add regions to this, then output the graph.
- RegionGraph rgnGraph(this);
+ RegionGraph rgnGraph(this, blkMap);
// Add the EH regions to the region graph. An EH region consists of a region for the
// `try`, a region for the handler, and, for filter/filter-handlers, a region for the
@@ -2071,7 +2071,7 @@ private:
bool CheckEhTryDsc(BasicBlock* block, BasicBlock* blockPred, EHblkDsc* ehTryDsc);
bool CheckEhHndDsc(BasicBlock* block, BasicBlock* blockPred, EHblkDsc* ehHndlDsc);
bool CheckJump(BasicBlock* blockPred, BasicBlock* block);
- bool CheckEHFinalyRet(BasicBlock* blockPred, BasicBlock* block);
+ bool CheckEHFinallyRet(BasicBlock* blockPred, BasicBlock* block);
private:
Compiler* comp;
@@ -2130,7 +2130,7 @@ unsigned BBPredsChecker::CheckBBPreds(BasicBlock* block, unsigned curTraversalSt
assert(CheckJump(blockPred, block));
}
- // Make sure preds are in increasting BBnum order
+ // Make sure preds are in increasing BBnum order
//
assert(block->checkPredListOrder());
@@ -2232,7 +2232,7 @@ bool BBPredsChecker::CheckJump(BasicBlock* blockPred, BasicBlock* block)
return true;
case BBJ_EHFINALLYRET:
- assert(CheckEHFinalyRet(blockPred, block));
+ assert(CheckEHFinallyRet(blockPred, block));
return true;
case BBJ_THROW:
@@ -2265,9 +2265,8 @@ bool BBPredsChecker::CheckJump(BasicBlock* blockPred, BasicBlock* block)
return false;
}
-bool BBPredsChecker::CheckEHFinalyRet(BasicBlock* blockPred, BasicBlock* block)
+bool BBPredsChecker::CheckEHFinallyRet(BasicBlock* blockPred, BasicBlock* block)
{
-
// If the current block is a successor to a BBJ_EHFINALLYRET (return from finally),
// then the lexically previous block should be a call to the same finally.
// Verify all of that.
diff --git a/src/coreclr/jit/fgflow.cpp b/src/coreclr/jit/fgflow.cpp
index e3b6869f116..fda7f901451 100644
--- a/src/coreclr/jit/fgflow.cpp
+++ b/src/coreclr/jit/fgflow.cpp
@@ -686,7 +686,7 @@ void Compiler::fgRemovePreds()
//
void Compiler::fgComputePreds()
{
- noway_assert(fgFirstBB);
+ noway_assert(fgFirstBB != nullptr);
BasicBlock* block;
@@ -697,6 +697,14 @@ void Compiler::fgComputePreds()
fgDispBasicBlocks();
printf("\n");
}
+
+ // Check that the block numbers are increasing order.
+ unsigned lastBBnum = fgFirstBB->bbNum;
+ for (BasicBlock* block = fgFirstBB->bbNext; block != nullptr; block = block->bbNext)
+ {
+ assert(lastBBnum < block->bbNum);
+ lastBBnum = block->bbNum;
+ }
#endif // DEBUG
// Reset everything pred related
diff --git a/src/coreclr/jit/fgopt.cpp b/src/coreclr/jit/fgopt.cpp
index b73fb634718..d0256fb9514 100644
--- a/src/coreclr/jit/fgopt.cpp
+++ b/src/coreclr/jit/fgopt.cpp
@@ -174,7 +174,7 @@ bool Compiler::fgReachable(BasicBlock* b1, BasicBlock* b2)
// Arguments:
// computeDoms -- `true` if we should recompute dominators
//
-void Compiler::fgUpdateChangedFlowGraph(bool computeDoms)
+void Compiler::fgUpdateChangedFlowGraph(const bool computePreds, const bool computeDoms)
{
// We need to clear this so we don't hit an assert calling fgRenumberBlocks().
fgDomsComputed = false;
@@ -182,7 +182,10 @@ void Compiler::fgUpdateChangedFlowGraph(bool computeDoms)
JITDUMP("\nRenumbering the basic blocks for fgUpdateChangeFlowGraph\n");
fgRenumberBlocks();
- fgComputePreds();
+ if (computePreds) // This condition is only here until all phases don't require it.
+ {
+ fgComputePreds();
+ }
fgComputeEnterBlocksSet();
fgComputeReachabilitySets();
if (computeDoms)
diff --git a/src/coreclr/jit/flowgraph.cpp b/src/coreclr/jit/flowgraph.cpp
index 046f939772b..5c24e7725d3 100644
--- a/src/coreclr/jit/flowgraph.cpp
+++ b/src/coreclr/jit/flowgraph.cpp
@@ -190,8 +190,9 @@ PhaseStatus Compiler::fgInsertGCPolls()
{
noway_assert(opts.OptimizationEnabled());
fgReorderBlocks();
- constexpr bool computeDoms = false;
- fgUpdateChangedFlowGraph(computeDoms);
+ constexpr bool computePreds = true;
+ constexpr bool computeDoms = false;
+ fgUpdateChangedFlowGraph(computePreds, computeDoms);
}
#ifdef DEBUG
if (verbose)
diff --git a/src/coreclr/jit/jithashtable.h b/src/coreclr/jit/jithashtable.h
index 131f804a112..3713c4a1919 100644
--- a/src/coreclr/jit/jithashtable.h
+++ b/src/coreclr/jit/jithashtable.h
@@ -471,7 +471,7 @@ private:
public:
//------------------------------------------------------------------------
- // CheckGrowth: Replace the bucket table with a larger one and copy all nodes
+ // Reallocate: Replace the bucket table with a larger one and copy all nodes
// from the existing bucket table.
//
// Notes:
diff --git a/src/coreclr/jit/optimizer.cpp b/src/coreclr/jit/optimizer.cpp
index 67c26e26c4e..daf459faca5 100644
--- a/src/coreclr/jit/optimizer.cpp
+++ b/src/coreclr/jit/optimizer.cpp
@@ -2555,7 +2555,8 @@ NO_MORE_LOOPS:
}
if (mod)
{
- fgUpdateChangedFlowGraph();
+ constexpr bool computePreds = true;
+ fgUpdateChangedFlowGraph(computePreds);
}
#ifdef DEBUG
@@ -2606,20 +2607,26 @@ void Compiler::optIdentifyLoopsForAlignment()
#endif
}
-void Compiler::optRedirectBlock(BasicBlock* blk, BlockToBlockMap* redirectMap)
+void Compiler::optRedirectBlock(BasicBlock* blk, BlockToBlockMap* redirectMap, const bool addPreds)
{
BasicBlock* newJumpDest = nullptr;
switch (blk->bbJumpKind)
{
case BBJ_THROW:
case BBJ_RETURN:
- case BBJ_NONE:
case BBJ_EHFILTERRET:
case BBJ_EHFINALLYRET:
case BBJ_EHCATCHRET:
// These have no jump destination to update.
break;
+ case BBJ_NONE:
+ if (addPreds)
+ {
+ fgAddRefPred(blk->bbNext, blk);
+ }
+ break;
+
case BBJ_ALWAYS:
case BBJ_LEAVE:
case BBJ_CALLFINALLY:
@@ -2629,6 +2636,15 @@ void Compiler::optRedirectBlock(BasicBlock* blk, BlockToBlockMap* redirectMap)
{
blk->bbJumpDest = newJumpDest;
}
+ if (addPreds)
+ {
+ fgAddRefPred(blk->bbJumpDest, blk);
+ if (blk->bbJumpKind == BBJ_COND)
+ {
+ // Add a pred for the fall-through path.
+ fgAddRefPred(blk->bbNext, blk);
+ }
+ }
break;
case BBJ_SWITCH:
@@ -2636,11 +2652,17 @@ void Compiler::optRedirectBlock(BasicBlock* blk, BlockToBlockMap* redirectMap)
bool redirected = false;
for (unsigned i = 0; i < blk->bbJumpSwt->bbsCount; i++)
{
- if (redirectMap->Lookup(blk->bbJumpSwt->bbsDstTab[i], &newJumpDest))
+ BasicBlock* switchDest = blk->bbJumpSwt->bbsDstTab[i];
+ if (redirectMap->Lookup(switchDest, &newJumpDest))
{
+ switchDest = newJumpDest;
blk->bbJumpSwt->bbsDstTab[i] = newJumpDest;
redirected = true;
}
+ if (addPreds)
+ {
+ fgAddRefPred(switchDest, blk);
+ }
}
// If any redirections happend, invalidate the switch table map for the switch.
if (redirected)
@@ -3959,7 +3981,8 @@ void Compiler::optUnrollLoops()
if (change)
{
- fgUpdateChangedFlowGraph();
+ constexpr bool computePreds = true;
+ fgUpdateChangedFlowGraph(computePreds);
}
#ifdef DEBUG
@@ -5481,7 +5504,7 @@ void Compiler::optCloneLoop(unsigned loopInd, LoopCloneContext* context)
// We're going to transform this loop:
//
- // H --> E
+ // H --> E (or, H conditionally branches around the loop and has fall-through to F == T == E)
// F
// T
// E
@@ -5507,60 +5530,96 @@ void Compiler::optCloneLoop(unsigned loopInd, LoopCloneContext* context)
if (h->bbJumpKind != BBJ_NONE && h->bbJumpKind != BBJ_ALWAYS)
{
// Make a new block to be the unique entry to the loop.
- assert(h->bbJumpKind == BBJ_COND && h->bbNext == loop.lpEntry);
+ JITDUMP("Create new unique single-successor entry to loop\n");
+ assert((h->bbJumpKind == BBJ_COND) && (h->bbNext == loop.lpEntry));
BasicBlock* newH = fgNewBBafter(BBJ_NONE, h, /*extendRegion*/ true);
- newH->bbWeight = newH->isRunRarely() ? BB_ZERO_WEIGHT : ambientWeight;
+ JITDUMP("Adding " FMT_BB " after " FMT_BB "\n", newH->bbNum, h->bbNum);
+ newH->bbWeight = newH->isRunRarely() ? BB_ZERO_WEIGHT : ambientWeight;
BlockSetOps::Assign(this, newH->bbReach, h->bbReach);
// This is in the scope of a surrounding loop, if one exists -- the parent of the loop we're cloning.
newH->bbNatLoopNum = ambientLoop;
- h = newH;
- optUpdateLoopHead(loopInd, loop.lpHead, h);
+ optUpdateLoopHead(loopInd, h, newH);
+
+ fgAddRefPred(newH, h); // Add h->newH pred edge
+ JITDUMP("Adding " FMT_BB " -> " FMT_BB "\n", h->bbNum, newH->bbNum);
+ fgReplacePred(newH->bbNext, h, newH); // Replace pred in COND fall-through block.
+ JITDUMP("Replace " FMT_BB " -> " FMT_BB " with " FMT_BB " -> " FMT_BB "\n", h->bbNum, newH->bbNext->bbNum,
+ newH->bbNum, newH->bbNext->bbNum);
+
+ h = newH;
}
+ assert(h == loop.lpHead);
- // First, make X2 after B, if necessary. (Not necessary if b is a BBJ_ALWAYS.)
+ // Make X2 after B, if necessary. (Not necessary if B is a BBJ_ALWAYS.)
// "newPred" will be the predecessor of the blocks of the cloned loop.
BasicBlock* b = loop.lpBottom;
BasicBlock* newPred = b;
if (b->bbJumpKind != BBJ_ALWAYS)
{
+ assert(b->bbJumpKind == BBJ_COND);
+
BasicBlock* x = b->bbNext;
if (x != nullptr)
{
+ JITDUMP("Create branch around cloned loop\n");
BasicBlock* x2 = fgNewBBafter(BBJ_ALWAYS, b, /*extendRegion*/ true);
- x2->bbWeight = x2->isRunRarely() ? BB_ZERO_WEIGHT : ambientWeight;
+ JITDUMP("Adding " FMT_BB " after " FMT_BB "\n", x2->bbNum, b->bbNum);
+ x2->bbWeight = x2->isRunRarely() ? BB_ZERO_WEIGHT : ambientWeight;
// This is in the scope of a surrounding loop, if one exists -- the parent of the loop we're cloning.
x2->bbNatLoopNum = ambientLoop;
x2->bbJumpDest = x;
BlockSetOps::Assign(this, x2->bbReach, h->bbReach);
+
+ fgAddRefPred(x2, b); // Add b->x2 pred edge
+ JITDUMP("Adding " FMT_BB " -> " FMT_BB "\n", b->bbNum, x2->bbNum);
+ fgReplacePred(x, b, x2); // The pred of x is now x2, not the fall-through of COND b.
+ JITDUMP("Replace " FMT_BB " -> " FMT_BB " with " FMT_BB " -> " FMT_BB "\n", b->bbNum, x->bbNum, x2->bbNum,
+ x->bbNum);
+
newPred = x2;
}
}
// Now we'll make "h2", after "h" to go to "e" -- unless the loop is a do-while,
// so that "h" already falls through to "e" (e == t == f).
+ // It might look like this code is unreachable, since "h" must be a BBJ_ALWAYS, but
+ // later we will change "h" to a BBJ_COND along with a set of loop conditions.
+ // TODO: it still might be unreachable, since cloning currently is restricted to "do-while" loop forms.
BasicBlock* h2 = nullptr;
- if (loop.lpHead->bbNext != loop.lpEntry)
+ if (h->bbNext != loop.lpEntry)
{
- BasicBlock* h2 = fgNewBBafter(BBJ_ALWAYS, loop.lpHead, /*extendRegion*/ true);
- h2->bbWeight = h2->isRunRarely() ? BB_ZERO_WEIGHT : ambientWeight;
+ assert(h->bbJumpKind == BBJ_ALWAYS);
+ JITDUMP("Create branch to entry of optimized loop\n");
+ BasicBlock* h2 = fgNewBBafter(BBJ_ALWAYS, h, /*extendRegion*/ true);
+ JITDUMP("Adding " FMT_BB " after " FMT_BB "\n", h2->bbNum, h->bbNum);
+ h2->bbWeight = h2->isRunRarely() ? BB_ZERO_WEIGHT : ambientWeight;
// This is in the scope of a surrounding loop, if one exists -- the parent of the loop we're cloning.
h2->bbNatLoopNum = ambientLoop;
h2->bbJumpDest = loop.lpEntry;
- optUpdateLoopHead(loopInd, loop.lpHead, h2);
+
+ fgAddRefPred(h2, h); // Add h->h2 pred edge
+ JITDUMP("Adding " FMT_BB " -> " FMT_BB "\n", h->bbNum, h2->bbNum);
+ fgReplacePred(loop.lpEntry, h, h2);
+ JITDUMP("Replace " FMT_BB " -> " FMT_BB " with " FMT_BB " -> " FMT_BB "\n", h->bbNum, loop.lpEntry->bbNum,
+ h2->bbNum, loop.lpEntry->bbNum);
+
+ optUpdateLoopHead(loopInd, h, h2);
+
+ // NOTE: 'h' is no longer the loop head; 'h2' is!
}
// Now we'll clone the blocks of the loop body.
BasicBlock* newFirst = nullptr;
- BasicBlock* newBot = nullptr;
BlockToBlockMap* blockMap = new (getAllocator(CMK_LoopClone)) BlockToBlockMap(getAllocator(CMK_LoopClone));
for (BasicBlock* blk = loop.lpFirst; blk != loop.lpBottom->bbNext; blk = blk->bbNext)
{
BasicBlock* newBlk = fgNewBBafter(blk->bbJumpKind, newPred, /*extendRegion*/ true);
+ JITDUMP("Adding " FMT_BB " (copy of " FMT_BB ") after " FMT_BB "\n", newBlk->bbNum, blk->bbNum, newPred->bbNum);
// Call CloneBlockState to make a copy of the block's statements (and attributes), and assert that it
// has a return value indicating success, because optCanOptimizeByLoopCloningVisitor has already
@@ -5568,6 +5627,10 @@ void Compiler::optCloneLoop(unsigned loopInd, LoopCloneContext* context)
bool cloneOk = BasicBlock::CloneBlockState(this, newBlk, blk);
noway_assert(cloneOk);
+ // We're going to create the preds below, which will set the bbRefs properly,
+ // so clear out the cloned bbRefs field.
+ newBlk->bbRefs = 0;
+
#if FEATURE_LOOP_ALIGN
// If the original loop is aligned, do not align the cloned loop because cloned loop will be executed in
// rare scenario. Additionally, having to align cloned loop will force us to disable some VEX prefix encoding
@@ -5588,7 +5651,6 @@ void Compiler::optCloneLoop(unsigned loopInd, LoopCloneContext* context)
{
newFirst = newBlk;
}
- newBot = newBlk; // Continually overwrite to make sure we get the last one.
newPred = newBlk;
blockMap->Set(blk, newBlk);
}
@@ -5596,7 +5658,8 @@ void Compiler::optCloneLoop(unsigned loopInd, LoopCloneContext* context)
// Perform the static optimizations on the fast path.
optPerformStaticOptimizations(loopInd, context DEBUGARG(true));
- // Now go through the new blocks, remapping their jump targets within the loop.
+ // Now go through the new blocks, remapping their jump targets within the loop
+ // and updating the preds lists.
for (BasicBlock* blk = loop.lpFirst; blk != loop.lpBottom->bbNext; blk = blk->bbNext)
{
BasicBlock* newblk = nullptr;
@@ -5609,17 +5672,28 @@ void Compiler::optCloneLoop(unsigned loopInd, LoopCloneContext* context)
optCopyBlkDest(blk, newblk);
// Now redirect the new block according to "blockMap".
- optRedirectBlock(newblk, blockMap);
- }
+ const bool addPreds = true;
+ optRedirectBlock(newblk, blockMap, addPreds);
- assert((h->bbJumpKind == BBJ_NONE && (h->bbNext == h2 || h->bbNext == loop.lpEntry)) ||
- (h->bbJumpKind == BBJ_ALWAYS));
-
- // If all the conditions are true, go to E2.
- BasicBlock* e2 = nullptr;
- bool foundIt = blockMap->Lookup(loop.lpEntry, &e2);
+ blk->ensurePredListOrder(this);
+ }
- h->bbJumpKind = BBJ_COND;
+#ifdef DEBUG
+ // Display the preds for the new blocks, after all the new blocks have been redirected.
+ JITDUMP("Preds after loop copy:\n");
+ for (BasicBlock* blk = loop.lpFirst; blk != loop.lpBottom->bbNext; blk = blk->bbNext)
+ {
+ BasicBlock* newblk = nullptr;
+ bool b = blockMap->Lookup(blk, &newblk);
+ assert(b && newblk != nullptr);
+ JITDUMP(FMT_BB ":", newblk->bbNum);
+ for (flowList* pred = newblk->bbPreds; pred != nullptr; pred = pred->flNext)
+ {
+ JITDUMP(" " FMT_BB, pred->getBlock()->bbNum);
+ }
+ JITDUMP("\n");
+ }
+#endif // DEBUG
// We will create the following structure
//
@@ -5634,15 +5708,55 @@ void Compiler::optCloneLoop(unsigned loopInd, LoopCloneContext* context)
// We should always have block conditions, at the minimum, the array should be deref-able
assert(context->HasBlockConditions(loopInd));
+ if (h->bbJumpKind == BBJ_NONE)
+ {
+ assert((h->bbNext == h2) || (h->bbNext == loop.lpEntry));
+ }
+ else
+ {
+ assert(h->bbJumpKind == BBJ_ALWAYS);
+ assert(h->bbJumpDest == loop.lpEntry);
+ }
+
+ // If all the conditions are true, go to E2.
+ BasicBlock* e2 = nullptr;
+ bool foundIt = blockMap->Lookup(loop.lpEntry, &e2);
+
+ // We're going to replace the fall-through path from "h".
+ if (h->bbJumpKind == BBJ_NONE)
+ {
+ fgRemoveRefPred(h->bbNext, h);
+ }
+
// Create a unique header for the slow path.
- BasicBlock* slowHead = fgNewBBafter(BBJ_ALWAYS, h, true);
+ JITDUMP("Create unique head block for slow path loop\n");
+ BasicBlock* slowHead = fgNewBBafter(BBJ_ALWAYS, h, /*extendRegion*/ true);
+ JITDUMP("Adding " FMT_BB " after " FMT_BB "\n", slowHead->bbNum, h->bbNum);
slowHead->bbWeight = h->isRunRarely() ? BB_ZERO_WEIGHT : ambientWeight;
slowHead->bbNatLoopNum = ambientLoop;
slowHead->bbJumpDest = e2;
+ fgAddRefPred(slowHead, h);
+ JITDUMP("Adding " FMT_BB " -> " FMT_BB "\n", h->bbNum, slowHead->bbNum);
+
+ // This is the only predecessor to the copied loop, and it hasn't been added yet.
+ fgAddRefPred(slowHead->bbJumpDest, slowHead);
+ JITDUMP("Adding " FMT_BB " -> " FMT_BB "\n", slowHead->bbNum, slowHead->bbJumpDest->bbNum);
+
+ // "h" is now going to be a COND block
+ h->bbJumpKind = BBJ_COND;
+
BasicBlock* condLast = optInsertLoopChoiceConditions(context, loopInd, h, slowHead);
condLast->bbJumpDest = slowHead;
+ JITDUMP("Adding " FMT_BB " -> " FMT_BB "\n", condLast->bbNum, condLast->bbJumpDest->bbNum);
+ fgAddRefPred(condLast->bbJumpDest, condLast);
+
+ // Add the fall-through path pred.
+ assert(condLast->bbJumpKind == BBJ_COND);
+ JITDUMP("Adding " FMT_BB " -> " FMT_BB "\n", condLast->bbNum, condLast->bbNext->bbNum);
+ fgAddRefPred(condLast->bbNext, condLast);
+
// If h2 is present it is already the head or replace 'h' by 'condLast'.
if (h2 == nullptr)
{
@@ -5656,7 +5770,8 @@ void Compiler::optCloneLoop(unsigned loopInd, LoopCloneContext* context)
// loop to take.
loop.lpFlags |= LPFLG_DONT_UNROLL;
- fgUpdateChangedFlowGraph();
+ constexpr bool computePreds = false;
+ fgUpdateChangedFlowGraph(computePreds);
}
//--------------------------------------------------------------------------------------------------
@@ -5668,8 +5783,8 @@ void Compiler::optCloneLoop(unsigned loopInd, LoopCloneContext* context)
// head loop head for "loopNum"
// slowHead the slow path loop head
//
-// Return Values:
-// None.
+// Return Value:
+// The last conditional block inserted.
//
// Operation:
// Create the following structure.
@@ -5686,6 +5801,7 @@ void Compiler::optCloneLoop(unsigned loopInd, LoopCloneContext* context)
// h2/entry (fast)
//
// Insert condition 0 in 'h' and create other condition blocks and insert conditions in them.
+// On entry, block 'h' is a conditional block, but its bbJumpDest hasn't yet been set.
//
BasicBlock* Compiler::optInsertLoopChoiceConditions(LoopCloneContext* context,
unsigned loopNum,
@@ -5702,12 +5818,22 @@ BasicBlock* Compiler::optInsertLoopChoiceConditions(LoopCloneContext* context,
bool isHeaderBlock = (curCond == head);
// Flip the condition if header block.
- context->CondToStmtInBlock(this, *((*levelCond)[i]), curCond, isHeaderBlock);
+ context->CondToStmtInBlock(this, *((*levelCond)[i]), curCond, /*reverse*/ isHeaderBlock);
// Create each condition block ensuring wiring between them.
- BasicBlock* tmp = fgNewBBafter(BBJ_COND, isHeaderBlock ? slowHead : curCond, true);
+ BasicBlock* tmp = fgNewBBafter(BBJ_COND, isHeaderBlock ? slowHead : curCond, /*extendRegion*/ true);
curCond->bbJumpDest = isHeaderBlock ? tmp : slowHead;
- curCond = tmp;
+
+ JITDUMP("Adding " FMT_BB " -> " FMT_BB "\n", curCond->bbNum, curCond->bbJumpDest->bbNum);
+ fgAddRefPred(curCond->bbJumpDest, curCond);
+
+ if (!isHeaderBlock)
+ {
+ JITDUMP("Adding " FMT_BB " -> " FMT_BB "\n", curCond->bbNum, tmp->bbNum);
+ fgAddRefPred(tmp, curCond);
+ }
+
+ curCond = tmp;
curCond->inheritWeight(head);
curCond->bbNatLoopNum = head->bbNatLoopNum;
@@ -5715,7 +5841,7 @@ BasicBlock* Compiler::optInsertLoopChoiceConditions(LoopCloneContext* context,
}
// Finally insert cloning conditions after all deref conditions have been inserted.
- context->CondToStmtInBlock(this, *(context->GetConditions(loopNum)), curCond, false);
+ context->CondToStmtInBlock(this, *(context->GetConditions(loopNum)), curCond, /*reverse*/ false);
return curCond;
}