Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/dotnet/runtime.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorWill Smith <lol.tihan@gmail.com>2022-05-12 21:09:20 +0300
committerGitHub <noreply@github.com>2022-05-12 21:09:20 +0300
commit2cc7feae42909f0c54ae118a990c29c48aa9c8fc (patch)
tree565b88373579b42a521c7e6af2fb3125069d3d88 /src/coreclr
parent844b29856a32dd178214cc57dc2df60974c10fa4 (diff)
ARM64 - Consolidate 'msub' and 'madd' logic (#68363)
* Changed GT_MADD to not emit msub * Minor changes * Minor changes * Minor changes * Minor changes * Minor changes * Minor changes * Minor changes * Minor changes * Minor changes * Minor changes * Added break * Added lowering pass * Updates to lowering * Removing morph transformation * Minor format * Removed GT_MSUB. Using simple containment to emit msub. * Removed GT_MSUB case * Removed extra code * Updated comment * Formatting * Minor change * Combining GT_MADD changes * Added extra set flag checks * Added extra set flag checks * Creating helper functions * Fixing build * Fixing build * Fixing build and better codegen * Should produce zero diffs * Formatting * Update lowerarmarch.cpp
Diffstat (limited to 'src/coreclr')
-rw-r--r--src/coreclr/jit/codegen.h2
-rw-r--r--src/coreclr/jit/codegenarm64.cpp132
-rw-r--r--src/coreclr/jit/codegenarmarch.cpp8
-rw-r--r--src/coreclr/jit/gtlist.h4
-rw-r--r--src/coreclr/jit/lower.cpp17
-rw-r--r--src/coreclr/jit/lower.h5
-rw-r--r--src/coreclr/jit/lowerarmarch.cpp206
7 files changed, 225 insertions, 149 deletions
diff --git a/src/coreclr/jit/codegen.h b/src/coreclr/jit/codegen.h
index 4c0c78a3a03..eecfe4d2d05 100644
--- a/src/coreclr/jit/codegen.h
+++ b/src/coreclr/jit/codegen.h
@@ -1379,8 +1379,6 @@ protected:
#endif
#if defined(TARGET_ARM64)
void genCodeForJumpCompare(GenTreeOp* tree);
- void genCodeForMadd(GenTreeOp* tree);
- void genCodeForMsub(GenTreeOp* tree);
void genCodeForBfiz(GenTreeOp* tree);
void genCodeForAddEx(GenTreeOp* tree);
void genCodeForCond(GenTreeOp* tree);
diff --git a/src/coreclr/jit/codegenarm64.cpp b/src/coreclr/jit/codegenarm64.cpp
index 5c1f8c64f15..1b0f9780cb2 100644
--- a/src/coreclr/jit/codegenarm64.cpp
+++ b/src/coreclr/jit/codegenarm64.cpp
@@ -2377,20 +2377,60 @@ void CodeGen::genCodeForMulHi(GenTreeOp* treeNode)
// Generate code for ADD, SUB, MUL, DIV, UDIV, AND, AND_NOT, OR and XOR
// This method is expected to have called genConsumeOperands() before calling it.
-void CodeGen::genCodeForBinary(GenTreeOp* treeNode)
+void CodeGen::genCodeForBinary(GenTreeOp* tree)
{
- const genTreeOps oper = treeNode->OperGet();
- regNumber targetReg = treeNode->GetRegNum();
- var_types targetType = treeNode->TypeGet();
+ const genTreeOps oper = tree->OperGet();
+ regNumber targetReg = tree->GetRegNum();
+ var_types targetType = tree->TypeGet();
emitter* emit = GetEmitter();
- assert(treeNode->OperIs(GT_ADD, GT_SUB, GT_MUL, GT_DIV, GT_UDIV, GT_AND, GT_AND_NOT, GT_OR, GT_XOR));
+ assert(tree->OperIs(GT_ADD, GT_SUB, GT_MUL, GT_DIV, GT_UDIV, GT_AND, GT_AND_NOT, GT_OR, GT_XOR));
- GenTree* op1 = treeNode->gtGetOp1();
- GenTree* op2 = treeNode->gtGetOp2();
- instruction ins = genGetInsForOper(treeNode->OperGet(), targetType);
+ GenTree* op1 = tree->gtGetOp1();
+ GenTree* op2 = tree->gtGetOp2();
- if ((treeNode->gtFlags & GTF_SET_FLAGS) != 0)
+ // Handles combined operations: 'madd', 'msub'
+ if (op2->OperIs(GT_MUL) && op2->isContained())
+ {
+ // In the future, we might consider enabling this for floating-point "unsafe" math.
+ assert(varTypeIsIntegral(tree));
+ assert(!(tree->gtFlags & GTF_SET_FLAGS));
+
+ GenTree* a = op1;
+ GenTree* b = op2->gtGetOp1();
+ GenTree* c = op2->gtGetOp2();
+
+ instruction ins;
+ switch (oper)
+ {
+ case GT_ADD:
+ {
+ // d = a + b * c
+ // madd: d, b, c, a
+ ins = INS_madd;
+ break;
+ }
+
+ case GT_SUB:
+ {
+ // d = a - b * c
+ // msub: d, b, c, a
+ ins = INS_msub;
+ break;
+ }
+
+ default:
+ unreached();
+ }
+
+ emit->emitIns_R_R_R_R(ins, emitActualTypeSize(tree), targetReg, b->GetRegNum(), c->GetRegNum(), a->GetRegNum());
+ genProduceReg(tree);
+ return;
+ }
+
+ instruction ins = genGetInsForOper(tree->OperGet(), targetType);
+
+ if ((tree->gtFlags & GTF_SET_FLAGS) != 0)
{
switch (oper)
{
@@ -2414,10 +2454,10 @@ void CodeGen::genCodeForBinary(GenTreeOp* treeNode)
// The arithmetic node must be sitting in a register (since it's not contained)
assert(targetReg != REG_NA);
- regNumber r = emit->emitInsTernary(ins, emitActualTypeSize(treeNode), treeNode, op1, op2);
+ regNumber r = emit->emitInsTernary(ins, emitActualTypeSize(tree), tree, op1, op2);
assert(r == targetReg);
- genProduceReg(treeNode);
+ genProduceReg(tree);
}
//------------------------------------------------------------------------
@@ -10062,76 +10102,6 @@ void CodeGen::instGen_MemoryBarrier(BarrierKind barrierKind)
}
}
-//-----------------------------------------------------------------------------------
-// genCodeForMadd: Emit a madd (Multiply-Add) instruction
-//
-// Arguments:
-// tree - GT_MADD tree where op1 or op2 is GT_ADD
-//
-void CodeGen::genCodeForMadd(GenTreeOp* tree)
-{
- assert(tree->OperIs(GT_MADD) && varTypeIsIntegral(tree) && !(tree->gtFlags & GTF_SET_FLAGS));
- genConsumeOperands(tree);
-
- GenTree* a;
- GenTree* b;
- GenTree* c;
- if (tree->gtGetOp1()->OperIs(GT_MUL) && tree->gtGetOp1()->isContained())
- {
- a = tree->gtGetOp1()->gtGetOp1();
- b = tree->gtGetOp1()->gtGetOp2();
- c = tree->gtGetOp2();
- }
- else
- {
- assert(tree->gtGetOp2()->OperIs(GT_MUL) && tree->gtGetOp2()->isContained());
- a = tree->gtGetOp2()->gtGetOp1();
- b = tree->gtGetOp2()->gtGetOp2();
- c = tree->gtGetOp1();
- }
-
- bool useMsub = false;
- if (a->OperIs(GT_NEG) && a->isContained())
- {
- a = a->gtGetOp1();
- useMsub = true;
- }
- if (b->OperIs(GT_NEG) && b->isContained())
- {
- b = b->gtGetOp1();
- useMsub = !useMsub; // it's either "a * -b" or "-a * -b" which is the same as "a * b"
- }
-
- GetEmitter()->emitIns_R_R_R_R(useMsub ? INS_msub : INS_madd, emitActualTypeSize(tree), tree->GetRegNum(),
- a->GetRegNum(), b->GetRegNum(), c->GetRegNum());
- genProduceReg(tree);
-}
-
-//-----------------------------------------------------------------------------------
-// genCodeForMsub: Emit a msub (Multiply-Subtract) instruction
-//
-// Arguments:
-// tree - GT_MSUB tree where op2 is GT_MUL
-//
-void CodeGen::genCodeForMsub(GenTreeOp* tree)
-{
- assert(tree->OperIs(GT_MSUB) && varTypeIsIntegral(tree) && !(tree->gtFlags & GTF_SET_FLAGS));
- genConsumeOperands(tree);
-
- assert(tree->gtGetOp2()->OperIs(GT_MUL));
- assert(tree->gtGetOp2()->isContained());
-
- GenTree* a = tree->gtGetOp1();
- GenTree* b = tree->gtGetOp2()->gtGetOp1();
- GenTree* c = tree->gtGetOp2()->gtGetOp2();
-
- // d = a - b * c
- // MSUB d, b, c, a
- GetEmitter()->emitIns_R_R_R_R(INS_msub, emitActualTypeSize(tree), tree->GetRegNum(), b->GetRegNum(), c->GetRegNum(),
- a->GetRegNum());
- genProduceReg(tree);
-}
-
//------------------------------------------------------------------------
// genCodeForBfiz: Generates the code sequence for a GenTree node that
// represents a bitfield insert in zero with sign/zero extension.
diff --git a/src/coreclr/jit/codegenarmarch.cpp b/src/coreclr/jit/codegenarmarch.cpp
index 4905aba6e43..9f8ee65ee97 100644
--- a/src/coreclr/jit/codegenarmarch.cpp
+++ b/src/coreclr/jit/codegenarmarch.cpp
@@ -302,14 +302,6 @@ void CodeGen::genCodeForTreeNode(GenTree* treeNode)
break;
#ifdef TARGET_ARM64
- case GT_MADD:
- genCodeForMadd(treeNode->AsOp());
- break;
-
- case GT_MSUB:
- genCodeForMsub(treeNode->AsOp());
- break;
-
case GT_INC_SATURATE:
genCodeForIncSaturate(treeNode);
break;
diff --git a/src/coreclr/jit/gtlist.h b/src/coreclr/jit/gtlist.h
index d0aceb1ba07..0322afcb626 100644
--- a/src/coreclr/jit/gtlist.h
+++ b/src/coreclr/jit/gtlist.h
@@ -217,10 +217,6 @@ GTNODE(MUL_LONG , GenTreeOp ,1,GTK_BINOP|DBK_NOTHIR)
GTNODE(AND_NOT , GenTreeOp ,0,GTK_BINOP|DBK_NOTHIR)
#ifdef TARGET_ARM64
-GTNODE(MADD , GenTreeOp ,0,GTK_BINOP|DBK_NOTHIR) // Generates the Multiply-Add instruction. In the future, we might consider
- // enabling it for both armarch and xarch for floating-point MADD "unsafe" math.
-GTNODE(MSUB , GenTreeOp ,0,GTK_BINOP|DBK_NOTHIR) // Generates the Multiply-Subtract instruction. In the future, we might consider
- // enabling it for both armarch and xarch for floating-point MSUB "unsafe" math.
GTNODE(ADDEX, GenTreeOp ,0,GTK_BINOP|DBK_NOTHIR) // Add with sign/zero extension.
GTNODE(BFIZ , GenTreeOp ,0,GTK_BINOP|DBK_NOTHIR) // Bitfield Insert in Zero.
GTNODE(CSNEG_MI , GenTreeOp ,0,GTK_BINOP|DBK_NOTHIR) // Conditional select, negate, minus result
diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp
index 8c3450988e3..fb2c4c79fec 100644
--- a/src/coreclr/jit/lower.cpp
+++ b/src/coreclr/jit/lower.cpp
@@ -3010,7 +3010,10 @@ GenTree* Lowering::OptimizeConstCompare(GenTree* cmp)
#endif // FEATURE_HW_INTRINSICS
)
#else // TARGET_ARM64
- op1->OperIs(GT_AND, GT_ADD, GT_SUB)
+ op1->OperIs(GT_AND, GT_ADD, GT_SUB) &&
+ // This happens in order to emit ARM64 'madd' and 'msub' instructions.
+ // We cannot combine 'adds'/'subs' and 'mul'.
+ !(op1->gtGetOp2()->OperIs(GT_MUL) && op1->gtGetOp2()->isContained())
#endif
)
{
@@ -5443,10 +5446,22 @@ GenTree* Lowering::LowerAdd(GenTreeOp* node)
#endif // TARGET_XARCH
}
+#ifdef TARGET_ARM64
+ if (node->OperIs(GT_ADD))
+ {
+ GenTree* next = LowerAddForPossibleContainment(node);
+ if (next != nullptr)
+ {
+ return next;
+ }
+ }
+#endif // TARGET_ARM64
+
if (node->OperIs(GT_ADD))
{
ContainCheckBinary(node);
}
+
return nullptr;
}
diff --git a/src/coreclr/jit/lower.h b/src/coreclr/jit/lower.h
index 7ad3f6202d6..9a8160ac96e 100644
--- a/src/coreclr/jit/lower.h
+++ b/src/coreclr/jit/lower.h
@@ -355,6 +355,7 @@ private:
bool IsValidConstForMovImm(GenTreeHWIntrinsic* node);
void LowerHWIntrinsicFusedMultiplyAddScalar(GenTreeHWIntrinsic* node);
GenTree* LowerModPow2(GenTree* node);
+ GenTree* LowerAddForPossibleContainment(GenTreeOp* node);
#endif // !TARGET_XARCH && !TARGET_ARM64
union VectorConstant {
@@ -572,6 +573,10 @@ public:
return m_lsra->isContainableMemoryOp(node);
}
+#ifdef TARGET_ARM64
+ bool IsContainableBinaryOp(GenTree* parentNode, GenTree* childNode) const;
+#endif // TARGET_ARM64
+
#ifdef FEATURE_HW_INTRINSICS
// Tries to get a containable node for a given HWIntrinsic
bool TryGetContainableHWIntrinsicOp(GenTreeHWIntrinsic* containingNode,
diff --git a/src/coreclr/jit/lowerarmarch.cpp b/src/coreclr/jit/lowerarmarch.cpp
index b6e06898b64..f42ae8a1b50 100644
--- a/src/coreclr/jit/lowerarmarch.cpp
+++ b/src/coreclr/jit/lowerarmarch.cpp
@@ -145,6 +145,58 @@ bool Lowering::IsContainableImmed(GenTree* parentNode, GenTree* childNode) const
return false;
}
+#ifdef TARGET_ARM64
+//------------------------------------------------------------------------
+// IsContainableBinaryOp: Is the child node a binary op that is containable from the parent node?
+//
+// Return Value:
+// True if the child node can be contained.
+//
+// Notes:
+// This can handle the decision to emit 'madd' or 'msub'.
+//
+bool Lowering::IsContainableBinaryOp(GenTree* parentNode, GenTree* childNode) const
+{
+ if (parentNode->isContained())
+ return false;
+
+ if (!varTypeIsIntegral(parentNode))
+ return false;
+
+ if (parentNode->gtFlags & GTF_SET_FLAGS)
+ return false;
+
+ GenTree* op1 = parentNode->gtGetOp1();
+ GenTree* op2 = parentNode->gtGetOp2();
+
+ if (op2 != childNode)
+ return false;
+
+ if (op1->isContained() || op2->isContained())
+ return false;
+
+ if (!varTypeIsIntegral(op2))
+ return false;
+
+ if (op2->gtFlags & GTF_SET_FLAGS)
+ return false;
+
+ // Find "a + b * c" or "a - b * c".
+ if (parentNode->OperIs(GT_ADD, GT_SUB) && op2->OperIs(GT_MUL))
+ {
+ if (parentNode->gtOverflow())
+ return false;
+
+ if (op2->gtOverflow())
+ return false;
+
+ return !op2->gtGetOp1()->isContained() && !op2->gtGetOp2()->isContained();
+ }
+
+ return false;
+}
+#endif // TARGET_ARM64
+
//------------------------------------------------------------------------
// LowerStoreLoc: Lower a store of a lclVar
//
@@ -697,6 +749,105 @@ GenTree* Lowering::LowerModPow2(GenTree* node)
return cc->gtNext;
}
+
+//------------------------------------------------------------------------
+// LowerAddForPossibleContainment: Tries to lower GT_ADD in such a way
+// that would allow one of its operands
+// to be contained.
+//
+// Arguments:
+// node - the node to lower
+//
+GenTree* Lowering::LowerAddForPossibleContainment(GenTreeOp* node)
+{
+ assert(node->OperIs(GT_ADD));
+
+ if (!comp->opts.OptimizationEnabled())
+ return nullptr;
+
+ if (node->isContained())
+ return nullptr;
+
+ if (!varTypeIsIntegral(node))
+ return nullptr;
+
+ if (node->gtFlags & GTF_SET_FLAGS)
+ return nullptr;
+
+ if (node->gtOverflow())
+ return nullptr;
+
+ GenTree* op1 = node->gtGetOp1();
+ GenTree* op2 = node->gtGetOp2();
+
+ // If the second operand is a containable immediate,
+ // then we do not want to risk moving it around
+ // in this transformation.
+ if (IsContainableImmed(node, op2))
+ return nullptr;
+
+ GenTree* mul = nullptr;
+ GenTree* c = nullptr;
+ if (op1->OperIs(GT_MUL))
+ {
+ // Swap
+ mul = op1;
+ c = op2;
+ }
+ else
+ {
+ mul = op2;
+ c = op1;
+ }
+
+ if (mul->OperIs(GT_MUL) && !(mul->gtFlags & GTF_SET_FLAGS) && varTypeIsIntegral(mul) && !mul->gtOverflow() &&
+ !mul->isContained() && !c->isContained())
+ {
+ GenTree* a = mul->gtGetOp1();
+ GenTree* b = mul->gtGetOp2();
+
+ // Transform "-a * b + c" to "c - a * b"
+ if (a->OperIs(GT_NEG) && !(a->gtFlags & GTF_SET_FLAGS) && !b->OperIs(GT_NEG) && !a->isContained() &&
+ !a->gtGetOp1()->isContained())
+ {
+ mul->AsOp()->gtOp1 = a->gtGetOp1();
+ BlockRange().Remove(a);
+ node->gtOp1 = c;
+ node->gtOp2 = mul;
+ node->ChangeOper(GT_SUB);
+
+ ContainCheckNode(node);
+
+ return node->gtNext;
+ }
+ // Transform "a * -b + c" to "c - a * b"
+ else if (b->OperIs(GT_NEG) && !(b->gtFlags & GTF_SET_FLAGS) && !a->OperIs(GT_NEG) && !b->isContained() &&
+ !b->gtGetOp1()->isContained())
+ {
+ mul->AsOp()->gtOp2 = b->gtGetOp1();
+ BlockRange().Remove(b);
+ node->gtOp1 = c;
+ node->gtOp2 = mul;
+ node->ChangeOper(GT_SUB);
+
+ ContainCheckNode(node);
+
+ return node->gtNext;
+ }
+ // Transform "a * b + c" to "c + a * b"
+ else if (op1->OperIs(GT_MUL))
+ {
+ node->gtOp1 = c;
+ node->gtOp2 = mul;
+
+ ContainCheckNode(node);
+
+ return node->gtNext;
+ }
+ }
+
+ return nullptr;
+}
#endif
#ifdef FEATURE_HW_INTRINSICS
@@ -1698,60 +1849,9 @@ void Lowering::ContainCheckBinary(GenTreeOp* node)
CheckImmedAndMakeContained(node, op2);
#ifdef TARGET_ARM64
- if (comp->opts.OptimizationEnabled() && varTypeIsIntegral(node) && !node->isContained())
+ if (comp->opts.OptimizationEnabled() && IsContainableBinaryOp(node, op2))
{
- // Find "a * b + c" or "c + a * b" in order to emit MADD/MSUB
- if (node->OperIs(GT_ADD) && !node->gtOverflow() && (op1->OperIs(GT_MUL) || op2->OperIs(GT_MUL)))
- {
- GenTree* mul;
- GenTree* c;
- if (op1->OperIs(GT_MUL))
- {
- mul = op1;
- c = op2;
- }
- else
- {
- mul = op2;
- c = op1;
- }
-
- GenTree* a = mul->gtGetOp1();
- GenTree* b = mul->gtGetOp2();
-
- if (!mul->isContained() && !mul->gtOverflow() && !a->isContained() && !b->isContained() &&
- !c->isContained() && varTypeIsIntegral(mul))
- {
- if (a->OperIs(GT_NEG) && !a->gtGetOp1()->isContained() && !a->gtGetOp1()->IsRegOptional())
- {
- // "-a * b + c" to MSUB
- MakeSrcContained(mul, a);
- }
- if (b->OperIs(GT_NEG) && !b->gtGetOp1()->isContained())
- {
- // "a * -b + c" to MSUB
- MakeSrcContained(mul, b);
- }
- // If both 'a' and 'b' are GT_NEG - MADD will be emitted.
-
- node->ChangeOper(GT_MADD);
- MakeSrcContained(node, mul);
- }
- }
- // Find "a - b * c" in order to emit MSUB
- else if (node->OperIs(GT_SUB) && !node->gtOverflow() && op2->OperIs(GT_MUL) && !op2->isContained() &&
- !op2->gtOverflow() && varTypeIsIntegral(op2))
- {
- GenTree* a = op1;
- GenTree* b = op2->gtGetOp1();
- GenTree* c = op2->gtGetOp2();
-
- if (!a->isContained() && !b->isContained() && !c->isContained())
- {
- node->ChangeOper(GT_MSUB);
- MakeSrcContained(node, op2);
- }
- }
+ MakeSrcContained(node, op2);
}
// Change ADD TO ADDEX for ADD(X, CAST(Y)) or ADD(CAST(X), Y) where CAST is int->long