diff options
author | Will Smith <lol.tihan@gmail.com> | 2022-05-12 21:09:20 +0300 |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-05-12 21:09:20 +0300 |
commit | 2cc7feae42909f0c54ae118a990c29c48aa9c8fc (patch) | |
tree | 565b88373579b42a521c7e6af2fb3125069d3d88 /src/coreclr | |
parent | 844b29856a32dd178214cc57dc2df60974c10fa4 (diff) |
ARM64 - Consolidate 'msub' and 'madd' logic (#68363)
* Changed GT_MADD to not emit msub
* Minor changes
* Minor changes
* Minor changes
* Minor changes
* Minor changes
* Minor changes
* Minor changes
* Minor changes
* Minor changes
* Minor changes
* Added break
* Added lowering pass
* Updates to lowering
* Removing morph transformation
* Minor format
* Removed GT_MSUB. Using simple containment to emit msub.
* Removed GT_MSUB case
* Removed extra code
* Updated comment
* Formatting
* Minor change
* Combining GT_MADD changes
* Added extra set flag checks
* Added extra set flag checks
* Creating helper functions
* Fixing build
* Fixing build
* Fixing build and better codegen
* Should produce zero diffs
* Formatting
* Update lowerarmarch.cpp
Diffstat (limited to 'src/coreclr')
-rw-r--r-- | src/coreclr/jit/codegen.h | 2 | ||||
-rw-r--r-- | src/coreclr/jit/codegenarm64.cpp | 132 | ||||
-rw-r--r-- | src/coreclr/jit/codegenarmarch.cpp | 8 | ||||
-rw-r--r-- | src/coreclr/jit/gtlist.h | 4 | ||||
-rw-r--r-- | src/coreclr/jit/lower.cpp | 17 | ||||
-rw-r--r-- | src/coreclr/jit/lower.h | 5 | ||||
-rw-r--r-- | src/coreclr/jit/lowerarmarch.cpp | 206 |
7 files changed, 225 insertions, 149 deletions
diff --git a/src/coreclr/jit/codegen.h b/src/coreclr/jit/codegen.h index 4c0c78a3a03..eecfe4d2d05 100644 --- a/src/coreclr/jit/codegen.h +++ b/src/coreclr/jit/codegen.h @@ -1379,8 +1379,6 @@ protected: #endif #if defined(TARGET_ARM64) void genCodeForJumpCompare(GenTreeOp* tree); - void genCodeForMadd(GenTreeOp* tree); - void genCodeForMsub(GenTreeOp* tree); void genCodeForBfiz(GenTreeOp* tree); void genCodeForAddEx(GenTreeOp* tree); void genCodeForCond(GenTreeOp* tree); diff --git a/src/coreclr/jit/codegenarm64.cpp b/src/coreclr/jit/codegenarm64.cpp index 5c1f8c64f15..1b0f9780cb2 100644 --- a/src/coreclr/jit/codegenarm64.cpp +++ b/src/coreclr/jit/codegenarm64.cpp @@ -2377,20 +2377,60 @@ void CodeGen::genCodeForMulHi(GenTreeOp* treeNode) // Generate code for ADD, SUB, MUL, DIV, UDIV, AND, AND_NOT, OR and XOR // This method is expected to have called genConsumeOperands() before calling it. -void CodeGen::genCodeForBinary(GenTreeOp* treeNode) +void CodeGen::genCodeForBinary(GenTreeOp* tree) { - const genTreeOps oper = treeNode->OperGet(); - regNumber targetReg = treeNode->GetRegNum(); - var_types targetType = treeNode->TypeGet(); + const genTreeOps oper = tree->OperGet(); + regNumber targetReg = tree->GetRegNum(); + var_types targetType = tree->TypeGet(); emitter* emit = GetEmitter(); - assert(treeNode->OperIs(GT_ADD, GT_SUB, GT_MUL, GT_DIV, GT_UDIV, GT_AND, GT_AND_NOT, GT_OR, GT_XOR)); + assert(tree->OperIs(GT_ADD, GT_SUB, GT_MUL, GT_DIV, GT_UDIV, GT_AND, GT_AND_NOT, GT_OR, GT_XOR)); - GenTree* op1 = treeNode->gtGetOp1(); - GenTree* op2 = treeNode->gtGetOp2(); - instruction ins = genGetInsForOper(treeNode->OperGet(), targetType); + GenTree* op1 = tree->gtGetOp1(); + GenTree* op2 = tree->gtGetOp2(); - if ((treeNode->gtFlags & GTF_SET_FLAGS) != 0) + // Handles combined operations: 'madd', 'msub' + if (op2->OperIs(GT_MUL) && op2->isContained()) + { + // In the future, we might consider enabling this for floating-point "unsafe" math. + assert(varTypeIsIntegral(tree)); + assert(!(tree->gtFlags & GTF_SET_FLAGS)); + + GenTree* a = op1; + GenTree* b = op2->gtGetOp1(); + GenTree* c = op2->gtGetOp2(); + + instruction ins; + switch (oper) + { + case GT_ADD: + { + // d = a + b * c + // madd: d, b, c, a + ins = INS_madd; + break; + } + + case GT_SUB: + { + // d = a - b * c + // msub: d, b, c, a + ins = INS_msub; + break; + } + + default: + unreached(); + } + + emit->emitIns_R_R_R_R(ins, emitActualTypeSize(tree), targetReg, b->GetRegNum(), c->GetRegNum(), a->GetRegNum()); + genProduceReg(tree); + return; + } + + instruction ins = genGetInsForOper(tree->OperGet(), targetType); + + if ((tree->gtFlags & GTF_SET_FLAGS) != 0) { switch (oper) { @@ -2414,10 +2454,10 @@ void CodeGen::genCodeForBinary(GenTreeOp* treeNode) // The arithmetic node must be sitting in a register (since it's not contained) assert(targetReg != REG_NA); - regNumber r = emit->emitInsTernary(ins, emitActualTypeSize(treeNode), treeNode, op1, op2); + regNumber r = emit->emitInsTernary(ins, emitActualTypeSize(tree), tree, op1, op2); assert(r == targetReg); - genProduceReg(treeNode); + genProduceReg(tree); } //------------------------------------------------------------------------ @@ -10062,76 +10102,6 @@ void CodeGen::instGen_MemoryBarrier(BarrierKind barrierKind) } } -//----------------------------------------------------------------------------------- -// genCodeForMadd: Emit a madd (Multiply-Add) instruction -// -// Arguments: -// tree - GT_MADD tree where op1 or op2 is GT_ADD -// -void CodeGen::genCodeForMadd(GenTreeOp* tree) -{ - assert(tree->OperIs(GT_MADD) && varTypeIsIntegral(tree) && !(tree->gtFlags & GTF_SET_FLAGS)); - genConsumeOperands(tree); - - GenTree* a; - GenTree* b; - GenTree* c; - if (tree->gtGetOp1()->OperIs(GT_MUL) && tree->gtGetOp1()->isContained()) - { - a = tree->gtGetOp1()->gtGetOp1(); - b = tree->gtGetOp1()->gtGetOp2(); - c = tree->gtGetOp2(); - } - else - { - assert(tree->gtGetOp2()->OperIs(GT_MUL) && tree->gtGetOp2()->isContained()); - a = tree->gtGetOp2()->gtGetOp1(); - b = tree->gtGetOp2()->gtGetOp2(); - c = tree->gtGetOp1(); - } - - bool useMsub = false; - if (a->OperIs(GT_NEG) && a->isContained()) - { - a = a->gtGetOp1(); - useMsub = true; - } - if (b->OperIs(GT_NEG) && b->isContained()) - { - b = b->gtGetOp1(); - useMsub = !useMsub; // it's either "a * -b" or "-a * -b" which is the same as "a * b" - } - - GetEmitter()->emitIns_R_R_R_R(useMsub ? INS_msub : INS_madd, emitActualTypeSize(tree), tree->GetRegNum(), - a->GetRegNum(), b->GetRegNum(), c->GetRegNum()); - genProduceReg(tree); -} - -//----------------------------------------------------------------------------------- -// genCodeForMsub: Emit a msub (Multiply-Subtract) instruction -// -// Arguments: -// tree - GT_MSUB tree where op2 is GT_MUL -// -void CodeGen::genCodeForMsub(GenTreeOp* tree) -{ - assert(tree->OperIs(GT_MSUB) && varTypeIsIntegral(tree) && !(tree->gtFlags & GTF_SET_FLAGS)); - genConsumeOperands(tree); - - assert(tree->gtGetOp2()->OperIs(GT_MUL)); - assert(tree->gtGetOp2()->isContained()); - - GenTree* a = tree->gtGetOp1(); - GenTree* b = tree->gtGetOp2()->gtGetOp1(); - GenTree* c = tree->gtGetOp2()->gtGetOp2(); - - // d = a - b * c - // MSUB d, b, c, a - GetEmitter()->emitIns_R_R_R_R(INS_msub, emitActualTypeSize(tree), tree->GetRegNum(), b->GetRegNum(), c->GetRegNum(), - a->GetRegNum()); - genProduceReg(tree); -} - //------------------------------------------------------------------------ // genCodeForBfiz: Generates the code sequence for a GenTree node that // represents a bitfield insert in zero with sign/zero extension. diff --git a/src/coreclr/jit/codegenarmarch.cpp b/src/coreclr/jit/codegenarmarch.cpp index 4905aba6e43..9f8ee65ee97 100644 --- a/src/coreclr/jit/codegenarmarch.cpp +++ b/src/coreclr/jit/codegenarmarch.cpp @@ -302,14 +302,6 @@ void CodeGen::genCodeForTreeNode(GenTree* treeNode) break; #ifdef TARGET_ARM64 - case GT_MADD: - genCodeForMadd(treeNode->AsOp()); - break; - - case GT_MSUB: - genCodeForMsub(treeNode->AsOp()); - break; - case GT_INC_SATURATE: genCodeForIncSaturate(treeNode); break; diff --git a/src/coreclr/jit/gtlist.h b/src/coreclr/jit/gtlist.h index d0aceb1ba07..0322afcb626 100644 --- a/src/coreclr/jit/gtlist.h +++ b/src/coreclr/jit/gtlist.h @@ -217,10 +217,6 @@ GTNODE(MUL_LONG , GenTreeOp ,1,GTK_BINOP|DBK_NOTHIR) GTNODE(AND_NOT , GenTreeOp ,0,GTK_BINOP|DBK_NOTHIR) #ifdef TARGET_ARM64 -GTNODE(MADD , GenTreeOp ,0,GTK_BINOP|DBK_NOTHIR) // Generates the Multiply-Add instruction. In the future, we might consider - // enabling it for both armarch and xarch for floating-point MADD "unsafe" math. -GTNODE(MSUB , GenTreeOp ,0,GTK_BINOP|DBK_NOTHIR) // Generates the Multiply-Subtract instruction. In the future, we might consider - // enabling it for both armarch and xarch for floating-point MSUB "unsafe" math. GTNODE(ADDEX, GenTreeOp ,0,GTK_BINOP|DBK_NOTHIR) // Add with sign/zero extension. GTNODE(BFIZ , GenTreeOp ,0,GTK_BINOP|DBK_NOTHIR) // Bitfield Insert in Zero. GTNODE(CSNEG_MI , GenTreeOp ,0,GTK_BINOP|DBK_NOTHIR) // Conditional select, negate, minus result diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp index 8c3450988e3..fb2c4c79fec 100644 --- a/src/coreclr/jit/lower.cpp +++ b/src/coreclr/jit/lower.cpp @@ -3010,7 +3010,10 @@ GenTree* Lowering::OptimizeConstCompare(GenTree* cmp) #endif // FEATURE_HW_INTRINSICS ) #else // TARGET_ARM64 - op1->OperIs(GT_AND, GT_ADD, GT_SUB) + op1->OperIs(GT_AND, GT_ADD, GT_SUB) && + // This happens in order to emit ARM64 'madd' and 'msub' instructions. + // We cannot combine 'adds'/'subs' and 'mul'. + !(op1->gtGetOp2()->OperIs(GT_MUL) && op1->gtGetOp2()->isContained()) #endif ) { @@ -5443,10 +5446,22 @@ GenTree* Lowering::LowerAdd(GenTreeOp* node) #endif // TARGET_XARCH } +#ifdef TARGET_ARM64 + if (node->OperIs(GT_ADD)) + { + GenTree* next = LowerAddForPossibleContainment(node); + if (next != nullptr) + { + return next; + } + } +#endif // TARGET_ARM64 + if (node->OperIs(GT_ADD)) { ContainCheckBinary(node); } + return nullptr; } diff --git a/src/coreclr/jit/lower.h b/src/coreclr/jit/lower.h index 7ad3f6202d6..9a8160ac96e 100644 --- a/src/coreclr/jit/lower.h +++ b/src/coreclr/jit/lower.h @@ -355,6 +355,7 @@ private: bool IsValidConstForMovImm(GenTreeHWIntrinsic* node); void LowerHWIntrinsicFusedMultiplyAddScalar(GenTreeHWIntrinsic* node); GenTree* LowerModPow2(GenTree* node); + GenTree* LowerAddForPossibleContainment(GenTreeOp* node); #endif // !TARGET_XARCH && !TARGET_ARM64 union VectorConstant { @@ -572,6 +573,10 @@ public: return m_lsra->isContainableMemoryOp(node); } +#ifdef TARGET_ARM64 + bool IsContainableBinaryOp(GenTree* parentNode, GenTree* childNode) const; +#endif // TARGET_ARM64 + #ifdef FEATURE_HW_INTRINSICS // Tries to get a containable node for a given HWIntrinsic bool TryGetContainableHWIntrinsicOp(GenTreeHWIntrinsic* containingNode, diff --git a/src/coreclr/jit/lowerarmarch.cpp b/src/coreclr/jit/lowerarmarch.cpp index b6e06898b64..f42ae8a1b50 100644 --- a/src/coreclr/jit/lowerarmarch.cpp +++ b/src/coreclr/jit/lowerarmarch.cpp @@ -145,6 +145,58 @@ bool Lowering::IsContainableImmed(GenTree* parentNode, GenTree* childNode) const return false; } +#ifdef TARGET_ARM64 +//------------------------------------------------------------------------ +// IsContainableBinaryOp: Is the child node a binary op that is containable from the parent node? +// +// Return Value: +// True if the child node can be contained. +// +// Notes: +// This can handle the decision to emit 'madd' or 'msub'. +// +bool Lowering::IsContainableBinaryOp(GenTree* parentNode, GenTree* childNode) const +{ + if (parentNode->isContained()) + return false; + + if (!varTypeIsIntegral(parentNode)) + return false; + + if (parentNode->gtFlags & GTF_SET_FLAGS) + return false; + + GenTree* op1 = parentNode->gtGetOp1(); + GenTree* op2 = parentNode->gtGetOp2(); + + if (op2 != childNode) + return false; + + if (op1->isContained() || op2->isContained()) + return false; + + if (!varTypeIsIntegral(op2)) + return false; + + if (op2->gtFlags & GTF_SET_FLAGS) + return false; + + // Find "a + b * c" or "a - b * c". + if (parentNode->OperIs(GT_ADD, GT_SUB) && op2->OperIs(GT_MUL)) + { + if (parentNode->gtOverflow()) + return false; + + if (op2->gtOverflow()) + return false; + + return !op2->gtGetOp1()->isContained() && !op2->gtGetOp2()->isContained(); + } + + return false; +} +#endif // TARGET_ARM64 + //------------------------------------------------------------------------ // LowerStoreLoc: Lower a store of a lclVar // @@ -697,6 +749,105 @@ GenTree* Lowering::LowerModPow2(GenTree* node) return cc->gtNext; } + +//------------------------------------------------------------------------ +// LowerAddForPossibleContainment: Tries to lower GT_ADD in such a way +// that would allow one of its operands +// to be contained. +// +// Arguments: +// node - the node to lower +// +GenTree* Lowering::LowerAddForPossibleContainment(GenTreeOp* node) +{ + assert(node->OperIs(GT_ADD)); + + if (!comp->opts.OptimizationEnabled()) + return nullptr; + + if (node->isContained()) + return nullptr; + + if (!varTypeIsIntegral(node)) + return nullptr; + + if (node->gtFlags & GTF_SET_FLAGS) + return nullptr; + + if (node->gtOverflow()) + return nullptr; + + GenTree* op1 = node->gtGetOp1(); + GenTree* op2 = node->gtGetOp2(); + + // If the second operand is a containable immediate, + // then we do not want to risk moving it around + // in this transformation. + if (IsContainableImmed(node, op2)) + return nullptr; + + GenTree* mul = nullptr; + GenTree* c = nullptr; + if (op1->OperIs(GT_MUL)) + { + // Swap + mul = op1; + c = op2; + } + else + { + mul = op2; + c = op1; + } + + if (mul->OperIs(GT_MUL) && !(mul->gtFlags & GTF_SET_FLAGS) && varTypeIsIntegral(mul) && !mul->gtOverflow() && + !mul->isContained() && !c->isContained()) + { + GenTree* a = mul->gtGetOp1(); + GenTree* b = mul->gtGetOp2(); + + // Transform "-a * b + c" to "c - a * b" + if (a->OperIs(GT_NEG) && !(a->gtFlags & GTF_SET_FLAGS) && !b->OperIs(GT_NEG) && !a->isContained() && + !a->gtGetOp1()->isContained()) + { + mul->AsOp()->gtOp1 = a->gtGetOp1(); + BlockRange().Remove(a); + node->gtOp1 = c; + node->gtOp2 = mul; + node->ChangeOper(GT_SUB); + + ContainCheckNode(node); + + return node->gtNext; + } + // Transform "a * -b + c" to "c - a * b" + else if (b->OperIs(GT_NEG) && !(b->gtFlags & GTF_SET_FLAGS) && !a->OperIs(GT_NEG) && !b->isContained() && + !b->gtGetOp1()->isContained()) + { + mul->AsOp()->gtOp2 = b->gtGetOp1(); + BlockRange().Remove(b); + node->gtOp1 = c; + node->gtOp2 = mul; + node->ChangeOper(GT_SUB); + + ContainCheckNode(node); + + return node->gtNext; + } + // Transform "a * b + c" to "c + a * b" + else if (op1->OperIs(GT_MUL)) + { + node->gtOp1 = c; + node->gtOp2 = mul; + + ContainCheckNode(node); + + return node->gtNext; + } + } + + return nullptr; +} #endif #ifdef FEATURE_HW_INTRINSICS @@ -1698,60 +1849,9 @@ void Lowering::ContainCheckBinary(GenTreeOp* node) CheckImmedAndMakeContained(node, op2); #ifdef TARGET_ARM64 - if (comp->opts.OptimizationEnabled() && varTypeIsIntegral(node) && !node->isContained()) + if (comp->opts.OptimizationEnabled() && IsContainableBinaryOp(node, op2)) { - // Find "a * b + c" or "c + a * b" in order to emit MADD/MSUB - if (node->OperIs(GT_ADD) && !node->gtOverflow() && (op1->OperIs(GT_MUL) || op2->OperIs(GT_MUL))) - { - GenTree* mul; - GenTree* c; - if (op1->OperIs(GT_MUL)) - { - mul = op1; - c = op2; - } - else - { - mul = op2; - c = op1; - } - - GenTree* a = mul->gtGetOp1(); - GenTree* b = mul->gtGetOp2(); - - if (!mul->isContained() && !mul->gtOverflow() && !a->isContained() && !b->isContained() && - !c->isContained() && varTypeIsIntegral(mul)) - { - if (a->OperIs(GT_NEG) && !a->gtGetOp1()->isContained() && !a->gtGetOp1()->IsRegOptional()) - { - // "-a * b + c" to MSUB - MakeSrcContained(mul, a); - } - if (b->OperIs(GT_NEG) && !b->gtGetOp1()->isContained()) - { - // "a * -b + c" to MSUB - MakeSrcContained(mul, b); - } - // If both 'a' and 'b' are GT_NEG - MADD will be emitted. - - node->ChangeOper(GT_MADD); - MakeSrcContained(node, mul); - } - } - // Find "a - b * c" in order to emit MSUB - else if (node->OperIs(GT_SUB) && !node->gtOverflow() && op2->OperIs(GT_MUL) && !op2->isContained() && - !op2->gtOverflow() && varTypeIsIntegral(op2)) - { - GenTree* a = op1; - GenTree* b = op2->gtGetOp1(); - GenTree* c = op2->gtGetOp2(); - - if (!a->isContained() && !b->isContained() && !c->isContained()) - { - node->ChangeOper(GT_MSUB); - MakeSrcContained(node, op2); - } - } + MakeSrcContained(node, op2); } // Change ADD TO ADDEX for ADD(X, CAST(Y)) or ADD(CAST(X), Y) where CAST is int->long |