diff options
Diffstat (limited to 'src/coreclr/jit/lowerarmarch.cpp')
-rw-r--r-- | src/coreclr/jit/lowerarmarch.cpp | 304 |
1 files changed, 232 insertions, 72 deletions
diff --git a/src/coreclr/jit/lowerarmarch.cpp b/src/coreclr/jit/lowerarmarch.cpp index cd9bd3532c9..1eae6c6dbc0 100644 --- a/src/coreclr/jit/lowerarmarch.cpp +++ b/src/coreclr/jit/lowerarmarch.cpp @@ -158,40 +158,187 @@ bool Lowering::IsContainableImmed(GenTree* parentNode, GenTree* childNode) const // bool Lowering::IsContainableBinaryOp(GenTree* parentNode, GenTree* childNode) const { + // The node we're checking should be one of the two child nodes + assert((parentNode->gtGetOp1() == childNode) || (parentNode->gtGetOp2() == childNode)); + + // We cannot contain if the parent node + // * is contained + // * is not operating on an integer + // * is already marking a child node as contained + // * is required to throw on overflow + if (parentNode->isContained()) return false; if (!varTypeIsIntegral(parentNode)) return false; - if (parentNode->gtFlags & GTF_SET_FLAGS) + if (parentNode->gtGetOp1()->isContained() || parentNode->gtGetOp2()->isContained()) + return false; + + if (parentNode->OperMayOverflow() && parentNode->gtOverflow()) return false; - GenTree* op1 = parentNode->gtGetOp1(); - GenTree* op2 = parentNode->gtGetOp2(); + // We cannot contain if the child node: + // * is not operating on an integer + // * is required to set a flag + // * is required to throw on overflow - if (op2 != childNode) + if (!varTypeIsIntegral(childNode)) return false; - if (op1->isContained() || op2->isContained()) + if ((childNode->gtFlags & GTF_SET_FLAGS) != 0) return false; - if (!varTypeIsIntegral(op2)) + if (childNode->OperMayOverflow() && childNode->gtOverflow()) return false; - if (op2->gtFlags & GTF_SET_FLAGS) + GenTree* matchedOp = nullptr; + + if (childNode->OperIs(GT_MUL)) + { + if (childNode->gtGetOp1()->isContained() || childNode->gtGetOp2()->isContained()) + { + // Cannot contain if either of the childs operands is already contained + return false; + } + + if ((parentNode->gtFlags & GTF_SET_FLAGS) != 0) + { + // Cannot contain if the parent operation needs to set flags + return false; + } + + if (parentNode->OperIs(GT_ADD)) + { + // Find "c + (a * b)" or "(a * b) + c" + return IsSafeToContainMem(parentNode, childNode); + } + + if (parentNode->OperIs(GT_SUB)) + { + // Find "c - (a * b)" + assert(childNode == parentNode->gtGetOp2()); + return IsSafeToContainMem(parentNode, childNode); + } + + // TODO: Handle mneg return false; + } - // Find "a + b * c" or "a - b * c". - if (parentNode->OperIs(GT_ADD, GT_SUB) && op2->OperIs(GT_MUL)) + if (childNode->OperIs(GT_LSH, GT_RSH, GT_RSZ)) { - if (parentNode->gtOverflow()) + // Find "a op (b shift cns)" + + if (childNode->gtGetOp1()->isContained()) + { + // Cannot contain if the childs op1 is already contained + return false; + } + + GenTree* shiftAmountNode = childNode->gtGetOp2(); + + if (!shiftAmountNode->IsCnsIntOrI()) + { + // Cannot contain if the childs op2 is not a constant + return false; + } + + const ssize_t shiftAmount = shiftAmountNode->AsIntCon()->IconValue(); + const ssize_t maxShift = (static_cast<ssize_t>(genTypeSize(parentNode)) * BITS_IN_BYTE) - 1; + + if ((shiftAmount < 0x01) || (shiftAmount > maxShift)) + { + // Cannot contain if the shift amount is less than 1 or greater than maxShift return false; + } + + if (parentNode->OperIs(GT_ADD, GT_SUB, GT_AND)) + { + // These operations can still report flags - if (op2->gtOverflow()) + if (IsSafeToContainMem(parentNode, childNode)) + { + assert(shiftAmountNode->isContained()); + return true; + } + } + + if ((parentNode->gtFlags & GTF_SET_FLAGS) != 0) + { + // Cannot contain if the parent operation needs to set flags return false; + } - return !op2->gtGetOp1()->isContained() && !op2->gtGetOp2()->isContained(); + if (parentNode->OperIs(GT_CMP, GT_OR, GT_XOR)) + { + if (IsSafeToContainMem(parentNode, childNode)) + { + assert(shiftAmountNode->isContained()); + return true; + } + } + + // TODO: Handle CMN, NEG/NEGS, BIC/BICS, EON, MVN, ORN, TST + return false; + } + + if (childNode->OperIs(GT_CAST)) + { + // Find "a op cast(b)" + GenTree* castOp = childNode->AsCast()->CastOp(); + + // We want to prefer the combined op here over containment of the cast op + castOp->ClearContained(); + + bool isSupportedCast = false; + + if (varTypeIsSmall(childNode->CastToType())) + { + // The JIT doesn't track upcasts from small types, instead most types + // are tracked as TYP_INT and then we get explicit downcasts to the + // desired small type instead. + + assert(!varTypeIsFloating(castOp)); + isSupportedCast = true; + } + else if (childNode->TypeIs(TYP_LONG) && genActualTypeIsInt(castOp)) + { + // We can handle "INT -> LONG", "INT -> ULONG", "UINT -> LONG", and "UINT -> ULONG" + isSupportedCast = true; + } + + if (!isSupportedCast) + { + return false; + } + + if (parentNode->OperIs(GT_ADD, GT_SUB)) + { + // These operations can still report flags + + if (IsSafeToContainMem(parentNode, childNode)) + { + return true; + } + } + + if ((parentNode->gtFlags & GTF_SET_FLAGS) != 0) + { + // Cannot contain if the parent operation needs to set flags + return false; + } + + if (parentNode->OperIs(GT_CMP)) + { + if (IsSafeToContainMem(parentNode, childNode)) + { + return true; + } + } + + // TODO: Handle CMN + return false; } return false; @@ -1088,13 +1235,13 @@ GenTree* Lowering::LowerHWIntrinsicCmpOp(GenTreeHWIntrinsic* node, genTreeOps cm GenTree* op1 = node->Op(1); GenTree* op2 = node->Op(2); - // Optimize comparison against Vector64/128<>.Zero via UMAX: + // Optimize comparison against Vector64/128<>.Zero via UMAXV: // // bool eq = v == Vector128<integer>.Zero // // to: // - // bool eq = AdvSimd.Arm64.MaxAcross(v.AsUInt16()).ToScalar() == 0; + // bool eq = AdvSimd.Arm64.MaxPairwise(v.AsUInt16(), v.AsUInt16()).GetElement(0) == 0; // GenTree* op = nullptr; GenTree* opZero = nullptr; @@ -1109,20 +1256,36 @@ GenTree* Lowering::LowerHWIntrinsicCmpOp(GenTreeHWIntrinsic* node, genTreeOps cm opZero = op2; } - if (!varTypeIsFloating(simdBaseType) && (op != nullptr)) + // Special case: "vec ==/!= zero_vector" + if (!varTypeIsFloating(simdBaseType) && (op != nullptr) && (simdSize != 12)) { - // Use USHORT for V64 and UINT for V128 due to better latency/TP on some CPUs - CorInfoType maxType = (simdSize == 8) ? CORINFO_TYPE_USHORT : CORINFO_TYPE_UINT; - GenTree* cmp = comp->gtNewSimdHWIntrinsicNode(simdType, op, NI_AdvSimd_Arm64_MaxAcross, maxType, simdSize); - BlockRange().InsertBefore(node, cmp); - LowerNode(cmp); + GenTree* cmp = op; + if (simdSize != 8) // we don't need compression for Vector64 + { + node->Op(1) = op; + LIR::Use tmp1Use(BlockRange(), &node->Op(1), node); + ReplaceWithLclVar(tmp1Use); + op = node->Op(1); + GenTree* opClone = comp->gtClone(op); + BlockRange().InsertAfter(op, opClone); + + cmp = comp->gtNewSimdHWIntrinsicNode(simdType, op, opClone, NI_AdvSimd_Arm64_MaxPairwise, CORINFO_TYPE_UINT, + simdSize); + BlockRange().InsertBefore(node, cmp); + LowerNode(cmp); + } + BlockRange().Remove(opZero); - GenTree* val = comp->gtNewSimdHWIntrinsicNode(TYP_INT, cmp, NI_Vector128_ToScalar, CORINFO_TYPE_UINT, simdSize); - BlockRange().InsertAfter(cmp, val); + GenTree* zroCns = comp->gtNewIconNode(0, TYP_INT); + BlockRange().InsertAfter(cmp, zroCns); + + GenTree* val = + comp->gtNewSimdHWIntrinsicNode(TYP_LONG, cmp, zroCns, NI_AdvSimd_Extract, CORINFO_TYPE_ULONG, simdSize); + BlockRange().InsertAfter(zroCns, val); LowerNode(val); - GenTree* cmpZeroCns = comp->gtNewIconNode(0, TYP_INT); + GenTree* cmpZeroCns = comp->gtNewIconNode(0, TYP_LONG); BlockRange().InsertAfter(val, cmpZeroCns); node->ChangeOper(cmpOp); @@ -1190,34 +1353,49 @@ GenTree* Lowering::LowerHWIntrinsicCmpOp(GenTreeHWIntrinsic* node, genTreeOps cm cmp = tmp; } - GenTree* msk = - comp->gtNewSimdHWIntrinsicNode(simdType, cmp, NI_AdvSimd_Arm64_MinAcross, CORINFO_TYPE_UBYTE, simdSize); - BlockRange().InsertAfter(cmp, msk); - LowerNode(msk); + if (simdSize != 8) // we don't need compression for Vector64 + { + GenTree* msk; + + // Save cmp into a temp as we're going to need to pass it twice to MinPairwise + node->Op(1) = cmp; + LIR::Use tmp1Use(BlockRange(), &node->Op(1), node); + ReplaceWithLclVar(tmp1Use); + cmp = node->Op(1); + GenTree* cmpClone = comp->gtClone(cmp); + BlockRange().InsertAfter(cmp, cmpClone); + + msk = comp->gtNewSimdHWIntrinsicNode(simdType, cmp, cmpClone, NI_AdvSimd_Arm64_MinPairwise, CORINFO_TYPE_UINT, + simdSize); + BlockRange().InsertAfter(cmpClone, msk); + LowerNode(msk); + + cmp = msk; + } GenTree* zroCns = comp->gtNewIconNode(0, TYP_INT); - BlockRange().InsertAfter(msk, zroCns); + BlockRange().InsertAfter(cmp, zroCns); GenTree* val = - comp->gtNewSimdHWIntrinsicNode(TYP_UBYTE, msk, zroCns, NI_AdvSimd_Extract, CORINFO_TYPE_UBYTE, simdSize); + comp->gtNewSimdHWIntrinsicNode(TYP_LONG, cmp, zroCns, NI_AdvSimd_Extract, CORINFO_TYPE_ULONG, simdSize); BlockRange().InsertAfter(zroCns, val); LowerNode(val); - zroCns = comp->gtNewIconNode(0, TYP_INT); - BlockRange().InsertAfter(val, zroCns); + GenTree* bitMskCns = comp->gtNewIconNode(static_cast<ssize_t>(0xffffffffffffffff), TYP_LONG); + BlockRange().InsertAfter(val, bitMskCns); node->ChangeOper(cmpOp); - node->gtType = TYP_INT; + node->gtType = TYP_LONG; node->AsOp()->gtOp1 = val; - node->AsOp()->gtOp2 = zroCns; + node->AsOp()->gtOp2 = bitMskCns; // The CompareEqual will set (condition is true) or clear (condition is false) all bits of the respective element // The MinAcross then ensures we get either all bits set (all conditions are true) or clear (any condition is false) // So, we need to invert the condition from the operation since we compare against zero - GenCondition cmpCnd = (cmpOp == GT_EQ) ? GenCondition::NE : GenCondition::EQ; - GenTree* cc = LowerNodeCC(node, cmpCnd); + GenCondition cmpCnd = (cmpOp == GT_EQ) ? GenCondition::EQ : GenCondition::NE; + LowerNodeCC(node, cmpCnd); node->gtType = TYP_VOID; node->ClearUnusedValue(); @@ -1281,7 +1459,7 @@ GenTree* Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node) BlockRange().Remove(arg); } - GenTreeVecCon* vecCon = comp->gtNewVconNode(simdType, simdBaseJitType); + GenTreeVecCon* vecCon = comp->gtNewVconNode(simdType); vecCon->gtSimd32Val = simd32Val; BlockRange().InsertBefore(node, vecCon); @@ -1815,50 +1993,32 @@ void Lowering::ContainCheckBinary(GenTreeOp* node) GenTree* op1 = node->gtGetOp1(); GenTree* op2 = node->gtGetOp2(); - // Check and make op2 contained (if it is a containable immediate) - CheckImmedAndMakeContained(node, op2); - -#ifdef TARGET_ARM64 - if (comp->opts.OptimizationEnabled() && IsContainableBinaryOp(node, op2)) + if (CheckImmedAndMakeContained(node, op2)) { - MakeSrcContained(node, op2); + return; } - // Change ADD TO ADDEX for ADD(X, CAST(Y)) or ADD(CAST(X), Y) where CAST is int->long - // or for ADD(LSH(X, CNS), X) or ADD(X, LSH(X, CNS)) where CNS is in the (0..typeWidth) range - if (node->OperIs(GT_ADD) && !op1->isContained() && !op2->isContained() && varTypeIsIntegral(node) && - !node->gtOverflow()) + if (node->OperIsCommutative() && CheckImmedAndMakeContained(node, op1)) { - assert(!node->isContained()); + MakeSrcContained(node, op1); + std::swap(node->gtOp1, node->gtOp2); + return; + } - if (op1->OperIs(GT_CAST) || op2->OperIs(GT_CAST)) +#ifdef TARGET_ARM64 + if (comp->opts.OptimizationEnabled()) + { + if (IsContainableBinaryOp(node, op2)) { - GenTree* cast = op1->OperIs(GT_CAST) ? op1 : op2; - if (cast->gtGetOp1()->TypeIs(TYP_INT) && cast->TypeIs(TYP_LONG) && !cast->gtOverflow()) - { - node->ChangeOper(GT_ADDEX); - cast->AsCast()->CastOp()->ClearContained(); // Uncontain any memory operands. - MakeSrcContained(node, cast); - } + MakeSrcContained(node, op2); + return; } - else if (op1->OperIs(GT_LSH) || op2->OperIs(GT_LSH)) - { - GenTree* lsh = op1->OperIs(GT_LSH) ? op1 : op2; - GenTree* shiftBy = lsh->gtGetOp2(); - if (shiftBy->IsCnsIntOrI()) - { - const ssize_t shiftByCns = shiftBy->AsIntCon()->IconValue(); - const ssize_t maxShift = (ssize_t)genTypeSize(node) * BITS_IN_BYTE; - - if ((shiftByCns > 0) && (shiftByCns < maxShift)) - { - // shiftBy is small so it has to be contained at this point. - assert(shiftBy->isContained()); - node->ChangeOper(GT_ADDEX); - MakeSrcContained(node, lsh); - } - } + if (node->OperIsCommutative() && IsContainableBinaryOp(node, op1)) + { + MakeSrcContained(node, op1); + std::swap(node->gtOp1, node->gtOp2); + return; } } #endif |