diff options
author | Egor Bogatov <egorbo@gmail.com> | 2022-11-10 15:01:52 +0300 |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-11-10 15:01:52 +0300 |
commit | 21700ffcb647081207aff9cba00eb59f976c061e (patch) | |
tree | 5111fdd8f199d7cebbc0f709235f30acdeef084d | |
parent | 6b372fb0671b18276c8f55ae010da95e631795ed (diff) |
Optimize some SIMD comparisons with Max/Min trick (#78027)
-rw-r--r-- | src/coreclr/jit/gentree.cpp | 74 |
1 files changed, 74 insertions, 0 deletions
diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index cc166cad4a2..28f5a7c1ce2 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -20023,6 +20023,28 @@ GenTree* Compiler::gtNewSimdCmpOpNode(genTreeOps op, if (intrinsic == NI_Illegal) { + // If we don't have an intrinsic set for this, try "Max(op1, op2) == op1" + // NOTE: technically, we can special case byte type to only require SSE2, but it + // complicates the test matrix for little gains. + if (((simdSize == 32) && compOpportunisticallyDependsOn(InstructionSet_AVX2)) || + ((simdSize == 16) && compOpportunisticallyDependsOn(InstructionSet_SSE41))) + { + // TODO-AVX512: We can use this trick for longs only with AVX-512 + if (!varTypeIsLong(simdBaseType)) + { + assert(!varTypeIsFloating(simdBaseType)); + GenTree* op1Dup; + op1 = impCloneExpr(op1, &op1Dup, clsHnd, CHECK_SPILL_ALL, + nullptr DEBUGARG("Clone op1 for vector GreaterThanOrEqual")); + + // EQ(Max(op1, op2), op1) + GenTree* maxNode = + gtNewSimdMaxNode(type, op1, op2, simdBaseJitType, simdSize, isSimdAsHWIntrinsic); + return gtNewSimdCmpOpNode(GT_EQ, type, maxNode, op1Dup, simdBaseJitType, simdSize, + isSimdAsHWIntrinsic); + } + } + // There is no direct support for doing a combined comparison and equality for integral types. // These have to be implemented by performing both halves and combining their results. // @@ -20249,6 +20271,28 @@ GenTree* Compiler::gtNewSimdCmpOpNode(genTreeOps op, if (intrinsic == NI_Illegal) { + // If we don't have an intrinsic set for this, try "Min(op1, op2) == op1" + // NOTE: technically, we can special case byte type to only require SSE2, but it + // complicates the test matrix for little gains. + if (((simdSize == 32) && compOpportunisticallyDependsOn(InstructionSet_AVX2)) || + ((simdSize == 16) && compOpportunisticallyDependsOn(InstructionSet_SSE41))) + { + // TODO-AVX512: We can use this trick for longs only with AVX-512 + if (!varTypeIsLong(simdBaseType)) + { + assert(!varTypeIsFloating(simdBaseType)); + GenTree* op1Dup; + op1 = impCloneExpr(op1, &op1Dup, clsHnd, CHECK_SPILL_ALL, + nullptr DEBUGARG("Clone op1 for vector LessThanOrEqual")); + + // EQ(Min(op1, op2), op1) + GenTree* minNode = + gtNewSimdMinNode(type, op1, op2, simdBaseJitType, simdSize, isSimdAsHWIntrinsic); + return gtNewSimdCmpOpNode(GT_EQ, type, minNode, op1Dup, simdBaseJitType, simdSize, + isSimdAsHWIntrinsic); + } + } + // There is no direct support for doing a combined comparison and equality for integral types. // These have to be implemented by performing both halves and combining their results. // @@ -21082,6 +21126,12 @@ GenTree* Compiler::gtNewSimdMaxNode(var_types type, case TYP_BYTE: case TYP_USHORT: { + if (compOpportunisticallyDependsOn(InstructionSet_SSE41)) + { + intrinsic = NI_SSE41_Max; + break; + } + GenTree* constVal = nullptr; CorInfoType opJitType = simdBaseJitType; var_types opType = simdBaseType; @@ -21156,6 +21206,15 @@ GenTree* Compiler::gtNewSimdMaxNode(var_types type, case TYP_INT: case TYP_UINT: + { + if (compOpportunisticallyDependsOn(InstructionSet_SSE41)) + { + intrinsic = NI_SSE41_Max; + break; + } + break; + } + case TYP_LONG: case TYP_ULONG: { @@ -21266,6 +21325,12 @@ GenTree* Compiler::gtNewSimdMinNode(var_types type, case TYP_BYTE: case TYP_USHORT: { + if (compOpportunisticallyDependsOn(InstructionSet_SSE41)) + { + intrinsic = NI_SSE41_Min; + break; + } + GenTree* constVal = nullptr; CorInfoType opJitType = simdBaseJitType; var_types opType = simdBaseType; @@ -21340,6 +21405,15 @@ GenTree* Compiler::gtNewSimdMinNode(var_types type, case TYP_INT: case TYP_UINT: + { + if (compOpportunisticallyDependsOn(InstructionSet_SSE41)) + { + intrinsic = NI_SSE41_Min; + break; + } + break; + } + case TYP_LONG: case TYP_ULONG: { |