Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/dotnet/runtime.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Egor Bogatov <egorbo@gmail.com> 2022-11-10 15:01:52 +0300
committer GitHub <noreply@github.com> 2022-11-10 15:01:52 +0300
commit 21700ffcb647081207aff9cba00eb59f976c061e (patch)
tree 5111fdd8f199d7cebbc0f709235f30acdeef084d
parent 6b372fb0671b18276c8f55ae010da95e631795ed (diff)
Optimize some SIMD comparisons with Max/Min trick (#78027)
-rw-r--r-- src/coreclr/jit/gentree.cpp 74
1 file changed, 74 insertions, 0 deletions
diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp
index cc166cad4a2..28f5a7c1ce2 100644
--- a/src/coreclr/jit/gentree.cpp
+++ b/src/coreclr/jit/gentree.cpp
@@ -20023,6 +20023,28 @@ GenTree* Compiler::gtNewSimdCmpOpNode(genTreeOps op,
if (intrinsic == NI_Illegal)
{
+ // If we don't have an intrinsic set for this, try "Max(op1, op2) == op1"
+ // NOTE: technically, we can special case byte type to only require SSE2, but it
+ // complicates the test matrix for little gains.
+ if (((simdSize == 32) && compOpportunisticallyDependsOn(InstructionSet_AVX2)) ||
+ ((simdSize == 16) && compOpportunisticallyDependsOn(InstructionSet_SSE41)))
+ {
+ // TODO-AVX512: We can use this trick for longs only with AVX-512
+ if (!varTypeIsLong(simdBaseType))
+ {
+ assert(!varTypeIsFloating(simdBaseType));
+ GenTree* op1Dup;
+ op1 = impCloneExpr(op1, &op1Dup, clsHnd, CHECK_SPILL_ALL,
+ nullptr DEBUGARG("Clone op1 for vector GreaterThanOrEqual"));
+
+ // EQ(Max(op1, op2), op1)
+ GenTree* maxNode =
+ gtNewSimdMaxNode(type, op1, op2, simdBaseJitType, simdSize, isSimdAsHWIntrinsic);
+ return gtNewSimdCmpOpNode(GT_EQ, type, maxNode, op1Dup, simdBaseJitType, simdSize,
+ isSimdAsHWIntrinsic);
+ }
+ }
+
// There is no direct support for doing a combined comparison and equality for integral types.
// These have to be implemented by performing both halves and combining their results.
//
@@ -20249,6 +20271,28 @@ GenTree* Compiler::gtNewSimdCmpOpNode(genTreeOps op,
if (intrinsic == NI_Illegal)
{
+ // If we don't have an intrinsic set for this, try "Min(op1, op2) == op1"
+ // NOTE: technically, we can special case byte type to only require SSE2, but it
+ // complicates the test matrix for little gains.
+ if (((simdSize == 32) && compOpportunisticallyDependsOn(InstructionSet_AVX2)) ||
+ ((simdSize == 16) && compOpportunisticallyDependsOn(InstructionSet_SSE41)))
+ {
+ // TODO-AVX512: We can use this trick for longs only with AVX-512
+ if (!varTypeIsLong(simdBaseType))
+ {
+ assert(!varTypeIsFloating(simdBaseType));
+ GenTree* op1Dup;
+ op1 = impCloneExpr(op1, &op1Dup, clsHnd, CHECK_SPILL_ALL,
+ nullptr DEBUGARG("Clone op1 for vector LessThanOrEqual"));
+
+ // EQ(Min(op1, op2), op1)
+ GenTree* minNode =
+ gtNewSimdMinNode(type, op1, op2, simdBaseJitType, simdSize, isSimdAsHWIntrinsic);
+ return gtNewSimdCmpOpNode(GT_EQ, type, minNode, op1Dup, simdBaseJitType, simdSize,
+ isSimdAsHWIntrinsic);
+ }
+ }
+
// There is no direct support for doing a combined comparison and equality for integral types.
// These have to be implemented by performing both halves and combining their results.
//
@@ -21082,6 +21126,12 @@ GenTree* Compiler::gtNewSimdMaxNode(var_types type,
case TYP_BYTE:
case TYP_USHORT:
{
+ if (compOpportunisticallyDependsOn(InstructionSet_SSE41))
+ {
+ intrinsic = NI_SSE41_Max;
+ break;
+ }
+
GenTree* constVal = nullptr;
CorInfoType opJitType = simdBaseJitType;
var_types opType = simdBaseType;
@@ -21156,6 +21206,15 @@ GenTree* Compiler::gtNewSimdMaxNode(var_types type,
case TYP_INT:
case TYP_UINT:
+ {
+ if (compOpportunisticallyDependsOn(InstructionSet_SSE41))
+ {
+ intrinsic = NI_SSE41_Max;
+ break;
+ }
+ break;
+ }
+
case TYP_LONG:
case TYP_ULONG:
{
@@ -21266,6 +21325,12 @@ GenTree* Compiler::gtNewSimdMinNode(var_types type,
case TYP_BYTE:
case TYP_USHORT:
{
+ if (compOpportunisticallyDependsOn(InstructionSet_SSE41))
+ {
+ intrinsic = NI_SSE41_Min;
+ break;
+ }
+
GenTree* constVal = nullptr;
CorInfoType opJitType = simdBaseJitType;
var_types opType = simdBaseType;
@@ -21340,6 +21405,15 @@ GenTree* Compiler::gtNewSimdMinNode(var_types type,
case TYP_INT:
case TYP_UINT:
+ {
+ if (compOpportunisticallyDependsOn(InstructionSet_SSE41))
+ {
+ intrinsic = NI_SSE41_Min;
+ break;
+ }
+ break;
+ }
+
case TYP_LONG:
case TYP_ULONG:
{