github.com/dotnet/runtime.git
Diffstat (limited to 'src/coreclr/jit/lowerxarch.cpp')
-rw-r--r--    src/coreclr/jit/lowerxarch.cpp    207
1 file changed, 204 insertions(+), 3 deletions(-)
diff --git a/src/coreclr/jit/lowerxarch.cpp b/src/coreclr/jit/lowerxarch.cpp
index 44d6781d081..40ccaebec0f 100644
--- a/src/coreclr/jit/lowerxarch.cpp
+++ b/src/coreclr/jit/lowerxarch.cpp
@@ -1023,6 +1023,8 @@ GenTree* Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node)
case NI_Vector128_Create:
case NI_Vector256_Create:
+ case NI_Vector128_CreateScalar:
+ case NI_Vector256_CreateScalar:
{
// We don't directly support the Vector128.Create or Vector256.Create methods in codegen
// and instead lower them to other intrinsic nodes in LowerHWIntrinsicCreate so we expect
@@ -1705,8 +1707,9 @@ GenTree* Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node)
GenTree* tmp2 = nullptr;
GenTree* tmp3 = nullptr;
- bool isConstant = GenTreeVecCon::IsHWIntrinsicCreateConstant(node, simd32Val);
- size_t argCnt = node->GetOperandCount();
+ bool isConstant = GenTreeVecCon::IsHWIntrinsicCreateConstant(node, simd32Val);
+ bool isCreateScalar = (intrinsicId == NI_Vector128_CreateScalar) || (intrinsicId == NI_Vector256_CreateScalar);
+ size_t argCnt = node->GetOperandCount();
if (isConstant)
{
@@ -1745,6 +1748,156 @@ GenTree* Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node)
}
else if (argCnt == 1)
{
+ if (isCreateScalar)
+ {
+ switch (simdBaseType)
+ {
+ case TYP_BYTE:
+ case TYP_UBYTE:
+ {
+ // Types need to be explicitly zero-extended to ensure the upper bits are zero
+ //
+ // We need to explicitly use TYP_UBYTE since unsigned is ignored for small types
+ // Explicitly handle both BYTE and UBYTE to account for reinterpret casts and the like
+ //
+ // The from type is INT since that is the input type tracked by IR, whereas the target
+ // type needs to be UBYTE so it implicitly zero-extends back to TYP_INT
+
+ tmp1 = comp->gtNewCastNode(TYP_INT, op1, /* unsigned */ true, TYP_UBYTE);
+ BlockRange().InsertAfter(op1, tmp1);
+ LowerNode(tmp1);
+
+ node->ChangeHWIntrinsicId(NI_SSE2_ConvertScalarToVector128Int32, tmp1);
+ node->SetSimdBaseJitType(CORINFO_TYPE_INT);
+ break;
+ }
+
+ case TYP_SHORT:
+ case TYP_USHORT:
+ {
+ // Types need to be explicitly zero-extended to ensure the upper bits are zero
+ //
+ // We need to explicitly use TYP_USHORT since unsigned is ignored for small types
+ // Explicitly handle both SHORT and USHORT to account for reinterpret casts and the like
+ //
+ // The from type is INT since that is the input type tracked by IR, whereas the target
+ // type needs to be USHORT so it implicitly zero-extends back to TYP_INT
+
+ tmp1 = comp->gtNewCastNode(TYP_INT, op1, /* unsigned */ true, TYP_USHORT);
+ BlockRange().InsertAfter(op1, tmp1);
+ LowerNode(tmp1);
+
+ node->ChangeHWIntrinsicId(NI_SSE2_ConvertScalarToVector128Int32, tmp1);
+ node->SetSimdBaseJitType(CORINFO_TYPE_INT);
+ break;
+ }
+
+ case TYP_INT:
+ {
+ node->ChangeHWIntrinsicId(NI_SSE2_ConvertScalarToVector128Int32);
+ break;
+ }
+
+ case TYP_UINT:
+ {
+ node->ChangeHWIntrinsicId(NI_SSE2_ConvertScalarToVector128UInt32);
+ break;
+ }
+
+#if defined(TARGET_AMD64)
+ case TYP_LONG:
+ {
+ node->ChangeHWIntrinsicId(NI_SSE2_X64_ConvertScalarToVector128Int64);
+ break;
+ }
+
+ case TYP_ULONG:
+ {
+ node->ChangeHWIntrinsicId(NI_SSE2_X64_ConvertScalarToVector128UInt64);
+ break;
+ }
+#endif // TARGET_AMD64
+
+ case TYP_FLOAT:
+ {
+ tmp1 = comp->gtNewZeroConNode(simdType);
+ BlockRange().InsertBefore(op1, tmp1);
+ LowerNode(tmp1);
+
+ if (comp->compOpportunisticallyDependsOn(InstructionSet_SSE41))
+ {
+ // Sse41.Insert has:
+ // * Bits 0-3: zmask
+ // * Bits 4-5: count_d
+ // * Bits 6-7: count_s (register form only)
+ //
+ // We want zmask 0b1110 (0xE) to zero elements 1/2/3
+ // We want count_d 0b00 (0x0) to insert the value to element 0
+ // We want count_s 0b00 (0x0) as we're just taking element 0 of the source
+
+ idx = comp->gtNewIconNode(0x0E);
+ BlockRange().InsertAfter(op1, idx);
+ LowerNode(idx);
+
+ node->ResetHWIntrinsicId(NI_SSE41_Insert, comp, tmp1, op1, idx);
+ }
+ else
+ {
+ node->ResetHWIntrinsicId(NI_SSE_MoveScalar, comp, tmp1, op1);
+ }
+ break;
+ }
+
+ case TYP_DOUBLE:
+ {
+ tmp1 = comp->gtNewZeroConNode(simdType);
+ BlockRange().InsertBefore(op1, tmp1);
+ LowerNode(tmp1);
+
+ node->ResetHWIntrinsicId(NI_SSE2_MoveScalar, comp, tmp1, op1);
+ break;
+ }
+
+ default:
+ {
+ unreached();
+ }
+ }
+
+ if (simdSize == 32)
+ {
+ // We're creating a Vector256 scalar, so we treat the original op as Vector128 and
+ // unsafely extend it up to Vector256 (which is actually safe, since the 128-bit
+ // op zero-extends up to 256 bits), then replace the original use with the new
+ // TYP_SIMD32 node.
+
+ node->ChangeType(TYP_SIMD16);
+ node->SetSimdSize(16);
+ LowerNode(node);
+
+ tmp2 = comp->gtNewSimdHWIntrinsicNode(TYP_SIMD32, node, NI_Vector128_ToVector256Unsafe, simdBaseJitType,
+ 16);
+
+ LIR::Use use;
+ bool foundUse = BlockRange().TryGetUse(node, &use);
+ BlockRange().InsertAfter(node, tmp2);
+
+ if (foundUse)
+ {
+ use.ReplaceWith(tmp2);
+ }
+ else
+ {
+ node->ClearUnusedValue();
+ tmp2->SetUnusedValue();
+ }
+
+ node = tmp2->AsHWIntrinsic();
+ }
+
+ return LowerNode(node);
+ }
+
// We have the following (where simd is simd16 or simd32):
// /--* op1 T
// node = * HWINTRINSIC simd T Create
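
[Editorial aside, not part of the patch.] For readers following the CreateScalar hunk above, here is a minimal standalone C++ sketch of the instruction sequences the lowering selects per base type, written against the raw SSE/SSE2/SSE4.1/AVX intrinsics. The CreateScalar* helper names are hypothetical; the managed semantics being implemented are Vector128.CreateScalar(x) = element 0 set to x, all other elements zero.

// Standalone illustration (hypothetical helpers) of the sequences the
// CreateScalar lowering above selects. Compile with -msse4.1 -mavx.
#include <immintrin.h>
#include <cstdint>

// byte/ubyte (short/ushort analogously): explicitly zero-extend to 32 bits,
// then movd; the cast guarantees bits 8..127 of the result are zero.
__m128i CreateScalarByte(uint8_t value)
{
    return _mm_cvtsi32_si128(static_cast<int>(value)); // movzx + movd
}

// int/uint: movd directly; the upper three elements are implicitly zeroed.
__m128i CreateScalarInt32(int32_t value)
{
    return _mm_cvtsi32_si128(value);
}

#if defined(__x86_64__) || defined(_M_X64)
// long/ulong (64-bit only, matching the TARGET_AMD64 guard above): movq.
__m128i CreateScalarInt64(int64_t value)
{
    return _mm_cvtsi64_si128(value);
}
#endif

// float with SSE4.1: insertps against a zero vector with imm8 = 0x0E
// (count_s = 0, count_d = 0, zmask = 0b1110 zeroes elements 1..3).
__m128 CreateScalarFloatSse41(float value)
{
    return _mm_insert_ps(_mm_setzero_ps(), _mm_set_ss(value), 0x0E);
}

// float without SSE4.1: movss merging the value into a zero vector.
__m128 CreateScalarFloatSse(float value)
{
    return _mm_move_ss(_mm_setzero_ps(), _mm_set_ss(value));
}

// double: movsd merging the value into a zero vector.
__m128d CreateScalarDouble(double value)
{
    return _mm_move_sd(_mm_setzero_pd(), _mm_set_sd(value));
}

// Vector256.CreateScalar: the same 128-bit sequence reinterpreted as 256-bit.
// The cast emits no instruction; although the intrinsic leaves the upper bits
// undefined in general, it is safe here because the VEX-encoded 128-bit op
// already zeroed the upper lanes - the same reasoning the patch gives for
// Vector128_ToVector256Unsafe.
__m256 CreateScalarFloat256(float value)
{
    return _mm256_castps128_ps256(CreateScalarFloatSse41(value));
}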
@@ -1822,6 +1975,8 @@ GenTree* Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node)
return LowerNode(node);
}
+ assert(intrinsicId == NI_Vector128_Create);
+
// We will be constructing the following parts:
// /--* op1 T
// tmp1 = * HWINTRINSIC simd16 T CreateScalarUnsafe
@@ -6940,7 +7095,6 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node)
case NI_SSSE3_AlignRight:
case NI_SSE41_Blend:
case NI_SSE41_DotProduct:
- case NI_SSE41_Insert:
case NI_SSE41_X64_Insert:
case NI_SSE41_MultipleSumAbsoluteDifferences:
case NI_AVX_Blend:
@@ -6968,6 +7122,53 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node)
break;
}
+ case NI_SSE41_Insert:
+ {
+ GenTree* lastOp = node->Op(numArgs);
+
+ if ((simdBaseType == TYP_FLOAT) && lastOp->IsCnsIntOrI())
+ {
+ // Sse41.Insert has:
+ // * Bits 0-3: zmask
+ // * Bits 4-5: count_d
+ // * Bits 6-7: count_s (register form only)
+ //
+ // Where zmask specifies which elements to zero
+ // Where count_d specifies the destination index the value is being inserted to
+ // Where count_s specifies the source index of the value being inserted
+
+ ssize_t ival = lastOp->AsIntConCommon()->IconValue();
+
+ ssize_t zmask = (ival & 0x0F);
+ ssize_t count_d = (ival & 0x30) >> 4;
+ ssize_t count_s = (ival & 0xC0) >> 6;
+
+ if (op1->IsVectorZero())
+ {
+ // When op1 is zero, we can contain op1 and modify the mask
+ // to zero everything except for the element we're inserting to
+
+ MakeSrcContained(node, op1);
+
+ zmask |= ~(1 << count_d);
+ zmask &= 0x0F;
+
+ ival = (count_s << 6) | (count_d << 4) | (zmask);
+ lastOp->AsIntConCommon()->SetIconValue(ival);
+ }
+ }
+
+ if (TryGetContainableHWIntrinsicOp(node, &op2, &supportsRegOptional))
+ {
+ MakeSrcContained(node, op2);
+ }
+ else if (supportsRegOptional)
+ {
+ op2->SetRegOptional();
+ }
+ break;
+ }
+
default:
{
assert(!"Unhandled containment for ternary hardware intrinsic with immediate operand");