Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/dotnet/runtime.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Tanner Gooding <tagoo@outlook.com> 2022-11-11 06:49:10 +0300
committer GitHub <noreply@github.com> 2022-11-11 06:49:10 +0300
commit 00e6482544b435c66279ffd7abf43e9a7ead0236 (patch)
tree 148a6610b9bd06d8064e03179a2a319c0a04feef
parent 4820105ba85531da5b388e7b2e98fdcdae9ced07 (diff)
Updating CreateScalar to be intrinsic for Vector64/128/256 (#77798)
* Updating CreateScalar to be intrinsic for Vector64/128/256
* Applying formatting patch
* Fixing CreateScalar VecCon nodes created on import and find use before insert
* Applying formatting patch
* Ensure we zero extend TYP_BYTE and TYP_SHORT
* Ensure TYP_UBYTE and TYP_USHORT are also explicitly zero extended
* Fix the cast and add a comment explaining "why"
* Applying formatting patch
-rw-r--r-- src/coreclr/jit/gentree.cpp 37
-rw-r--r-- src/coreclr/jit/hwintrinsicarm64.cpp 47
-rw-r--r-- src/coreclr/jit/hwintrinsiccodegenxarch.cpp 7
-rw-r--r-- src/coreclr/jit/hwintrinsiclistarm64.h 4
-rw-r--r-- src/coreclr/jit/hwintrinsiclistxarch.h 2
-rw-r--r-- src/coreclr/jit/hwintrinsicxarch.cpp 47
-rw-r--r-- src/coreclr/jit/lowerarmarch.cpp 23
-rw-r--r-- src/coreclr/jit/lowerxarch.cpp 207
-rw-r--r-- src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector128.cs 199
-rw-r--r-- src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector256.cs 13
-rw-r--r-- src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector64.cs 115
11 files changed, 407 insertions, 294 deletions
diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp
index 8535f0e90c5..186fcabe5fb 100644
--- a/src/coreclr/jit/gentree.cpp
+++ b/src/coreclr/jit/gentree.cpp
@@ -17362,32 +17362,48 @@ bool GenTreeIntConCommon::AddrNeedsReloc(Compiler* comp)
// true if node represents a constant; otherwise, false
bool GenTreeVecCon::IsHWIntrinsicCreateConstant(GenTreeHWIntrinsic* node, simd32_t& simd32Val)
{
- var_types simdType = node->TypeGet();
- var_types simdBaseType = node->GetSimdBaseType();
- unsigned simdSize = node->GetSimdSize();
+ NamedIntrinsic intrinsic = node->GetHWIntrinsicId();
+ var_types simdType = node->TypeGet();
+ var_types simdBaseType = node->GetSimdBaseType();
+ unsigned simdSize = node->GetSimdSize();
size_t argCnt = node->GetOperandCount();
size_t cnsArgCnt = 0;
- switch (node->GetHWIntrinsicId())
+ switch (intrinsic)
{
case NI_Vector128_Create:
+ case NI_Vector128_CreateScalar:
case NI_Vector128_CreateScalarUnsafe:
#if defined(TARGET_XARCH)
case NI_Vector256_Create:
+ case NI_Vector256_CreateScalar:
case NI_Vector256_CreateScalarUnsafe:
#elif defined(TARGET_ARM64)
case NI_Vector64_Create:
+ case NI_Vector64_CreateScalar:
case NI_Vector64_CreateScalarUnsafe:
#endif
{
+ // Zero out the simd32Val
+ simd32Val = {};
+
// These intrinsics are meant to set the same value to every element.
if ((argCnt == 1) && HandleArgForHWIntrinsicCreate(node->Op(1), 0, simd32Val, simdBaseType))
{
- // Now assign the rest of the arguments.
- for (unsigned i = 1; i < simdSize / genTypeSize(simdBaseType); i++)
+// CreateScalar leaves the upper bits as zero
+
+#if defined(TARGET_XARCH)
+ if ((intrinsic != NI_Vector128_CreateScalar) && (intrinsic != NI_Vector256_CreateScalar))
+#elif defined(TARGET_ARM64)
+ if ((intrinsic != NI_Vector64_CreateScalar) && (intrinsic != NI_Vector128_CreateScalar))
+#endif
{
- HandleArgForHWIntrinsicCreate(node->Op(1), i, simd32Val, simdBaseType);
+ // Now assign the rest of the arguments.
+ for (unsigned i = 1; i < simdSize / genTypeSize(simdBaseType); i++)
+ {
+ HandleArgForHWIntrinsicCreate(node->Op(1), i, simd32Val, simdBaseType);
+ }
}
cnsArgCnt = 1;
@@ -18974,6 +18990,13 @@ bool GenTree::isContainableHWIntrinsic() const
return true;
}
+ case NI_Vector128_get_Zero:
+ case NI_Vector256_get_Zero:
+ {
+ // These HWIntrinsic operations are contained as part of Sse41.Insert
+ return true;
+ }
+
default:
{
return false;
diff --git a/src/coreclr/jit/hwintrinsicarm64.cpp b/src/coreclr/jit/hwintrinsicarm64.cpp
index 7b966f0a56d..e4653df3e6d 100644
--- a/src/coreclr/jit/hwintrinsicarm64.cpp
+++ b/src/coreclr/jit/hwintrinsicarm64.cpp
@@ -545,6 +545,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic,
break;
}
+ case NI_Vector64_CreateScalar:
case NI_Vector64_CreateScalarUnsafe:
{
if (genTypeSize(simdBaseType) == 8)
@@ -556,12 +557,14 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic,
case NI_Vector64_Create:
case NI_Vector128_Create:
+ case NI_Vector128_CreateScalar:
case NI_Vector128_CreateScalarUnsafe:
{
uint32_t simdLength = getSIMDVectorLength(simdSize, simdBaseType);
assert((sig->numArgs == 1) || (sig->numArgs == simdLength));
- bool isConstant = true;
+ bool isConstant = true;
+ bool isCreateScalar = (intrinsic == NI_Vector64_CreateScalar) || (intrinsic == NI_Vector128_CreateScalar);
if (varTypeIsFloating(simdBaseType))
{
@@ -620,7 +623,12 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic,
vecCon->gtSimd16Val.u8[simdLength - 1 - index] = cnsVal;
}
- if (sig->numArgs == 1)
+ if (isCreateScalar)
+ {
+ vecCon->gtSimd32Val = {};
+ vecCon->gtSimd32Val.u8[0] = cnsVal;
+ }
+ else if (sig->numArgs == 1)
{
for (uint32_t index = 0; index < simdLength - 1; index++)
{
@@ -641,7 +649,12 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic,
vecCon->gtSimd16Val.u16[simdLength - 1 - index] = cnsVal;
}
- if (sig->numArgs == 1)
+ if (isCreateScalar)
+ {
+ vecCon->gtSimd32Val = {};
+ vecCon->gtSimd32Val.u16[0] = cnsVal;
+ }
+ else if (sig->numArgs == 1)
{
for (uint32_t index = 0; index < (simdLength - 1); index++)
{
@@ -662,7 +675,12 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic,
vecCon->gtSimd16Val.u32[simdLength - 1 - index] = cnsVal;
}
- if (sig->numArgs == 1)
+ if (isCreateScalar)
+ {
+ vecCon->gtSimd32Val = {};
+ vecCon->gtSimd32Val.u32[0] = cnsVal;
+ }
+ else if (sig->numArgs == 1)
{
for (uint32_t index = 0; index < (simdLength - 1); index++)
{
@@ -683,7 +701,12 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic,
vecCon->gtSimd16Val.u64[simdLength - 1 - index] = cnsVal;
}
- if (sig->numArgs == 1)
+ if (isCreateScalar)
+ {
+ vecCon->gtSimd32Val = {};
+ vecCon->gtSimd32Val.u64[0] = cnsVal;
+ }
+ else if (sig->numArgs == 1)
{
for (uint32_t index = 0; index < (simdLength - 1); index++)
{
@@ -703,7 +726,12 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic,
vecCon->gtSimd16Val.f32[simdLength - 1 - index] = cnsVal;
}
- if (sig->numArgs == 1)
+ if (isCreateScalar)
+ {
+ vecCon->gtSimd32Val = {};
+ vecCon->gtSimd32Val.f32[0] = cnsVal;
+ }
+ else if (sig->numArgs == 1)
{
for (uint32_t index = 0; index < (simdLength - 1); index++)
{
@@ -723,7 +751,12 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic,
vecCon->gtSimd16Val.f64[simdLength - 1 - index] = cnsVal;
}
- if (sig->numArgs == 1)
+ if (isCreateScalar)
+ {
+ vecCon->gtSimd32Val = {};
+ vecCon->gtSimd32Val.f64[0] = cnsVal;
+ }
+ else if (sig->numArgs == 1)
{
for (uint32_t index = 0; index < (simdLength - 1); index++)
{
diff --git a/src/coreclr/jit/hwintrinsiccodegenxarch.cpp b/src/coreclr/jit/hwintrinsiccodegenxarch.cpp
index 8bd12f54d8d..b2b08c8d828 100644
--- a/src/coreclr/jit/hwintrinsiccodegenxarch.cpp
+++ b/src/coreclr/jit/hwintrinsiccodegenxarch.cpp
@@ -596,6 +596,13 @@ void CodeGen::genHWIntrinsic_R_R_RM_I(GenTreeHWIntrinsic* node, instruction ins,
regNumber op1Reg = op1->GetRegNum();
+ if ((ins == INS_insertps) && (op1Reg == REG_NA))
+ {
+ // insertps is special and can contain op1 when it is zero
+ assert(op1->isContained() && op1->IsVectorZero());
+ op1Reg = targetReg;
+ }
+
assert(targetReg != REG_NA);
assert(op1Reg != REG_NA);
diff --git a/src/coreclr/jit/hwintrinsiclistarm64.h b/src/coreclr/jit/hwintrinsiclistarm64.h
index 92c1c0848b6..cbe956299d1 100644
--- a/src/coreclr/jit/hwintrinsiclistarm64.h
+++ b/src/coreclr/jit/hwintrinsiclistarm64.h
@@ -39,7 +39,8 @@ HARDWARE_INTRINSIC(Vector64, ConvertToInt64,
HARDWARE_INTRINSIC(Vector64, ConvertToSingle, 8, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_BaseTypeFromFirstArg)
HARDWARE_INTRINSIC(Vector64, ConvertToUInt32, 8, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_BaseTypeFromFirstArg)
HARDWARE_INTRINSIC(Vector64, ConvertToUInt64, 8, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_BaseTypeFromFirstArg)
-HARDWARE_INTRINSIC(Vector64, Create, 8, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_mov, INS_mov, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
+HARDWARE_INTRINSIC(Vector64, Create, 8, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
+HARDWARE_INTRINSIC(Vector64, CreateScalar, 8, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
HARDWARE_INTRINSIC(Vector64, CreateScalarUnsafe, 8, 1, {INS_ins, INS_ins, INS_ins, INS_ins, INS_ins, INS_ins, INS_invalid, INS_invalid, INS_fmov, INS_invalid}, HW_Category_SIMD, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_SupportsContainment)
HARDWARE_INTRINSIC(Vector64, Divide, 8, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
HARDWARE_INTRINSIC(Vector64, Dot, 8, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen)
@@ -143,6 +144,7 @@ HARDWARE_INTRINSIC(Vector128, ConvertToSingle,
HARDWARE_INTRINSIC(Vector128, ConvertToUInt32, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_BaseTypeFromFirstArg)
HARDWARE_INTRINSIC(Vector128, ConvertToUInt64, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_BaseTypeFromFirstArg)
HARDWARE_INTRINSIC(Vector128, Create, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
+HARDWARE_INTRINSIC(Vector128, CreateScalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
HARDWARE_INTRINSIC(Vector128, CreateScalarUnsafe, 16, 1, {INS_ins, INS_ins, INS_ins, INS_ins, INS_ins, INS_ins, INS_ins, INS_ins, INS_fmov, INS_fmov}, HW_Category_SIMD, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_SupportsContainment)
HARDWARE_INTRINSIC(Vector128, Divide, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
HARDWARE_INTRINSIC(Vector128, Dot, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen)
diff --git a/src/coreclr/jit/hwintrinsiclistxarch.h b/src/coreclr/jit/hwintrinsiclistxarch.h
index 37738201789..cb2610315ec 100644
--- a/src/coreclr/jit/hwintrinsiclistxarch.h
+++ b/src/coreclr/jit/hwintrinsiclistxarch.h
@@ -57,6 +57,7 @@ HARDWARE_INTRINSIC(Vector128, ConvertToSingle,
HARDWARE_INTRINSIC(Vector128, ConvertToUInt32, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_BaseTypeFromFirstArg)
HARDWARE_INTRINSIC(Vector128, ConvertToUInt64, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_BaseTypeFromFirstArg)
HARDWARE_INTRINSIC(Vector128, Create, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
+HARDWARE_INTRINSIC(Vector128, CreateScalar, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
HARDWARE_INTRINSIC(Vector128, CreateScalarUnsafe, 16, 1, {INS_movd, INS_movd, INS_movd, INS_movd, INS_movd, INS_movd, INS_movd, INS_movd, INS_movss, INS_movsdsse2}, HW_Category_SIMDScalar, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(Vector128, Divide, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
HARDWARE_INTRINSIC(Vector128, Dot, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen)
@@ -158,6 +159,7 @@ HARDWARE_INTRINSIC(Vector256, ConvertToSingle,
HARDWARE_INTRINSIC(Vector256, ConvertToUInt32, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_BaseTypeFromFirstArg)
HARDWARE_INTRINSIC(Vector256, ConvertToUInt64, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_BaseTypeFromFirstArg)
HARDWARE_INTRINSIC(Vector256, Create, 32, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_AvxOnlyCompatible)
+HARDWARE_INTRINSIC(Vector256, CreateScalar, 32, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_AvxOnlyCompatible)
HARDWARE_INTRINSIC(Vector256, CreateScalarUnsafe, 32, 1, {INS_movd, INS_movd, INS_movd, INS_movd, INS_movd, INS_movd, INS_movd, INS_movd, INS_movss, INS_movsdsse2}, HW_Category_SIMDScalar, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics|HW_Flag_AvxOnlyCompatible)
HARDWARE_INTRINSIC(Vector256, Divide, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
HARDWARE_INTRINSIC(Vector256, Dot, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoCodeGen)
diff --git a/src/coreclr/jit/hwintrinsicxarch.cpp b/src/coreclr/jit/hwintrinsicxarch.cpp
index 04bab108a2e..8eea6cb6c1a 100644
--- a/src/coreclr/jit/hwintrinsicxarch.cpp
+++ b/src/coreclr/jit/hwintrinsicxarch.cpp
@@ -899,13 +899,16 @@ GenTree* Compiler::impBaseIntrinsic(NamedIntrinsic intrinsic,
case NI_Vector128_Create:
case NI_Vector256_Create:
+ case NI_Vector128_CreateScalar:
+ case NI_Vector256_CreateScalar:
case NI_Vector128_CreateScalarUnsafe:
case NI_Vector256_CreateScalarUnsafe:
{
uint32_t simdLength = getSIMDVectorLength(simdSize, simdBaseType);
assert((sig->numArgs == 1) || (sig->numArgs == simdLength));
- bool isConstant = true;
+ bool isConstant = true;
+ bool isCreateScalar = (intrinsic == NI_Vector128_CreateScalar) || (intrinsic == NI_Vector256_CreateScalar);
if (varTypeIsFloating(simdBaseType))
{
@@ -964,7 +967,12 @@ GenTree* Compiler::impBaseIntrinsic(NamedIntrinsic intrinsic,
vecCon->gtSimd32Val.u8[simdLength - 1 - index] = cnsVal;
}
- if (sig->numArgs == 1)
+ if (isCreateScalar)
+ {
+ vecCon->gtSimd32Val = {};
+ vecCon->gtSimd32Val.u8[0] = cnsVal;
+ }
+ else if (sig->numArgs == 1)
{
for (uint32_t index = 0; index < simdLength - 1; index++)
{
@@ -985,7 +993,12 @@ GenTree* Compiler::impBaseIntrinsic(NamedIntrinsic intrinsic,
vecCon->gtSimd32Val.u16[simdLength - 1 - index] = cnsVal;
}
- if (sig->numArgs == 1)
+ if (isCreateScalar)
+ {
+ vecCon->gtSimd32Val = {};
+ vecCon->gtSimd32Val.u16[0] = cnsVal;
+ }
+ else if (sig->numArgs == 1)
{
for (uint32_t index = 0; index < (simdLength - 1); index++)
{
@@ -1006,7 +1019,12 @@ GenTree* Compiler::impBaseIntrinsic(NamedIntrinsic intrinsic,
vecCon->gtSimd32Val.u32[simdLength - 1 - index] = cnsVal;
}
- if (sig->numArgs == 1)
+ if (isCreateScalar)
+ {
+ vecCon->gtSimd32Val = {};
+ vecCon->gtSimd32Val.u32[0] = cnsVal;
+ }
+ else if (sig->numArgs == 1)
{
for (uint32_t index = 0; index < (simdLength - 1); index++)
{
@@ -1027,7 +1045,12 @@ GenTree* Compiler::impBaseIntrinsic(NamedIntrinsic intrinsic,
vecCon->gtSimd32Val.u64[simdLength - 1 - index] = cnsVal;
}
- if (sig->numArgs == 1)
+ if (isCreateScalar)
+ {
+ vecCon->gtSimd32Val = {};
+ vecCon->gtSimd32Val.u64[0] = cnsVal;
+ }
+ else if (sig->numArgs == 1)
{
for (uint32_t index = 0; index < (simdLength - 1); index++)
{
@@ -1047,7 +1070,12 @@ GenTree* Compiler::impBaseIntrinsic(NamedIntrinsic intrinsic,
vecCon->gtSimd32Val.f32[simdLength - 1 - index] = cnsVal;
}
- if (sig->numArgs == 1)
+ if (isCreateScalar)
+ {
+ vecCon->gtSimd32Val = {};
+ vecCon->gtSimd32Val.f32[0] = cnsVal;
+ }
+ else if (sig->numArgs == 1)
{
for (uint32_t index = 0; index < (simdLength - 1); index++)
{
@@ -1067,7 +1095,12 @@ GenTree* Compiler::impBaseIntrinsic(NamedIntrinsic intrinsic,
vecCon->gtSimd32Val.f64[simdLength - 1 - index] = cnsVal;
}
- if (sig->numArgs == 1)
+ if (isCreateScalar)
+ {
+ vecCon->gtSimd32Val = {};
+ vecCon->gtSimd32Val.f64[0] = cnsVal;
+ }
+ else if (sig->numArgs == 1)
{
for (uint32_t index = 0; index < (simdLength - 1); index++)
{
diff --git a/src/coreclr/jit/lowerarmarch.cpp b/src/coreclr/jit/lowerarmarch.cpp
index 1401de10b47..7f988dabad6 100644
--- a/src/coreclr/jit/lowerarmarch.cpp
+++ b/src/coreclr/jit/lowerarmarch.cpp
@@ -1100,6 +1100,8 @@ GenTree* Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node)
{
case NI_Vector64_Create:
case NI_Vector128_Create:
+ case NI_Vector64_CreateScalar:
+ case NI_Vector128_CreateScalar:
{
// We don't directly support the Vector64.Create or Vector128.Create methods in codegen
// and instead lower them to other intrinsic nodes in LowerHWIntrinsicCreate so we expect
@@ -1439,8 +1441,9 @@ GenTree* Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node)
assert(varTypeIsArithmetic(simdBaseType));
assert(simdSize != 0);
- bool isConstant = GenTreeVecCon::IsHWIntrinsicCreateConstant(node, simd32Val);
- size_t argCnt = node->GetOperandCount();
+ bool isConstant = GenTreeVecCon::IsHWIntrinsicCreateConstant(node, simd32Val);
+ bool isCreateScalar = (intrinsicId == NI_Vector64_CreateScalar) || (intrinsicId == NI_Vector128_CreateScalar);
+ size_t argCnt = node->GetOperandCount();
// Check if we have a cast that we can remove. Note that "IsValidConstForMovImm"
// will reset Op(1) if it finds such a cast, so we do not need to handle it here.
@@ -1482,6 +1485,22 @@ GenTree* Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node)
}
else if (argCnt == 1)
{
+ if (isCreateScalar)
+ {
+ GenTree* op1 = node->Op(1);
+
+ GenTree* tmp = comp->gtNewZeroConNode(simdType);
+ BlockRange().InsertBefore(op1, tmp);
+ LowerNode(tmp);
+
+ GenTree* idx = comp->gtNewIconNode(0);
+ BlockRange().InsertAfter(tmp, idx);
+ LowerNode(idx);
+
+ node->ResetHWIntrinsicId(NI_AdvSimd_Insert, comp, tmp, idx, op1);
+ return LowerNode(node);
+ }
+
// We have the following (where simd is simd8 or simd16):
// /--* op1 T
// node = * HWINTRINSIC simd T Create
diff --git a/src/coreclr/jit/lowerxarch.cpp b/src/coreclr/jit/lowerxarch.cpp
index 44d6781d081..40ccaebec0f 100644
--- a/src/coreclr/jit/lowerxarch.cpp
+++ b/src/coreclr/jit/lowerxarch.cpp
@@ -1023,6 +1023,8 @@ GenTree* Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node)
case NI_Vector128_Create:
case NI_Vector256_Create:
+ case NI_Vector128_CreateScalar:
+ case NI_Vector256_CreateScalar:
{
// We don't directly support the Vector128.Create or Vector256.Create methods in codegen
// and instead lower them to other intrinsic nodes in LowerHWIntrinsicCreate so we expect
@@ -1705,8 +1707,9 @@ GenTree* Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node)
GenTree* tmp2 = nullptr;
GenTree* tmp3 = nullptr;
- bool isConstant = GenTreeVecCon::IsHWIntrinsicCreateConstant(node, simd32Val);
- size_t argCnt = node->GetOperandCount();
+ bool isConstant = GenTreeVecCon::IsHWIntrinsicCreateConstant(node, simd32Val);
+ bool isCreateScalar = (intrinsicId == NI_Vector128_CreateScalar) || (intrinsicId == NI_Vector256_CreateScalar);
+ size_t argCnt = node->GetOperandCount();
if (isConstant)
{
@@ -1745,6 +1748,156 @@ GenTree* Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node)
}
else if (argCnt == 1)
{
+ if (isCreateScalar)
+ {
+ switch (simdBaseType)
+ {
+ case TYP_BYTE:
+ case TYP_UBYTE:
+ {
+ // Types need to be explicitly zero-extended to ensure upper-bits are zero
+ //
+ // We need to explicitly use TYP_UBYTE since unsigned is ignored for small types
+ // Explicitly handle both BYTE and UBYTE to account for reinterpret casts and the like
+ //
+ // The from type is INT since that is the input type tracked by IR, where-as the target
+ // type needs to be UBYTE so it implicitly zero-extends back to TYP_INT
+
+ tmp1 = comp->gtNewCastNode(TYP_INT, op1, /* unsigned */ true, TYP_UBYTE);
+ BlockRange().InsertAfter(op1, tmp1);
+ LowerNode(tmp1);
+
+ node->ChangeHWIntrinsicId(NI_SSE2_ConvertScalarToVector128Int32, tmp1);
+ node->SetSimdBaseJitType(CORINFO_TYPE_INT);
+ break;
+ }
+
+ case TYP_SHORT:
+ case TYP_USHORT:
+ {
+ // Types need to be explicitly zero-extended to ensure upper-bits are zero
+ //
+ // We need to explicitly use TYP_USHORT since unsigned is ignored for small types
+ // Explicitly handle both SHORT and USHORT to account for reinterpret casts and the like
+ //
+ // The from type is INT since that is the input type tracked by IR, where-as the target
+ // type needs to be USHORT so it implicitly zero-extends back to TYP_INT
+
+ tmp1 = comp->gtNewCastNode(TYP_INT, op1, /* unsigned */ true, TYP_USHORT);
+ BlockRange().InsertAfter(op1, tmp1);
+ LowerNode(tmp1);
+
+ node->ChangeHWIntrinsicId(NI_SSE2_ConvertScalarToVector128Int32, tmp1);
+ node->SetSimdBaseJitType(CORINFO_TYPE_INT);
+ break;
+ }
+
+ case TYP_INT:
+ {
+ node->ChangeHWIntrinsicId(NI_SSE2_ConvertScalarToVector128Int32);
+ break;
+ }
+
+ case TYP_UINT:
+ {
+ node->ChangeHWIntrinsicId(NI_SSE2_ConvertScalarToVector128UInt32);
+ break;
+ }
+
+#if defined(TARGET_AMD64)
+ case TYP_LONG:
+ {
+ node->ChangeHWIntrinsicId(NI_SSE2_X64_ConvertScalarToVector128Int64);
+ break;
+ }
+
+ case TYP_ULONG:
+ {
+ node->ChangeHWIntrinsicId(NI_SSE2_X64_ConvertScalarToVector128UInt64);
+ break;
+ }
+#endif // TARGET_AMD64
+
+ case TYP_FLOAT:
+ {
+ tmp1 = comp->gtNewZeroConNode(simdType);
+ BlockRange().InsertBefore(op1, tmp1);
+ LowerNode(tmp1);
+
+ if (comp->compOpportunisticallyDependsOn(InstructionSet_SSE41))
+ {
+ // Sse41.Insert has:
+ // * Bits 0-3: zmask
+ // * Bits 4-5: count_d
+ // * Bits 6-7: count_s (register form only)
+ //
+ // We want zmask 0b1110 (0xE) to zero elements 1/2/3
+ // We want count_d 0b00 (0x0) to insert the value to element 0
+ // We want count_s 0b00 (0x0) as we're just taking element 0 of the source
+
+ idx = comp->gtNewIconNode(0x0E);
+ BlockRange().InsertAfter(op1, idx);
+ LowerNode(idx);
+
+ node->ResetHWIntrinsicId(NI_SSE41_Insert, comp, tmp1, op1, idx);
+ }
+ else
+ {
+ node->ResetHWIntrinsicId(NI_SSE_MoveScalar, comp, tmp1, op1);
+ }
+ break;
+ }
+
+ case TYP_DOUBLE:
+ {
+ tmp1 = comp->gtNewZeroConNode(simdType);
+ BlockRange().InsertBefore(op1, tmp1);
+ LowerNode(tmp1);
+
+ node->ResetHWIntrinsicId(NI_SSE2_MoveScalar, comp, tmp1, op1);
+ break;
+ }
+
+ default:
+ {
+ unreached();
+ }
+ }
+
+ if (simdSize == 32)
+ {
+ // We're creating a Vector256 scalar so we need to treat the original op as Vector128,
+ // we need to unsafely extend up to Vector256 (which is actually safe since the 128-bit
+ // op will zero extend up to 256-bits), and then we need to replace the original use
+ // with the new TYP_SIMD32 node.
+
+ node->ChangeType(TYP_SIMD16);
+ node->SetSimdSize(16);
+ LowerNode(node);
+
+ tmp2 = comp->gtNewSimdHWIntrinsicNode(TYP_SIMD32, node, NI_Vector128_ToVector256Unsafe, simdBaseJitType,
+ 16);
+
+ LIR::Use use;
+ bool foundUse = BlockRange().TryGetUse(node, &use);
+ BlockRange().InsertAfter(node, tmp2);
+
+ if (foundUse)
+ {
+ use.ReplaceWith(tmp2);
+ }
+ else
+ {
+ node->ClearUnusedValue();
+ tmp2->SetUnusedValue();
+ }
+
+ node = tmp2->AsHWIntrinsic();
+ }
+
+ return LowerNode(node);
+ }
+
// We have the following (where simd is simd16 or simd32):
// /--* op1 T
// node = * HWINTRINSIC simd T Create
@@ -1822,6 +1975,8 @@ GenTree* Lowering::LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node)
return LowerNode(node);
}
+ assert(intrinsicId == NI_Vector128_Create);
+
// We will be constructing the following parts:
// /--* op1 T
// tmp1 = * HWINTRINSIC simd16 T CreateScalarUnsafe
@@ -6940,7 +7095,6 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node)
case NI_SSSE3_AlignRight:
case NI_SSE41_Blend:
case NI_SSE41_DotProduct:
- case NI_SSE41_Insert:
case NI_SSE41_X64_Insert:
case NI_SSE41_MultipleSumAbsoluteDifferences:
case NI_AVX_Blend:
@@ -6968,6 +7122,53 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node)
break;
}
+ case NI_SSE41_Insert:
+ {
+ GenTree* lastOp = node->Op(numArgs);
+
+ if ((simdBaseType == TYP_FLOAT) && lastOp->IsCnsIntOrI())
+ {
+ // Sse41.Insert has:
+ // * Bits 0-3: zmask
+ // * Bits 4-5: count_d
+ // * Bits 6-7: count_s (register form only)
+ //
+ // Where zmask specifies which elements to zero
+ // Where count_d specifies the destination index the value is being inserted to
+ // Where count_s specifies the source index of the value being inserted
+
+ ssize_t ival = lastOp->AsIntConCommon()->IconValue();
+
+ ssize_t zmask = (ival & 0x0F);
+ ssize_t count_d = (ival & 0x30) >> 4;
+ ssize_t count_s = (ival & 0xC0) >> 6;
+
+ if (op1->IsVectorZero())
+ {
+ // When op1 is zero, we can contain op1 and modify the mask
+ // to zero everything except for the element we're inserting to
+
+ MakeSrcContained(node, op1);
+
+ zmask |= ~(1 << count_d);
+ zmask &= 0x0F;
+
+ ival = (count_s << 6) | (count_d << 4) | (zmask);
+ lastOp->AsIntConCommon()->SetIconValue(ival);
+ }
+ }
+
+ if (TryGetContainableHWIntrinsicOp(node, &op2, &supportsRegOptional))
+ {
+ MakeSrcContained(node, op2);
+ }
+ else if (supportsRegOptional)
+ {
+ op2->SetRegOptional();
+ }
+ break;
+ }
+
default:
{
assert(!"Unhandled containment for ternary hardware intrinsic with immediate operand");
diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector128.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector128.cs
index 98d99645709..a1d433731df 100644
--- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector128.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector128.cs
@@ -1156,6 +1156,7 @@ namespace System.Runtime.Intrinsics
/// <param name="value">The value that element 0 will be initialized to.</param>
/// <returns>A new <see cref="Vector128{T}" /> instance with the first element initialized to <paramref name="value" /> and the remaining elements initialized to zero.</returns>
/// <exception cref="NotSupportedException">The type of <paramref name="value" /> (<typeparamref name="T" />) is not supported.</exception>
+ [Intrinsic]
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static unsafe Vector128<T> CreateScalar<T>(T value)
where T : struct => Vector64.CreateScalar(value).ToVector128();
@@ -1163,241 +1164,91 @@ namespace System.Runtime.Intrinsics
/// <summary>Creates a new <see cref="Vector128{Byte}" /> instance with the first element initialized to the specified value and the remaining elements initialized to zero.</summary>
/// <param name="value">The value that element 0 will be initialized to.</param>
/// <returns>A new <see cref="Vector128{Byte}" /> instance with the first element initialized to <paramref name="value" /> and the remaining elements initialized to zero.</returns>
+ [Intrinsic]
[MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static unsafe Vector128<byte> CreateScalar(byte value)
- {
- if (AdvSimd.IsSupported)
- {
- return AdvSimd.Insert(Vector128<byte>.Zero, 0, value);
- }
- else if (Sse2.IsSupported)
- {
- // ConvertScalarToVector128 only deals with 32/64-bit inputs and we need to ensure all upper-bits are zeroed, so we call
- // the UInt32 overload to ensure zero extension. We can then just treat the result as byte and return.
- return Sse2.ConvertScalarToVector128UInt32(value).AsByte();
- }
- else
- {
- return CreateScalar<byte>(value);
- }
- }
+ public static unsafe Vector128<byte> CreateScalar(byte value) => CreateScalar<byte>(value);
/// <summary>Creates a new <see cref="Vector128{Double}" /> instance with the first element initialized to the specified value and the remaining elements initialized to zero.</summary>
/// <param name="value">The value that element 0 will be initialized to.</param>
/// <returns>A new <see cref="Vector128{Double}" /> instance with the first element initialized to <paramref name="value" /> and the remaining elements initialized to zero.</returns>
+ [Intrinsic]
[MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static unsafe Vector128<double> CreateScalar(double value)
- {
- if (AdvSimd.IsSupported)
- {
- return AdvSimd.Insert(Vector128<double>.Zero, 0, value);
- }
- else if (Sse2.IsSupported)
- {
- return Sse2.MoveScalar(Vector128<double>.Zero, CreateScalarUnsafe(value));
- }
- else
- {
- return CreateScalar<double>(value);
- }
- }
+ public static unsafe Vector128<double> CreateScalar(double value) => CreateScalar<double>(value);
/// <summary>Creates a new <see cref="Vector128{Int16}" /> instance with the first element initialized to the specified value and the remaining elements initialized to zero.</summary>
/// <param name="value">The value that element 0 will be initialized to.</param>
/// <returns>A new <see cref="Vector128{Int16}" /> instance with the first element initialized to <paramref name="value" /> and the remaining elements initialized to zero.</returns>
+ [Intrinsic]
[MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static unsafe Vector128<short> CreateScalar(short value)
- {
- if (AdvSimd.IsSupported)
- {
- return AdvSimd.Insert(Vector128<short>.Zero, 0, value);
- }
- else if (Sse2.IsSupported)
- {
- // ConvertScalarToVector128 only deals with 32/64-bit inputs and we need to ensure all upper-bits are zeroed, so we cast
- // to ushort and call the UInt32 overload to ensure zero extension. We can then just treat the result as short and return.
- return Sse2.ConvertScalarToVector128UInt32((ushort)(value)).AsInt16();
- }
- else
- {
- return CreateScalar<short>(value);
- }
- }
+ public static unsafe Vector128<short> CreateScalar(short value) => CreateScalar<short>(value);
/// <summary>Creates a new <see cref="Vector128{Int32}" /> instance with the first element initialized to the specified value and the remaining elements initialized to zero.</summary>
/// <param name="value">The value that element 0 will be initialized to.</param>
/// <returns>A new <see cref="Vector128{Int32}" /> instance with the first element initialized to <paramref name="value" /> and the remaining elements initialized to zero.</returns>
+ [Intrinsic]
[MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static unsafe Vector128<int> CreateScalar(int value)
- {
- if (AdvSimd.IsSupported)
- {
- return AdvSimd.Insert(Vector128<int>.Zero, 0, value);
- }
- else if (Sse2.IsSupported)
- {
- return Sse2.ConvertScalarToVector128Int32(value);
- }
- else
- {
- return CreateScalar<int>(value);
- }
- }
+ public static unsafe Vector128<int> CreateScalar(int value) => CreateScalar<int>(value);
/// <summary>Creates a new <see cref="Vector128{Int64}" /> instance with the first element initialized to the specified value and the remaining elements initialized to zero.</summary>
/// <param name="value">The value that element 0 will be initialized to.</param>
/// <returns>A new <see cref="Vector128{Int64}" /> instance with the first element initialized to <paramref name="value" /> and the remaining elements initialized to zero.</returns>
+ [Intrinsic]
[MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static unsafe Vector128<long> CreateScalar(long value)
- {
- if (AdvSimd.IsSupported)
- {
- return AdvSimd.Insert(Vector128<long>.Zero, 0, value);
- }
- else if (Sse2.X64.IsSupported)
- {
- return Sse2.X64.ConvertScalarToVector128Int64(value);
- }
- else
- {
- return CreateScalar<long>(value);
- }
- }
+ public static unsafe Vector128<long> CreateScalar(long value) => CreateScalar<long>(value);
/// <summary>Creates a new <see cref="Vector128{IntPtr}" /> instance with the first element initialized to the specified value and the remaining elements initialized to zero.</summary>
/// <param name="value">The value that element 0 will be initialized to.</param>
/// <returns>A new <see cref="Vector128{IntPtr}" /> instance with the first element initialized to <paramref name="value"/> and the remaining elements initialized to zero.</returns>
+ [Intrinsic]
[MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static unsafe Vector128<nint> CreateScalar(nint value)
- {
-#if TARGET_64BIT
- return CreateScalar((long)(value)).AsNInt();
-#else
- return CreateScalar((int)(value)).AsNInt();
-#endif
- }
+ public static unsafe Vector128<nint> CreateScalar(nint value) => CreateScalar<nint>(value);
/// <summary>Creates a new <see cref="Vector128{UIntPtr}" /> instance with the first element initialized to the specified value and the remaining elements initialized to zero.</summary>
/// <param name="value">The value that element 0 will be initialized to.</param>
/// <returns>A new <see cref="Vector128{UIntPtr}" /> instance with the first element initialized to <paramref name="value"/> and the remaining elements initialized to zero.</returns>
+ [Intrinsic]
[CLSCompliant(false)]
[MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static unsafe Vector128<nuint> CreateScalar(nuint value)
- {
-#if TARGET_64BIT
- return CreateScalar((ulong)(value)).AsNUInt();
-#else
- return CreateScalar((uint)(value)).AsNUInt();
-#endif
- }
+ public static unsafe Vector128<nuint> CreateScalar(nuint value) => CreateScalar<nuint>(value);
/// <summary>Creates a new <see cref="Vector128{SByte}" /> instance with the first element initialized to the specified value and the remaining elements initialized to zero.</summary>
/// <param name="value">The value that element 0 will be initialized to.</param>
/// <returns>A new <see cref="Vector128{SByte}" /> instance with the first element initialized to <paramref name="value" /> and the remaining elements initialized to zero.</returns>
+ [Intrinsic]
[CLSCompliant(false)]
[MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static unsafe Vector128<sbyte> CreateScalar(sbyte value)
- {
- if (AdvSimd.IsSupported)
- {
- return AdvSimd.Insert(Vector128<sbyte>.Zero, 0, value);
- }
- else if (Sse2.IsSupported)
- {
- // ConvertScalarToVector128 only deals with 32/64-bit inputs and we need to ensure all upper-bits are zeroed, so we cast
- // to byte and call the UInt32 overload to ensure zero extension. We can then just treat the result as sbyte and return.
- return Sse2.ConvertScalarToVector128UInt32((byte)(value)).AsSByte();
- }
- else
- {
- return CreateScalar<sbyte>(value);
- }
- }
+ public static unsafe Vector128<sbyte> CreateScalar(sbyte value) => CreateScalar<sbyte>(value);
/// <summary>Creates a new <see cref="Vector128{Single}" /> instance with the first element initialized to the specified value and the remaining elements initialized to zero.</summary>
/// <param name="value">The value that element 0 will be initialized to.</param>
/// <returns>A new <see cref="Vector128{Single}" /> instance with the first element initialized to <paramref name="value" /> and the remaining elements initialized to zero.</returns>
+ [Intrinsic]
[MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static unsafe Vector128<float> CreateScalar(float value)
- {
- if (AdvSimd.IsSupported)
- {
- return AdvSimd.Insert(Vector128<float>.Zero, 0, value);
- }
- else if (Sse.IsSupported)
- {
- return Sse.MoveScalar(Vector128<float>.Zero, CreateScalarUnsafe(value));
- }
- else
- {
- return CreateScalar<float>(value);
- }
- }
+ public static unsafe Vector128<float> CreateScalar(float value) => CreateScalar<float>(value);
/// <summary>Creates a new <see cref="Vector128{UInt16}" /> instance with the first element initialized to the specified value and the remaining elements initialized to zero.</summary>
/// <param name="value">The value that element 0 will be initialized to.</param>
/// <returns>A new <see cref="Vector128{UInt16}" /> instance with the first element initialized to <paramref name="value" /> and the remaining elements initialized to zero.</returns>
+ [Intrinsic]
[CLSCompliant(false)]
[MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static unsafe Vector128<ushort> CreateScalar(ushort value)
- {
- if (AdvSimd.IsSupported)
- {
- return AdvSimd.Insert(Vector128<ushort>.Zero, 0, value);
- }
- else if (Sse2.IsSupported)
- {
- // ConvertScalarToVector128 only deals with 32/64-bit inputs and we need to ensure all upper-bits are zeroed, so we call
- // the UInt32 overload to ensure zero extension. We can then just treat the result as ushort and return.
- return Sse2.ConvertScalarToVector128UInt32(value).AsUInt16();
- }
- else
- {
- return CreateScalar<ushort>(value);
- }
- }
+ public static unsafe Vector128<ushort> CreateScalar(ushort value) => CreateScalar<ushort>(value);
/// <summary>Creates a new <see cref="Vector128{UInt32}" /> instance with the first element initialized to the specified value and the remaining elements initialized to zero.</summary>
/// <param name="value">The value that element 0 will be initialized to.</param>
/// <returns>A new <see cref="Vector128{UInt32}" /> instance with the first element initialized to <paramref name="value" /> and the remaining elements initialized to zero.</returns>
+ [Intrinsic]
[CLSCompliant(false)]
[MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static unsafe Vector128<uint> CreateScalar(uint value)
- {
- if (AdvSimd.IsSupported)
- {
- return AdvSimd.Insert(Vector128<uint>.Zero, 0, value);
- }
- else if (Sse2.IsSupported)
- {
- return Sse2.ConvertScalarToVector128UInt32(value);
- }
- else
- {
- return CreateScalar<uint>(value);
- }
- }
+ public static unsafe Vector128<uint> CreateScalar(uint value) => CreateScalar<uint>(value);
/// <summary>Creates a new <see cref="Vector128{UInt64}" /> instance with the first element initialized to the specified value and the remaining elements initialized to zero.</summary>
/// <param name="value">The value that element 0 will be initialized to.</param>
/// <returns>A new <see cref="Vector128{UInt64}" /> instance with the first element initialized to <paramref name="value" /> and the remaining elements initialized to zero.</returns>
+ [Intrinsic]
[CLSCompliant(false)]
[MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static unsafe Vector128<ulong> CreateScalar(ulong value)
- {
- if (AdvSimd.IsSupported)
- {
- return AdvSimd.Insert(Vector128<ulong>.Zero, 0, value);
- }
- else if (Sse2.X64.IsSupported)
- {
- return Sse2.X64.ConvertScalarToVector128UInt64(value);
- }
- else
- {
- return CreateScalar<ulong>(value);
- }
- }
+ public static unsafe Vector128<ulong> CreateScalar(ulong value) => CreateScalar<ulong>(value);
/// <summary>Creates a new <see cref="Vector128{T}" /> instance with the first element initialized to the specified value and the remaining elements left uninitialized.</summary>
/// <typeparam name="T">The type of the elements in the vector.</typeparam>
diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector256.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector256.cs
index 8a6f11b1015..552236ddc4e 100644
--- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector256.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector256.cs
@@ -1151,6 +1151,7 @@ namespace System.Runtime.Intrinsics
/// <param name="value">The value that element 0 will be initialized to.</param>
/// <returns>A new <see cref="Vector256{T}" /> instance with the first element initialized to <paramref name="value" /> and the remaining elements initialized to zero.</returns>
/// <exception cref="NotSupportedException">The type of <paramref name="value" /> (<typeparamref name="T" />) is not supported.</exception>
+ [Intrinsic]
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector256<T> CreateScalar<T>(T value)
where T : struct => Vector128.CreateScalar(value).ToVector256();
@@ -1158,42 +1159,49 @@ namespace System.Runtime.Intrinsics
/// <summary>Creates a new <see cref="Vector256{Byte}" /> instance with the first element initialized to the specified value and the remaining elements initialized to zero.</summary>
/// <param name="value">The value that element 0 will be initialized to.</param>
/// <returns>A new <see cref="Vector256{Byte}" /> instance with the first element initialized to <paramref name="value" /> and the remaining elements initialized to zero.</returns>
+ [Intrinsic]
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector256<byte> CreateScalar(byte value) => CreateScalar<byte>(value);
/// <summary>Creates a new <see cref="Vector256{Double}" /> instance with the first element initialized to the specified value and the remaining elements initialized to zero.</summary>
/// <param name="value">The value that element 0 will be initialized to.</param>
/// <returns>A new <see cref="Vector256{Double}" /> instance with the first element initialized to <paramref name="value" /> and the remaining elements initialized to zero.</returns>
+ [Intrinsic]
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector256<double> CreateScalar(double value) => CreateScalar<double>(value);
/// <summary>Creates a new <see cref="Vector256{Int16}" /> instance with the first element initialized to the specified value and the remaining elements initialized to zero.</summary>
/// <param name="value">The value that element 0 will be initialized to.</param>
/// <returns>A new <see cref="Vector256{Int16}" /> instance with the first element initialized to <paramref name="value" /> and the remaining elements initialized to zero.</returns>
+ [Intrinsic]
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector256<short> CreateScalar(short value) => CreateScalar<short>(value);
/// <summary>Creates a new <see cref="Vector256{Int32}" /> instance with the first element initialized to the specified value and the remaining elements initialized to zero.</summary>
/// <param name="value">The value that element 0 will be initialized to.</param>
/// <returns>A new <see cref="Vector256{Int32}" /> instance with the first element initialized to <paramref name="value" /> and the remaining elements initialized to zero.</returns>
+ [Intrinsic]
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector256<int> CreateScalar(int value) => CreateScalar<int>(value);
/// <summary>Creates a new <see cref="Vector256{Int64}" /> instance with the first element initialized to the specified value and the remaining elements initialized to zero.</summary>
/// <param name="value">The value that element 0 will be initialized to.</param>
/// <returns>A new <see cref="Vector256{Int64}" /> instance with the first element initialized to <paramref name="value" /> and the remaining elements initialized to zero.</returns>
+ [Intrinsic]
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector256<long> CreateScalar(long value) => CreateScalar<long>(value);
/// <summary>Creates a new <see cref="Vector256{IntPtr}" /> instance with the first element initialized to the specified value and the remaining elements initialized to zero.</summary>
/// <param name="value">The value that element 0 will be initialized to.</param>
/// <returns>A new <see cref="Vector256{IntPtr}" /> instance with the first element initialized to <paramref name="value"/> and the remaining elements initialized to zero.</returns>
+ [Intrinsic]
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector256<nint> CreateScalar(nint value) => CreateScalar<nint>(value);
/// <summary>Creates a new <see cref="Vector256{UIntPtr}" /> instance with the first element initialized to the specified value and the remaining elements initialized to zero.</summary>
/// <param name="value">The value that element 0 will be initialized to.</param>
/// <returns>A new <see cref="Vector256{UIntPtr}" /> instance with the first element initialized to <paramref name="value"/> and the remaining elements initialized to zero.</returns>
+ [Intrinsic]
[CLSCompliant(false)]
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector256<nuint> CreateScalar(nuint value) => CreateScalar<nuint>(value);
@@ -1201,6 +1209,7 @@ namespace System.Runtime.Intrinsics
/// <summary>Creates a new <see cref="Vector256{SByte}" /> instance with the first element initialized to the specified value and the remaining elements initialized to zero.</summary>
/// <param name="value">The value that element 0 will be initialized to.</param>
/// <returns>A new <see cref="Vector256{SByte}" /> instance with the first element initialized to <paramref name="value" /> and the remaining elements initialized to zero.</returns>
+ [Intrinsic]
[CLSCompliant(false)]
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector256<sbyte> CreateScalar(sbyte value) => CreateScalar<sbyte>(value);
@@ -1208,12 +1217,14 @@ namespace System.Runtime.Intrinsics
/// <summary>Creates a new <see cref="Vector256{Single}" /> instance with the first element initialized to the specified value and the remaining elements initialized to zero.</summary>
/// <param name="value">The value that element 0 will be initialized to.</param>
/// <returns>A new <see cref="Vector256{Single}" /> instance with the first element initialized to <paramref name="value" /> and the remaining elements initialized to zero.</returns>
+ [Intrinsic]
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector256<float> CreateScalar(float value) => CreateScalar<float>(value);
/// <summary>Creates a new <see cref="Vector256{UInt16}" /> instance with the first element initialized to the specified value and the remaining elements initialized to zero.</summary>
/// <param name="value">The value that element 0 will be initialized to.</param>
/// <returns>A new <see cref="Vector256{UInt16}" /> instance with the first element initialized to <paramref name="value" /> and the remaining elements initialized to zero.</returns>
+ [Intrinsic]
[CLSCompliant(false)]
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector256<ushort> CreateScalar(ushort value) => CreateScalar<ushort>(value);
@@ -1221,6 +1232,7 @@ namespace System.Runtime.Intrinsics
/// <summary>Creates a new <see cref="Vector256{UInt32}" /> instance with the first element initialized to the specified value and the remaining elements initialized to zero.</summary>
/// <param name="value">The value that element 0 will be initialized to.</param>
/// <returns>A new <see cref="Vector256{UInt32}" /> instance with the first element initialized to <paramref name="value" /> and the remaining elements initialized to zero.</returns>
+ [Intrinsic]
[CLSCompliant(false)]
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector256<uint> CreateScalar(uint value) => CreateScalar<uint>(value);
@@ -1228,6 +1240,7 @@ namespace System.Runtime.Intrinsics
/// <summary>Creates a new <see cref="Vector256{UInt64}" /> instance with the first element initialized to the specified value and the remaining elements initialized to zero.</summary>
/// <param name="value">The value that element 0 will be initialized to.</param>
/// <returns>A new <see cref="Vector256{UInt64}" /> instance with the first element initialized to <paramref name="value" /> and the remaining elements initialized to zero.</returns>
+ [Intrinsic]
[CLSCompliant(false)]
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector256<ulong> CreateScalar(ulong value) => CreateScalar<ulong>(value);
diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector64.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector64.cs
index dad208aeb29..6a897cd9036 100644
--- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector64.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector64.cs
@@ -859,6 +859,7 @@ namespace System.Runtime.Intrinsics
/// <param name="value">The value that element 0 will be initialized to.</param>
/// <returns>A new <see cref="Vector64{Byte}" /> instance with the first element initialized to <paramref name="value"/> and the remaining elements initialized to zero.</returns>
/// <exception cref="NotSupportedException">The type of <paramref name="value" /> (<typeparamref name="T" />) is not supported.</exception>
+ [Intrinsic]
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static unsafe Vector64<T> CreateScalar<T>(T value)
where T : struct
@@ -871,160 +872,88 @@ namespace System.Runtime.Intrinsics
/// <summary>Creates a new <see cref="Vector64{Byte}" /> instance with the first element initialized to the specified value and the remaining elements initialized to zero.</summary>
/// <param name="value">The value that element 0 will be initialized to.</param>
/// <returns>A new <see cref="Vector64{Byte}" /> instance with the first element initialized to <paramref name="value"/> and the remaining elements initialized to zero.</returns>
+ [Intrinsic]
[MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static unsafe Vector64<byte> CreateScalar(byte value)
- {
- if (AdvSimd.IsSupported)
- {
- return AdvSimd.Insert(Vector64<byte>.Zero, 0, value);
- }
- else
- {
- return CreateScalar<byte>(value);
- }
- }
+ public static unsafe Vector64<byte> CreateScalar(byte value) => CreateScalar<byte>(value);
/// <summary>Creates a new <see cref="Vector64{Double}" /> instance with the first element initialized to the specified value and the remaining elements initialized to zero.</summary>
/// <param name="value">The value that element 0 will be initialized to.</param>
/// <returns>A new <see cref="Vector64{Double}" /> instance with the first element initialized to <paramref name="value"/> and the remaining elements initialized to zero.</returns>
+ [Intrinsic]
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static unsafe Vector64<double> CreateScalar(double value) => CreateScalar<double>(value);
/// <summary>Creates a new <see cref="Vector64{Int16}" /> instance with the first element initialized to the specified value and the remaining elements initialized to zero.</summary>
/// <param name="value">The value that element 0 will be initialized to.</param>
/// <returns>A new <see cref="Vector64{Int16}" /> instance with the first element initialized to <paramref name="value"/> and the remaining elements initialized to zero.</returns>
+ [Intrinsic]
[MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static unsafe Vector64<short> CreateScalar(short value)
- {
- if (AdvSimd.IsSupported)
- {
- return AdvSimd.Insert(Vector64<short>.Zero, 0, value);
- }
- else
- {
- return CreateScalar<short>(value);
- }
- }
+ public static unsafe Vector64<short> CreateScalar(short value) => CreateScalar<short>(value);
/// <summary>Creates a new <see cref="Vector64{Int32}" /> instance with the first element initialized to the specified value and the remaining elements initialized to zero.</summary>
/// <param name="value">The value that element 0 will be initialized to.</param>
/// <returns>A new <see cref="Vector64{Int32}" /> instance with the first element initialized to <paramref name="value"/> and the remaining elements initialized to zero.</returns>
+ [Intrinsic]
[MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static unsafe Vector64<int> CreateScalar(int value)
- {
- if (AdvSimd.IsSupported)
- {
- return AdvSimd.Insert(Vector64<int>.Zero, 0, value);
- }
- else
- {
- return CreateScalar<int>(value);
- }
- }
+ public static unsafe Vector64<int> CreateScalar(int value) => CreateScalar<int>(value);
/// <summary>Creates a new <see cref="Vector64{Int64}" /> instance with the first element initialized to the specified value and the remaining elements initialized to zero.</summary>
/// <param name="value">The value that element 0 will be initialized to.</param>
/// <returns>A new <see cref="Vector64{Int64}" /> instance with the first element initialized to <paramref name="value"/> and the remaining elements initialized to zero.</returns>
+ [Intrinsic]
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static unsafe Vector64<long> CreateScalar(long value) => CreateScalar<long>(value);
/// <summary>Creates a new <see cref="Vector64{IntPtr}" /> instance with the first element initialized to the specified value and the remaining elements initialized to zero.</summary>
/// <param name="value">The value that element 0 will be initialized to.</param>
/// <returns>A new <see cref="Vector64{IntPtr}" /> instance with the first element initialized to <paramref name="value"/> and the remaining elements initialized to zero.</returns>
+ [Intrinsic]
[MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static unsafe Vector64<nint> CreateScalar(nint value)
- {
-#if TARGET_64BIT
- return CreateScalar((long)(value)).AsNInt();
-#else
- return CreateScalar((int)(value)).AsNInt();
-#endif
- }
+ public static unsafe Vector64<nint> CreateScalar(nint value) => CreateScalar<nint>(value);
/// <summary>Creates a new <see cref="Vector64{UIntPtr}" /> instance with the first element initialized to the specified value and the remaining elements initialized to zero.</summary>
/// <param name="value">The value that element 0 will be initialized to.</param>
/// <returns>A new <see cref="Vector64{UIntPtr}" /> instance with the first element initialized to <paramref name="value"/> and the remaining elements initialized to zero.</returns>
+ [Intrinsic]
[CLSCompliant(false)]
[MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static unsafe Vector64<nuint> CreateScalar(nuint value)
- {
-#if TARGET_64BIT
- return CreateScalar((ulong)(value)).AsNUInt();
-#else
- return CreateScalar((uint)(value)).AsNUInt();
-#endif
- }
+ public static unsafe Vector64<nuint> CreateScalar(nuint value) => CreateScalar<nuint>(value);
/// <summary>Creates a new <see cref="Vector64{SByte}" /> instance with the first element initialized to the specified value and the remaining elements initialized to zero.</summary>
/// <param name="value">The value that element 0 will be initialized to.</param>
/// <returns>A new <see cref="Vector64{SByte}" /> instance with the first element initialized to <paramref name="value"/> and the remaining elements initialized to zero.</returns>
+ [Intrinsic]
[CLSCompliant(false)]
[MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static unsafe Vector64<sbyte> CreateScalar(sbyte value)
- {
- if (AdvSimd.IsSupported)
- {
- return AdvSimd.Insert(Vector64<sbyte>.Zero, 0, value);
- }
- else
- {
- return CreateScalar<sbyte>(value);
- }
- }
+ public static unsafe Vector64<sbyte> CreateScalar(sbyte value) => CreateScalar<sbyte>(value);
/// <summary>Creates a new <see cref="Vector64{Single}" /> instance with the first element initialized to the specified value and the remaining elements initialized to zero.</summary>
/// <param name="value">The value that element 0 will be initialized to.</param>
/// <returns>A new <see cref="Vector64{Single}" /> instance with the first element initialized to <paramref name="value"/> and the remaining elements initialized to zero.</returns>
+ [Intrinsic]
[MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static unsafe Vector64<float> CreateScalar(float value)
- {
- if (AdvSimd.IsSupported)
- {
- return AdvSimd.Insert(Vector64<float>.Zero, 0, value);
- }
- else
- {
- return CreateScalar<float>(value);
- }
- }
+ public static unsafe Vector64<float> CreateScalar(float value) => CreateScalar<float>(value);
/// <summary>Creates a new <see cref="Vector64{UInt16}" /> instance with the first element initialized to the specified value and the remaining elements initialized to zero.</summary>
/// <param name="value">The value that element 0 will be initialized to.</param>
/// <returns>A new <see cref="Vector64{UInt16}" /> instance with the first element initialized to <paramref name="value"/> and the remaining elements initialized to zero.</returns>
+ [Intrinsic]
[CLSCompliant(false)]
[MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static unsafe Vector64<ushort> CreateScalar(ushort value)
- {
- if (AdvSimd.IsSupported)
- {
- return AdvSimd.Insert(Vector64<ushort>.Zero, 0, value);
- }
- else
- {
- return CreateScalar<ushort>(value);
- }
- }
+ public static unsafe Vector64<ushort> CreateScalar(ushort value) => CreateScalar<ushort>(value);
/// <summary>Creates a new <see cref="Vector64{UInt32}" /> instance with the first element initialized to the specified value and the remaining elements initialized to zero.</summary>
/// <param name="value">The value that element 0 will be initialized to.</param>
/// <returns>A new <see cref="Vector64{UInt32}" /> instance with the first element initialized to <paramref name="value"/> and the remaining elements initialized to zero.</returns>
+ [Intrinsic]
[CLSCompliant(false)]
[MethodImpl(MethodImplOptions.AggressiveInlining)]
- public static unsafe Vector64<uint> CreateScalar(uint value)
- {
- if (AdvSimd.IsSupported)
- {
- return AdvSimd.Insert(Vector64<uint>.Zero, 0, value);
- }
- else
- {
- return CreateScalar<uint>(value);
- }
- }
+ public static unsafe Vector64<uint> CreateScalar(uint value) => CreateScalar<uint>(value);
/// <summary>Creates a new <see cref="Vector64{UInt64}" /> instance with the first element initialized to the specified value and the remaining elements initialized to zero.</summary>
/// <param name="value">The value that element 0 will be initialized to.</param>
/// <returns>A new <see cref="Vector64{UInt64}" /> instance with the first element initialized to <paramref name="value"/> and the remaining elements initialized to zero.</returns>
+ [Intrinsic]
[CLSCompliant(false)]
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static unsafe Vector64<ulong> CreateScalar(ulong value) => CreateScalar<ulong>(value);