diff options
Diffstat (limited to 'src/System.Private.CoreLib/shared/System/Runtime/Intrinsics/X86/Sse.cs')
-rw-r--r-- | src/System.Private.CoreLib/shared/System/Runtime/Intrinsics/X86/Sse.cs | 631 |
1 files changed, 631 insertions, 0 deletions
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

using System;
using System.Runtime.CompilerServices;
using System.Runtime.Intrinsics;

namespace System.Runtime.Intrinsics.X86
{
    /// <summary>
    /// This class provides access to Intel SSE hardware instructions via intrinsics
    /// </summary>
    /// <remarks>
    /// Almost every member below is implemented as a call to itself. That is intentional:
    /// the bodies are placeholders, and the runtime is expected to recognise each member as
    /// an intrinsic and substitute the hardware instruction named in its summary.
    /// Each summary lists the equivalent native (C/C++) intrinsic signature followed by the
    /// instruction form it maps to; "HELPER" marks members with no single native instruction.
    /// </remarks>
    [CLSCompliant(false)]
    public abstract class Sse
    {
        // Not constructible from outside the assembly; the class only hosts static members.
        internal Sse() { }

        /// <summary>
        /// Self-referential placeholder like the other members of this class; the value is
        /// expected to be supplied by the runtime (presumably whether SSE can be used).
        /// </summary>
        public static bool IsSupported { get => IsSupported; }

        /// <summary>
        /// __m128 _mm_add_ps (__m128 a,  __m128 b)
        ///   ADDPS xmm, xmm/m128
        /// </summary>
        public static Vector128<float> Add(Vector128<float> left, Vector128<float> right) => Add(left, right);

        /// <summary>
        /// __m128 _mm_add_ss (__m128 a,  __m128 b)
        ///   ADDSS xmm, xmm/m32
        /// </summary>
        public static Vector128<float> AddScalar(Vector128<float> left, Vector128<float> right) => AddScalar(left, right);

        /// <summary>
        /// __m128 _mm_and_ps (__m128 a, __m128 b)
        ///   ANDPS xmm, xmm/m128
        /// </summary>
        public static Vector128<float> And(Vector128<float> left, Vector128<float> right) => And(left, right);

        /// <summary>
        /// __m128 _mm_andnot_ps (__m128 a, __m128 b)
        ///   ANDNPS xmm, xmm/m128
        /// </summary>
        public static Vector128<float> AndNot(Vector128<float> left, Vector128<float> right) => AndNot(left, right);

        /// <summary>
        /// __m128 _mm_cmpeq_ps (__m128 a,  __m128 b)
        ///   CMPPS xmm, xmm/m128, imm8(0)
        /// </summary>
        public static Vector128<float> CompareEqual(Vector128<float> left, Vector128<float> right) => CompareEqual(left, right);

        /// <summary>
        /// int _mm_comieq_ss (__m128 a, __m128 b)
        ///   COMISS xmm, xmm/m32
        /// </summary>
        public static bool CompareEqualOrderedScalar(Vector128<float> left, Vector128<float> right) => CompareEqualOrderedScalar(left, right);

        /// <summary>
        /// int _mm_ucomieq_ss (__m128 a, __m128 b)
        ///   UCOMISS xmm, xmm/m32
        /// </summary>
        public static bool CompareEqualUnorderedScalar(Vector128<float> left, Vector128<float> right) => CompareEqualUnorderedScalar(left, right);

        /// <summary>
        /// __m128 _mm_cmpeq_ss (__m128 a,  __m128 b)
        ///   CMPSS xmm, xmm/m32, imm8(0)
        /// </summary>
        public static Vector128<float> CompareEqualScalar(Vector128<float> left, Vector128<float> right) => CompareEqualScalar(left, right);

        /// <summary>
        /// __m128 _mm_cmpgt_ps (__m128 a,  __m128 b)
        ///   CMPPS xmm, xmm/m128, imm8(6)
        /// </summary>
        public static Vector128<float> CompareGreaterThan(Vector128<float> left, Vector128<float> right) => CompareGreaterThan(left, right);

        /// <summary>
        /// int _mm_comigt_ss (__m128 a, __m128 b)
        ///   COMISS xmm, xmm/m32
        /// </summary>
        public static bool CompareGreaterThanOrderedScalar(Vector128<float> left, Vector128<float> right) => CompareGreaterThanOrderedScalar(left, right);

        /// <summary>
        /// int _mm_ucomigt_ss (__m128 a, __m128 b)
        ///   UCOMISS xmm, xmm/m32
        /// </summary>
        public static bool CompareGreaterThanUnorderedScalar(Vector128<float> left, Vector128<float> right) => CompareGreaterThanUnorderedScalar(left, right);

        /// <summary>
        /// __m128 _mm_cmpgt_ss (__m128 a,  __m128 b)
        ///   CMPSS xmm, xmm/m32, imm8(6)
        /// </summary>
        public static Vector128<float> CompareGreaterThanScalar(Vector128<float> left, Vector128<float> right) => CompareGreaterThanScalar(left, right);

        /// <summary>
        /// __m128 _mm_cmpge_ps (__m128 a,  __m128 b)
        ///   CMPPS xmm, xmm/m128, imm8(5)
        /// </summary>
        public static Vector128<float> CompareGreaterThanOrEqual(Vector128<float> left, Vector128<float> right) => CompareGreaterThanOrEqual(left, right);

        /// <summary>
        /// int _mm_comige_ss (__m128 a, __m128 b)
        ///   COMISS xmm, xmm/m32
        /// </summary>
        public static bool CompareGreaterThanOrEqualOrderedScalar(Vector128<float> left, Vector128<float> right) => CompareGreaterThanOrEqualOrderedScalar(left, right);

        /// <summary>
        /// int _mm_ucomige_ss (__m128 a, __m128 b)
        ///   UCOMISS xmm, xmm/m32
        /// </summary>
        public static bool CompareGreaterThanOrEqualUnorderedScalar(Vector128<float> left, Vector128<float> right) => CompareGreaterThanOrEqualUnorderedScalar(left, right);

        /// <summary>
        /// __m128 _mm_cmpge_ss (__m128 a,  __m128 b)
        ///   CMPSS xmm, xmm/m32, imm8(5)
        /// </summary>
        public static Vector128<float> CompareGreaterThanOrEqualScalar(Vector128<float> left, Vector128<float> right) => CompareGreaterThanOrEqualScalar(left, right);

        /// <summary>
        /// __m128 _mm_cmplt_ps (__m128 a,  __m128 b)
        ///   CMPPS xmm, xmm/m128, imm8(1)
        /// </summary>
        public static Vector128<float> CompareLessThan(Vector128<float> left, Vector128<float> right) => CompareLessThan(left, right);

        /// <summary>
        /// int _mm_comilt_ss (__m128 a, __m128 b)
        ///   COMISS xmm, xmm/m32
        /// </summary>
        public static bool CompareLessThanOrderedScalar(Vector128<float> left, Vector128<float> right) => CompareLessThanOrderedScalar(left, right);

        /// <summary>
        /// int _mm_ucomilt_ss (__m128 a, __m128 b)
        ///   UCOMISS xmm, xmm/m32
        /// </summary>
        public static bool CompareLessThanUnorderedScalar(Vector128<float> left, Vector128<float> right) => CompareLessThanUnorderedScalar(left, right);

        /// <summary>
        /// __m128 _mm_cmplt_ss (__m128 a,  __m128 b)
        ///   CMPSS xmm, xmm/m32, imm8(1)
        /// </summary>
        public static Vector128<float> CompareLessThanScalar(Vector128<float> left, Vector128<float> right) => CompareLessThanScalar(left, right);

        /// <summary>
        /// __m128 _mm_cmple_ps (__m128 a,  __m128 b)
        ///   CMPPS xmm, xmm/m128, imm8(2)
        /// </summary>
        public static Vector128<float> CompareLessThanOrEqual(Vector128<float> left, Vector128<float> right) => CompareLessThanOrEqual(left, right);

        /// <summary>
        /// int _mm_comile_ss (__m128 a, __m128 b)
        ///   COMISS xmm, xmm/m32
        /// </summary>
        public static bool CompareLessThanOrEqualOrderedScalar(Vector128<float> left, Vector128<float> right) => CompareLessThanOrEqualOrderedScalar(left, right);

        /// <summary>
        /// int _mm_ucomile_ss (__m128 a, __m128 b)
        ///   UCOMISS xmm, xmm/m32
        /// </summary>
        public static bool CompareLessThanOrEqualUnorderedScalar(Vector128<float> left, Vector128<float> right) => CompareLessThanOrEqualUnorderedScalar(left, right);

        /// <summary>
        /// __m128 _mm_cmple_ss (__m128 a,  __m128 b)
        ///   CMPSS xmm, xmm/m32, imm8(2)
        /// </summary>
        public static Vector128<float> CompareLessThanOrEqualScalar(Vector128<float> left, Vector128<float> right) => CompareLessThanOrEqualScalar(left, right);

        /// <summary>
        /// __m128 _mm_cmpneq_ps (__m128 a,  __m128 b)
        ///   CMPPS xmm, xmm/m128, imm8(4)
        /// </summary>
        public static Vector128<float> CompareNotEqual(Vector128<float> left, Vector128<float> right) => CompareNotEqual(left, right);

        /// <summary>
        /// int _mm_comineq_ss (__m128 a, __m128 b)
        ///   COMISS xmm, xmm/m32
        /// </summary>
        public static bool CompareNotEqualOrderedScalar(Vector128<float> left, Vector128<float> right) => CompareNotEqualOrderedScalar(left, right);

        /// <summary>
        /// int _mm_ucomineq_ss (__m128 a, __m128 b)
        ///   UCOMISS xmm, xmm/m32
        /// </summary>
        public static bool CompareNotEqualUnorderedScalar(Vector128<float> left, Vector128<float> right) => CompareNotEqualUnorderedScalar(left, right);

        /// <summary>
        /// __m128 _mm_cmpneq_ss (__m128 a,  __m128 b)
        ///   CMPSS xmm, xmm/m32, imm8(4)
        /// </summary>
        public static Vector128<float> CompareNotEqualScalar(Vector128<float> left, Vector128<float> right) => CompareNotEqualScalar(left, right);

        /// <summary>
        /// __m128 _mm_cmpngt_ps (__m128 a,  __m128 b)
        ///   CMPPS xmm, xmm/m128, imm8(2)
        /// </summary>
        public static Vector128<float> CompareNotGreaterThan(Vector128<float> left, Vector128<float> right) => CompareNotGreaterThan(left, right);

        /// <summary>
        /// __m128 _mm_cmpngt_ss (__m128 a,  __m128 b)
        ///   CMPSS xmm, xmm/m32, imm8(2)
        /// </summary>
        public static Vector128<float> CompareNotGreaterThanScalar(Vector128<float> left, Vector128<float> right) => CompareNotGreaterThanScalar(left, right);

        /// <summary>
        /// __m128 _mm_cmpnge_ps (__m128 a,  __m128 b)
        ///   CMPPS xmm, xmm/m128, imm8(1)
        /// </summary>
        public static Vector128<float> CompareNotGreaterThanOrEqual(Vector128<float> left, Vector128<float> right) => CompareNotGreaterThanOrEqual(left, right);

        /// <summary>
        /// __m128 _mm_cmpnge_ss (__m128 a,  __m128 b)
        ///   CMPSS xmm, xmm/m32, imm8(1)
        /// </summary>
        public static Vector128<float> CompareNotGreaterThanOrEqualScalar(Vector128<float> left, Vector128<float> right) => CompareNotGreaterThanOrEqualScalar(left, right);

        /// <summary>
        /// __m128 _mm_cmpnlt_ps (__m128 a,  __m128 b)
        ///   CMPPS xmm, xmm/m128, imm8(5)
        /// </summary>
        public static Vector128<float> CompareNotLessThan(Vector128<float> left, Vector128<float> right) => CompareNotLessThan(left, right);

        /// <summary>
        /// __m128 _mm_cmpnlt_ss (__m128 a,  __m128 b)
        ///   CMPSS xmm, xmm/m32, imm8(5)
        /// </summary>
        public static Vector128<float> CompareNotLessThanScalar(Vector128<float> left, Vector128<float> right) => CompareNotLessThanScalar(left, right);

        /// <summary>
        /// __m128 _mm_cmpnle_ps (__m128 a,  __m128 b)
        ///   CMPPS xmm, xmm/m128, imm8(6)
        /// </summary>
        public static Vector128<float> CompareNotLessThanOrEqual(Vector128<float> left, Vector128<float> right) => CompareNotLessThanOrEqual(left, right);

        /// <summary>
        /// __m128 _mm_cmpnle_ss (__m128 a,  __m128 b)
        ///   CMPSS xmm, xmm/m32, imm8(6)
        /// </summary>
        public static Vector128<float> CompareNotLessThanOrEqualScalar(Vector128<float> left, Vector128<float> right) => CompareNotLessThanOrEqualScalar(left, right);

        /// <summary>
        /// __m128 _mm_cmpord_ps (__m128 a,  __m128 b)
        ///   CMPPS xmm, xmm/m128, imm8(7)
        /// </summary>
        public static Vector128<float> CompareOrdered(Vector128<float> left, Vector128<float> right) => CompareOrdered(left, right);

        /// <summary>
        /// __m128 _mm_cmpord_ss (__m128 a,  __m128 b)
        ///   CMPSS xmm, xmm/m32, imm8(7)
        /// </summary>
        public static Vector128<float> CompareOrderedScalar(Vector128<float> left, Vector128<float> right) => CompareOrderedScalar(left, right);

        /// <summary>
        /// __m128 _mm_cmpunord_ps (__m128 a,  __m128 b)
        ///   CMPPS xmm, xmm/m128, imm8(3)
        /// </summary>
        public static Vector128<float> CompareUnordered(Vector128<float> left, Vector128<float> right) => CompareUnordered(left, right);

        /// <summary>
        /// __m128 _mm_cmpunord_ss (__m128 a,  __m128 b)
        ///   CMPSS xmm, xmm/m32, imm8(3)
        /// </summary>
        public static Vector128<float> CompareUnorderedScalar(Vector128<float> left, Vector128<float> right) => CompareUnorderedScalar(left, right);

        /// <summary>
        /// int _mm_cvtss_si32 (__m128 a)
        ///   CVTSS2SI r32, xmm/m32
        /// </summary>
        public static int ConvertToInt32(Vector128<float> value) => ConvertToInt32(value);

        /// <summary>
        /// __int64 _mm_cvtss_si64 (__m128 a)
        ///   CVTSS2SI r64, xmm/m32
        /// </summary>
        public static long ConvertToInt64(Vector128<float> value) => ConvertToInt64(value);

        /// <summary>
        /// float _mm_cvtss_f32 (__m128 a)
        ///   HELPER: MOVSS
        /// </summary>
        public static float ConvertToSingle(Vector128<float> value) => ConvertToSingle(value);

        /// <summary>
        /// __m128 _mm_cvtsi32_ss (__m128 a, int b)
        ///   CVTSI2SS xmm, reg/m32
        /// </summary>
        public static Vector128<float> ConvertScalarToVector128Single(Vector128<float> upper, int value) => ConvertScalarToVector128Single(upper, value);

        /// <summary>
        /// __m128 _mm_cvtsi64_ss (__m128 a, __int64 b)
        ///   CVTSI2SS xmm, reg/m64
        /// </summary>
        public static Vector128<float> ConvertScalarToVector128Single(Vector128<float> upper, long value) => ConvertScalarToVector128Single(upper, value);

        /// <summary>
        /// int _mm_cvttss_si32 (__m128 a)
        ///   CVTTSS2SI r32, xmm/m32
        /// </summary>
        public static int ConvertToInt32WithTruncation(Vector128<float> value) => ConvertToInt32WithTruncation(value);

        /// <summary>
        /// __int64 _mm_cvttss_si64 (__m128 a)
        ///   CVTTSS2SI r64, xmm/m32
        /// </summary>
        public static long ConvertToInt64WithTruncation(Vector128<float> value) => ConvertToInt64WithTruncation(value);

        /// <summary>
        /// __m128 _mm_div_ps (__m128 a,  __m128 b)
        ///   DIVPS xmm, xmm/m128
        /// </summary>
        public static Vector128<float> Divide(Vector128<float> left, Vector128<float> right) => Divide(left, right);

        /// <summary>
        /// __m128 _mm_div_ss (__m128 a,  __m128 b)
        ///   DIVSS xmm, xmm/m32
        /// </summary>
        public static Vector128<float> DivideScalar(Vector128<float> left, Vector128<float> right) => DivideScalar(left, right);

        /// <summary>
        /// __m128 _mm_loadu_ps (float const* mem_address)
        ///   MOVUPS xmm, m128
        /// </summary>
        public static unsafe Vector128<float> LoadVector128(float* address) => LoadVector128(address);

        /// <summary>
        /// __m128 _mm_load_ss (float const* mem_address)
        ///   MOVSS xmm, m32
        /// </summary>
        public static unsafe Vector128<float> LoadScalarVector128(float* address) => LoadScalarVector128(address);

        /// <summary>
        /// __m128 _mm_load_ps (float const* mem_address)
        ///   MOVAPS xmm, m128
        /// </summary>
        public static unsafe Vector128<float> LoadAlignedVector128(float* address) => LoadAlignedVector128(address);

        /// <summary>
        /// __m128 _mm_loadh_pi (__m128 a, __m64 const* mem_addr)
        ///   MOVHPS xmm, m64
        /// </summary>
        public static unsafe Vector128<float> LoadHigh(Vector128<float> lower, float* address) => LoadHigh(lower, address);

        /// <summary>
        /// __m128 _mm_loadl_pi (__m128 a, __m64 const* mem_addr)
        ///   MOVLPS xmm, m64
        /// </summary>
        public static unsafe Vector128<float> LoadLow(Vector128<float> upper, float* address) => LoadLow(upper, address);

        /// <summary>
        /// __m128 _mm_max_ps (__m128 a,  __m128 b)
        ///   MAXPS xmm, xmm/m128
        /// </summary>
        public static Vector128<float> Max(Vector128<float> left, Vector128<float> right) => Max(left, right);

        /// <summary>
        /// __m128 _mm_max_ss (__m128 a,  __m128 b)
        ///   MAXSS xmm, xmm/m32
        /// </summary>
        public static Vector128<float> MaxScalar(Vector128<float> left, Vector128<float> right) => MaxScalar(left, right);

        /// <summary>
        /// __m128 _mm_min_ps (__m128 a,  __m128 b)
        ///   MINPS xmm, xmm/m128
        /// </summary>
        public static Vector128<float> Min(Vector128<float> left, Vector128<float> right) => Min(left, right);

        /// <summary>
        /// __m128 _mm_min_ss (__m128 a,  __m128 b)
        ///   MINSS xmm, xmm/m32
        /// </summary>
        public static Vector128<float> MinScalar(Vector128<float> left, Vector128<float> right) => MinScalar(left, right);

        /// <summary>
        /// __m128 _mm_move_ss (__m128 a, __m128 b)
        ///   MOVSS xmm, xmm
        /// </summary>
        public static Vector128<float> MoveScalar(Vector128<float> upper, Vector128<float> value) => MoveScalar(upper, value);

        /// <summary>
        /// __m128 _mm_movehl_ps (__m128 a,  __m128 b)
        ///   MOVHLPS xmm, xmm
        /// </summary>
        public static Vector128<float> MoveHighToLow(Vector128<float> left, Vector128<float> right) => MoveHighToLow(left, right);

        /// <summary>
        /// __m128 _mm_movelh_ps (__m128 a,  __m128 b)
        ///   MOVLHPS xmm, xmm
        /// </summary>
        public static Vector128<float> MoveLowToHigh(Vector128<float> left, Vector128<float> right) => MoveLowToHigh(left, right);

        /// <summary>
        /// int _mm_movemask_ps (__m128 a)
        ///   MOVMSKPS reg, xmm
        /// </summary>
        public static int MoveMask(Vector128<float> value) => MoveMask(value);

        /// <summary>
        /// __m128 _mm_mul_ps (__m128 a, __m128 b)
        ///   MULPS xmm, xmm/m128
        /// </summary>
        public static Vector128<float> Multiply(Vector128<float> left, Vector128<float> right) => Multiply(left, right);

        /// <summary>
        /// __m128 _mm_mul_ss (__m128 a, __m128 b)
        ///   MULSS xmm, xmm/m32
        /// </summary>
        public static Vector128<float> MultiplyScalar(Vector128<float> left, Vector128<float> right) => MultiplyScalar(left, right);

        /// <summary>
        /// __m128 _mm_or_ps (__m128 a,  __m128 b)
        ///   ORPS xmm, xmm/m128
        /// </summary>
        public static Vector128<float> Or(Vector128<float> left, Vector128<float> right) => Or(left, right);

        /// <summary>
        /// void _mm_prefetch(char* p, int i)
        ///   PREFETCHT0 m8
        /// </summary>
        public static unsafe void Prefetch0(void* address) => Prefetch0(address);

        /// <summary>
        /// void _mm_prefetch(char* p, int i)
        ///   PREFETCHT1 m8
        /// </summary>
        public static unsafe void Prefetch1(void* address) => Prefetch1(address);

        /// <summary>
        /// void _mm_prefetch(char* p, int i)
        ///   PREFETCHT2 m8
        /// </summary>
        public static unsafe void Prefetch2(void* address) => Prefetch2(address);

        /// <summary>
        /// void _mm_prefetch(char* p, int i)
        ///   PREFETCHNTA m8
        /// </summary>
        public static unsafe void PrefetchNonTemporal(void* address) => PrefetchNonTemporal(address);

        /// <summary>
        /// __m128 _mm_rcp_ps (__m128 a)
        ///   RCPPS xmm, xmm/m128
        /// </summary>
        public static Vector128<float> Reciprocal(Vector128<float> value) => Reciprocal(value);

        /// <summary>
        /// __m128 _mm_rcp_ss (__m128 a)
        ///   RCPSS xmm, xmm/m32
        /// </summary>
        public static Vector128<float> ReciprocalScalar(Vector128<float> value) => ReciprocalScalar(value);

        /// <summary>
        /// __m128 _mm_rcp_ss (__m128 a, __m128 b)
        ///   RCPSS xmm, xmm/m32
        /// The above native signature does not exist. We provide this additional overload for consistency with the other scalar APIs.
        /// </summary>
        public static Vector128<float> ReciprocalScalar(Vector128<float> upper, Vector128<float> value) => ReciprocalScalar(upper, value);

        /// <summary>
        /// __m128 _mm_rsqrt_ps (__m128 a)
        ///   RSQRTPS xmm, xmm/m128
        /// </summary>
        public static Vector128<float> ReciprocalSqrt(Vector128<float> value) => ReciprocalSqrt(value);

        /// <summary>
        /// __m128 _mm_rsqrt_ss (__m128 a)
        ///   RSQRTSS xmm, xmm/m32
        /// </summary>
        public static Vector128<float> ReciprocalSqrtScalar(Vector128<float> value) => ReciprocalSqrtScalar(value);

        /// <summary>
        /// __m128 _mm_rsqrt_ss (__m128 a, __m128 b)
        ///   RSQRTSS xmm, xmm/m32
        /// The above native signature does not exist. We provide this additional overload for consistency with the other scalar APIs.
        /// </summary>
        public static Vector128<float> ReciprocalSqrtScalar(Vector128<float> upper, Vector128<float> value) => ReciprocalSqrtScalar(upper, value);

        /// <summary>
        /// __m128 _mm_set1_ps (float a)
        ///   HELPER
        /// </summary>
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        public static Vector128<float> SetAllVector128(float value)
        {
            // Zero vector and load value at index 0
            Vector128<float> vector = SetScalarVector128(value);
            // Shuffle control 0 selects element 0 for every lane:
            // create { vl vl vl vl } and return result
            return Shuffle(vector, vector, 0);
        }

        /// <summary>
        /// __m128 _mm_set_ss (float a)
        ///   HELPER
        /// </summary>
        public static Vector128<float> SetScalarVector128(float value) => SetScalarVector128(value);

        /// <summary>
        /// __m128 _mm_set_ps (float e3, float e2, float e1, float e0)
        ///   HELPER
        /// </summary>
        public static Vector128<float> SetVector128(float e3, float e2, float e1, float e0)
        {
            // TODO-CQ Optimize algorithm choice based on benchmarks

            // Zero a vector per argument and load that argument at index 0.
            // Lane diagrams below are written highest element first.
            Vector128<float> e3Vector = SetScalarVector128(e3);
            Vector128<float> e2Vector = SetScalarVector128(e2);
            Vector128<float> e1Vector = SetScalarVector128(e1);
            Vector128<float> e0Vector = SetScalarVector128(e0);
            // Create { -- -- e2 e0 }
            Vector128<float> evenPair = UnpackLow(e0Vector, e2Vector);
            // Create { -- -- e3 e1 }
            Vector128<float> oddPair = UnpackLow(e1Vector, e3Vector);
            // Interleave the two pairs: create { e3 e2 e1 e0 } and return result
            return UnpackLow(evenPair, oddPair);
        }

        /// <summary>
        /// __m128 _mm_setzero_ps (void)
        ///   HELPER - XORPS
        /// </summary>
        public static Vector128<float> SetZeroVector128() => SetZeroVector128();

        /// <summary>
        /// __m128 _mm_castpd_ps (__m128d a)
        ///   HELPER - No Codegen
        /// __m128i _mm_castpd_si128 (__m128d a)
        ///   HELPER - No Codegen
        /// __m128d _mm_castps_pd (__m128 a)
        ///   HELPER - No Codegen
        /// __m128i _mm_castps_si128 (__m128 a)
        ///   HELPER - No Codegen
        /// __m128d _mm_castsi128_pd (__m128i a)
        ///   HELPER - No Codegen
        /// __m128 _mm_castsi128_ps (__m128i a)
        ///   HELPER - No Codegen
        /// </summary>
        public static Vector128<U> StaticCast<T, U>(Vector128<T> value) where T : struct where U : struct
        {
            return StaticCast<T, U>(value);
        }

        /// <summary>
        /// __m128 _mm_shuffle_ps (__m128 a,  __m128 b, unsigned int control)
        ///   SHUFPS xmm, xmm/m128, imm8
        /// </summary>
        public static Vector128<float> Shuffle(Vector128<float> left, Vector128<float> right, byte control) => Shuffle(left, right, control);

        /// <summary>
        /// __m128 _mm_sqrt_ps (__m128 a)
        ///   SQRTPS xmm, xmm/m128
        /// </summary>
        public static Vector128<float> Sqrt(Vector128<float> value) => Sqrt(value);

        /// <summary>
        /// __m128 _mm_sqrt_ss (__m128 a)
        ///   SQRTSS xmm, xmm/m32
        /// </summary>
        public static Vector128<float> SqrtScalar(Vector128<float> value) => SqrtScalar(value);

        /// <summary>
        /// __m128 _mm_sqrt_ss (__m128 a, __m128 b)
        ///   SQRTSS xmm, xmm/m32
        /// The above native signature does not exist. We provide this additional overload for consistency with the other scalar APIs.
        /// </summary>
        public static Vector128<float> SqrtScalar(Vector128<float> upper, Vector128<float> value) => SqrtScalar(upper, value);

        /// <summary>
        /// void _mm_store_ps (float* mem_addr, __m128 a)
        ///   MOVAPS m128, xmm
        /// </summary>
        public static unsafe void StoreAligned(float* address, Vector128<float> source) => StoreAligned(address, source);

        /// <summary>
        /// void _mm_stream_ps (float* mem_addr, __m128 a)
        ///   MOVNTPS m128, xmm
        /// </summary>
        public static unsafe void StoreAlignedNonTemporal(float* address, Vector128<float> source) => StoreAlignedNonTemporal(address, source);

        /// <summary>
        /// void _mm_storeu_ps (float* mem_addr, __m128 a)
        ///   MOVUPS m128, xmm
        /// </summary>
        public static unsafe void Store(float* address, Vector128<float> source) => Store(address, source);

        /// <summary>
        /// void _mm_sfence(void)
        ///   SFENCE
        /// </summary>
        public static void StoreFence() => StoreFence();

        /// <summary>
        /// void _mm_store_ss (float* mem_addr, __m128 a)
        ///   MOVSS m32, xmm
        /// </summary>
        public static unsafe void StoreScalar(float* address, Vector128<float> source) => StoreScalar(address, source);

        /// <summary>
        /// void _mm_storeh_pi (__m64* mem_addr, __m128 a)
        ///   MOVHPS m64, xmm
        /// </summary>
        public static unsafe void StoreHigh(float* address, Vector128<float> source) => StoreHigh(address, source);

        /// <summary>
        /// void _mm_storel_pi (__m64* mem_addr, __m128 a)
        ///   MOVLPS m64, xmm
        /// </summary>
        public static unsafe void StoreLow(float* address, Vector128<float> source) => StoreLow(address, source);

        /// <summary>
        /// __m128 _mm_sub_ps (__m128 a, __m128 b)
        ///   SUBPS xmm, xmm/m128
        /// </summary>
        public static Vector128<float> Subtract(Vector128<float> left, Vector128<float> right) => Subtract(left, right);

        /// <summary>
        /// __m128 _mm_sub_ss (__m128 a, __m128 b)
        ///   SUBSS xmm, xmm/m32
        /// </summary>
        public static Vector128<float> SubtractScalar(Vector128<float> left, Vector128<float> right) => SubtractScalar(left, right);

        /// <summary>
        /// __m128 _mm_unpackhi_ps (__m128 a,  __m128 b)
        ///   UNPCKHPS xmm, xmm/m128
        /// </summary>
        public static Vector128<float> UnpackHigh(Vector128<float> left, Vector128<float> right) => UnpackHigh(left, right);

        /// <summary>
        /// __m128 _mm_unpacklo_ps (__m128 a,  __m128 b)
        ///   UNPCKLPS xmm, xmm/m128
        /// </summary>
        public static Vector128<float> UnpackLow(Vector128<float> left, Vector128<float> right) => UnpackLow(left, right);

        /// <summary>
        /// __m128 _mm_xor_ps (__m128 a,  __m128 b)
        ///   XORPS xmm, xmm/m128
        /// </summary>
        public static Vector128<float> Xor(Vector128<float> left, Vector128<float> right) => Xor(left, right);
    }
}