github.com/mono/mono.git
author    Egor Bogatov <egorbo@gmail.com>  2019-03-10 00:55:43 +0300
committer Marek Safar <marek.safar@gmail.com>  2019-03-10 02:56:08 +0300
commit    8a3a41b81a53eff99561de92bf5f39ad172bd2e5 (patch)
tree      1aeea13e68fb3218e8dd0438b610080d1437094e /netcore/System.Private.CoreLib/shared/System/Buffer.cs
parent    73caa49efb5d9cce8c62d1fa4b5423d663441cdc (diff)
Move Buffer to shared (dotnet/coreclr#23157)
Signed-off-by: dotnet-bot <dotnet-bot@microsoft.com>
Diffstat (limited to 'netcore/System.Private.CoreLib/shared/System/Buffer.cs')
-rw-r--r--  netcore/System.Private.CoreLib/shared/System/Buffer.cs  586
1 file changed, 586 insertions(+), 0 deletions(-)
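For orientation, the public surface added by this file can be exercised as in
the following minimal sketch (illustrative only; it assumes an unsafe context
and uses just the public Buffer members defined in the diff below):

    byte[] data = { 1, 2, 3, 4 };
    int total = Buffer.ByteLength(data);  // 4: one byte per element for byte[]
    byte b = Buffer.GetByte(data, 2);     // 3: reads the byte at offset 2
    Buffer.SetByte(data, 0, 0xFF);        // writes 0xFF at offset 0

    unsafe
    {
        fixed (byte* src = data)
        {
            byte* dst = stackalloc byte[4];
            // Throws if sourceBytesToCopy exceeds destinationSizeInBytes.
            Buffer.MemoryCopy(src, dst, destinationSizeInBytes: 4, sourceBytesToCopy: 4);
        }
    }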
diff --git a/netcore/System.Private.CoreLib/shared/System/Buffer.cs b/netcore/System.Private.CoreLib/shared/System/Buffer.cs
new file mode 100644
index 00000000000..dda25b827f9
--- /dev/null
+++ b/netcore/System.Private.CoreLib/shared/System/Buffer.cs
@@ -0,0 +1,586 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+#if AMD64 || ARM64 || (BIT32 && !ARM)
+#define HAS_CUSTOM_BLOCKS
+#endif
+
+using System.Diagnostics;
+using System.Runtime;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+
+using Internal.Runtime.CompilerServices;
+
+#if BIT64
+using nint = System.Int64;
+using nuint = System.UInt64;
+#else
+using nint = System.Int32;
+using nuint = System.UInt32;
+#endif
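+// Note: these aliases give nint/nuint the platform word size (Int64/UInt64 on
+// 64-bit builds, Int32/UInt32 otherwise); later C# versions expose native-sized
+// integers directly, but this file predates that.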
+
+namespace System
+{
+ public static partial class Buffer
+ {
+ public static int ByteLength(Array array)
+ {
+ // Is the array present?
+ if (array == null)
+ throw new ArgumentNullException(nameof(array));
+
+ // Is it of primitive types?
+ if (!IsPrimitiveTypeArray(array))
+ throw new ArgumentException(SR.Arg_MustBePrimArray, nameof(array));
+
+ return _ByteLength(array);
+ }
+
+ public static byte GetByte(Array array, int index)
+ {
+ // Is the array present?
+ if (array == null)
+ throw new ArgumentNullException(nameof(array));
+
+ // Is it of primitive types?
+ if (!IsPrimitiveTypeArray(array))
+ throw new ArgumentException(SR.Arg_MustBePrimArray, nameof(array));
+
+ // Is the index in valid range of the array?
+ if ((uint)index >= (uint)_ByteLength(array))
+ throw new ArgumentOutOfRangeException(nameof(index));
+
+ return Unsafe.Add<byte>(ref array.GetRawArrayData(), index);
+ }
+
+ public static void SetByte(Array array, int index, byte value)
+ {
+ // Is the array present?
+ if (array == null)
+ throw new ArgumentNullException(nameof(array));
+
+ // Is it of primitive types?
+ if (!IsPrimitiveTypeArray(array))
+ throw new ArgumentException(SR.Arg_MustBePrimArray, nameof(array));
+
+ // Is the index in valid range of the array?
+ if ((uint)index >= (uint)_ByteLength(array))
+ throw new ArgumentOutOfRangeException(nameof(index));
+
+ Unsafe.Add<byte>(ref array.GetRawArrayData(), index) = value;
+ }
+
+ // This is currently used by System.IO.UnmanagedMemoryStream
+ internal static unsafe void ZeroMemory(byte* dest, long len)
+ {
+ Debug.Assert((ulong)(len) == (nuint)(len));
+ ZeroMemory(dest, (nuint)(len));
+ }
+
+ // This method has a different signature on x64 than on other platforms, for performance reasons.
+ internal static unsafe void ZeroMemory(byte* dest, nuint len)
+ {
+ SpanHelpers.ClearWithoutReferences(ref *dest, len);
+ }
+
+ // The attributes on this method are chosen for best JIT performance.
+ // Please do not edit unless intentional.
+ [MethodImplAttribute(MethodImplOptions.AggressiveInlining)]
+ [CLSCompliant(false)]
+ public static unsafe void MemoryCopy(void* source, void* destination, long destinationSizeInBytes, long sourceBytesToCopy)
+ {
+ if (sourceBytesToCopy > destinationSizeInBytes)
+ {
+ ThrowHelper.ThrowArgumentOutOfRangeException(ExceptionArgument.sourceBytesToCopy);
+ }
+ Memmove((byte*)destination, (byte*)source, checked((nuint)sourceBytesToCopy));
+ }
+
+ // The attributes on this method are chosen for best JIT performance.
+ // Please do not edit unless intentional.
+ [MethodImplAttribute(MethodImplOptions.AggressiveInlining)]
+ [CLSCompliant(false)]
+ public static unsafe void MemoryCopy(void* source, void* destination, ulong destinationSizeInBytes, ulong sourceBytesToCopy)
+ {
+ if (sourceBytesToCopy > destinationSizeInBytes)
+ {
+ ThrowHelper.ThrowArgumentOutOfRangeException(ExceptionArgument.sourceBytesToCopy);
+ }
+ Memmove((byte*)destination, (byte*)source, checked((nuint)sourceBytesToCopy));
+ }
+
+ internal static unsafe void Memcpy(byte[] dest, int destIndex, byte* src, int srcIndex, int len)
+ {
+ Debug.Assert((srcIndex >= 0) && (destIndex >= 0) && (len >= 0), "Index and length must be non-negative!");
+ Debug.Assert(dest.Length - destIndex >= len, "not enough bytes in dest");
+ // If dest has 0 elements, the fixed statement will throw an
+ // IndexOutOfRangeException. Special-case 0-byte copies.
+ if (len == 0)
+ return;
+ fixed (byte* pDest = dest)
+ {
+ Memcpy(pDest + destIndex, src + srcIndex, len);
+ }
+ }
+
+ internal static unsafe void Memcpy(byte* pDest, int destIndex, byte[] src, int srcIndex, int len)
+ {
+ Debug.Assert((srcIndex >= 0) && (destIndex >= 0) && (len >= 0), "Index and length must be non-negative!");
+ Debug.Assert(src.Length - srcIndex >= len, "not enough bytes in src");
+ // If src has 0 elements, the fixed statement will throw an
+ // IndexOutOfRangeException. Special-case 0-byte copies.
+ if (len == 0)
+ return;
+ fixed (byte* pSrc = src)
+ {
+ Memcpy(pDest + destIndex, pSrc + srcIndex, len);
+ }
+ }
+
+ // This method has a different signature on x64 than on other platforms, for performance reasons.
+ internal static unsafe void Memmove(byte* dest, byte* src, nuint len)
+ {
+#if AMD64 || (BIT32 && !ARM)
+ const nuint CopyThreshold = 2048;
+#elif ARM64
+#if PLATFORM_WINDOWS
+ // Determined optimal value for Windows.
+ // https://github.com/dotnet/coreclr/issues/13843
+ const nuint CopyThreshold = ulong.MaxValue;
+#else // PLATFORM_WINDOWS
+ // Managed code is currently faster than glibc unoptimized memmove
+ // TODO-ARM64-UNIX-OPT revisit when glibc optimized memmove is in Linux distros
+ // https://github.com/dotnet/coreclr/issues/13844
+ const nuint CopyThreshold = ulong.MaxValue;
+#endif // PLATFORM_WINDOWS
+#else
+ const nuint CopyThreshold = 512;
+#endif // AMD64 || (BIT32 && !ARM)
+
+ // P/Invoke into the native version when the buffers are overlapping.
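+ // With unsigned (wraparound) arithmetic, one of the differences below is
+ // less than len only when the two len-byte ranges intersect, so a single
+ // pair of compares detects overlap in either direction.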
+ if (((nuint)dest - (nuint)src < len) || ((nuint)src - (nuint)dest < len))
+ {
+ goto PInvoke;
+ }
+
+ byte* srcEnd = src + len;
+ byte* destEnd = dest + len;
+
+ if (len <= 16) goto MCPY02;
+ if (len > 64) goto MCPY05;
+
+ MCPY00:
+ // Copy in 16-byte chunks, leaving the tail for MCPY01 to handle.
+ Debug.Assert(len > 16 && len <= 64);
+#if HAS_CUSTOM_BLOCKS
+ *(Block16*)dest = *(Block16*)src; // [0,16]
+#elif BIT64
+ *(long*)dest = *(long*)src;
+ *(long*)(dest + 8) = *(long*)(src + 8); // [0,16]
+#else
+ *(int*)dest = *(int*)src;
+ *(int*)(dest + 4) = *(int*)(src + 4);
+ *(int*)(dest + 8) = *(int*)(src + 8);
+ *(int*)(dest + 12) = *(int*)(src + 12); // [0,16]
+#endif
+ if (len <= 32) goto MCPY01;
+#if HAS_CUSTOM_BLOCKS
+ *(Block16*)(dest + 16) = *(Block16*)(src + 16); // [0,32]
+#elif BIT64
+ *(long*)(dest + 16) = *(long*)(src + 16);
+ *(long*)(dest + 24) = *(long*)(src + 24); // [0,32]
+#else
+ *(int*)(dest + 16) = *(int*)(src + 16);
+ *(int*)(dest + 20) = *(int*)(src + 20);
+ *(int*)(dest + 24) = *(int*)(src + 24);
+ *(int*)(dest + 28) = *(int*)(src + 28); // [0,32]
+#endif
+ if (len <= 48) goto MCPY01;
+#if HAS_CUSTOM_BLOCKS
+ *(Block16*)(dest + 32) = *(Block16*)(src + 32); // [0,48]
+#elif BIT64
+ *(long*)(dest + 32) = *(long*)(src + 32);
+ *(long*)(dest + 40) = *(long*)(src + 40); // [0,48]
+#else
+ *(int*)(dest + 32) = *(int*)(src + 32);
+ *(int*)(dest + 36) = *(int*)(src + 36);
+ *(int*)(dest + 40) = *(int*)(src + 40);
+ *(int*)(dest + 44) = *(int*)(src + 44); // [0,48]
+#endif
+
+ MCPY01:
+ // Unconditionally copy the last 16 bytes using destEnd and srcEnd and return.
+ Debug.Assert(len > 16 && len <= 64);
+#if HAS_CUSTOM_BLOCKS
+ *(Block16*)(destEnd - 16) = *(Block16*)(srcEnd - 16);
+#elif BIT64
+ *(long*)(destEnd - 16) = *(long*)(srcEnd - 16);
+ *(long*)(destEnd - 8) = *(long*)(srcEnd - 8);
+#else
+ *(int*)(destEnd - 16) = *(int*)(srcEnd - 16);
+ *(int*)(destEnd - 12) = *(int*)(srcEnd - 12);
+ *(int*)(destEnd - 8) = *(int*)(srcEnd - 8);
+ *(int*)(destEnd - 4) = *(int*)(srcEnd - 4);
+#endif
+ return;
+
+ MCPY02:
+ // Copy the first 8 bytes and then unconditionally copy the last 8 bytes and return.
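+ // At this point len <= 16, so (len & 24) != 0 exactly when 8 <= len <= 16;
+ // if it is zero, fewer than 8 bytes remain and MCPY03 handles them.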
+ if ((len & 24) == 0) goto MCPY03;
+ Debug.Assert(len >= 8 && len <= 16);
+#if BIT64
+ *(long*)dest = *(long*)src;
+ *(long*)(destEnd - 8) = *(long*)(srcEnd - 8);
+#else
+ *(int*)dest = *(int*)src;
+ *(int*)(dest + 4) = *(int*)(src + 4);
+ *(int*)(destEnd - 8) = *(int*)(srcEnd - 8);
+ *(int*)(destEnd - 4) = *(int*)(srcEnd - 4);
+#endif
+ return;
+
+ MCPY03:
+ // Copy the first 4 bytes and then unconditionally copy the last 4 bytes and return.
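+ // Here len < 8, so (len & 4) != 0 exactly when 4 <= len < 8.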
+ if ((len & 4) == 0) goto MCPY04;
+ Debug.Assert(len >= 4 && len < 8);
+ *(int*)dest = *(int*)src;
+ *(int*)(destEnd - 4) = *(int*)(srcEnd - 4);
+ return;
+
+ MCPY04:
+ // Copy the first byte; when 2 or 3 bytes remain, also unconditionally copy the last 2 bytes, then return.
+ Debug.Assert(len < 4);
+ if (len == 0) return;
+ *dest = *src;
+ if ((len & 2) == 0) return;
+ *(short*)(destEnd - 2) = *(short*)(srcEnd - 2);
+ return;
+
+ MCPY05:
+ // PInvoke to the native version when the copy length exceeds the threshold.
+ if (len > CopyThreshold)
+ {
+ goto PInvoke;
+ }
+
+ // Copy 64 bytes at a time until the remainder is less than 64.
+ // If the remainder is greater than 16 bytes, jump to MCPY00. Otherwise, unconditionally copy the last 16 bytes and return.
+ Debug.Assert(len > 64 && len <= CopyThreshold);
+ nuint n = len >> 6;
+
+ MCPY06:
+#if HAS_CUSTOM_BLOCKS
+ *(Block64*)dest = *(Block64*)src;
+#elif BIT64
+ *(long*)dest = *(long*)src;
+ *(long*)(dest + 8) = *(long*)(src + 8);
+ *(long*)(dest + 16) = *(long*)(src + 16);
+ *(long*)(dest + 24) = *(long*)(src + 24);
+ *(long*)(dest + 32) = *(long*)(src + 32);
+ *(long*)(dest + 40) = *(long*)(src + 40);
+ *(long*)(dest + 48) = *(long*)(src + 48);
+ *(long*)(dest + 56) = *(long*)(src + 56);
+#else
+ *(int*)dest = *(int*)src;
+ *(int*)(dest + 4) = *(int*)(src + 4);
+ *(int*)(dest + 8) = *(int*)(src + 8);
+ *(int*)(dest + 12) = *(int*)(src + 12);
+ *(int*)(dest + 16) = *(int*)(src + 16);
+ *(int*)(dest + 20) = *(int*)(src + 20);
+ *(int*)(dest + 24) = *(int*)(src + 24);
+ *(int*)(dest + 28) = *(int*)(src + 28);
+ *(int*)(dest + 32) = *(int*)(src + 32);
+ *(int*)(dest + 36) = *(int*)(src + 36);
+ *(int*)(dest + 40) = *(int*)(src + 40);
+ *(int*)(dest + 44) = *(int*)(src + 44);
+ *(int*)(dest + 48) = *(int*)(src + 48);
+ *(int*)(dest + 52) = *(int*)(src + 52);
+ *(int*)(dest + 56) = *(int*)(src + 56);
+ *(int*)(dest + 60) = *(int*)(src + 60);
+#endif
+ dest += 64;
+ src += 64;
+ n--;
+ if (n != 0) goto MCPY06;
+
+ len %= 64;
+ if (len > 16) goto MCPY00;
+#if HAS_CUSTOM_BLOCKS
+ *(Block16*)(destEnd - 16) = *(Block16*)(srcEnd - 16);
+#elif BIT64
+ *(long*)(destEnd - 16) = *(long*)(srcEnd - 16);
+ *(long*)(destEnd - 8) = *(long*)(srcEnd - 8);
+#else
+ *(int*)(destEnd - 16) = *(int*)(srcEnd - 16);
+ *(int*)(destEnd - 12) = *(int*)(srcEnd - 12);
+ *(int*)(destEnd - 8) = *(int*)(srcEnd - 8);
+ *(int*)(destEnd - 4) = *(int*)(srcEnd - 4);
+#endif
+ return;
+
+ PInvoke:
+ _Memmove(dest, src, len);
+ }
+
+ // This method has a different signature on x64 than on other platforms, for performance reasons.
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ internal static void Memmove<T>(ref T destination, ref T source, nuint elementCount)
+ {
+ if (!RuntimeHelpers.IsReferenceOrContainsReferences<T>())
+ {
+ // Blittable memmove
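+ // (T contains no GC references, so the elements can be copied as raw bytes.)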
+
+ Memmove(
+ ref Unsafe.As<T, byte>(ref destination),
+ ref Unsafe.As<T, byte>(ref source),
+ elementCount * (nuint)Unsafe.SizeOf<T>());
+ }
+ else
+ {
+ // Non-blittable memmove
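+ // (T contains GC references, so the copy must use the GC-aware bulk move
+ // below, which preserves write-barrier semantics.)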
+
+ // Try to avoid calling RhBulkMoveWithWriteBarrier if we can get away
+ // with a no-op.
+ if (!Unsafe.AreSame(ref destination, ref source) && elementCount != 0)
+ {
+ RuntimeImports.RhBulkMoveWithWriteBarrier(
+ ref Unsafe.As<T, byte>(ref destination),
+ ref Unsafe.As<T, byte>(ref source),
+ elementCount * (nuint)Unsafe.SizeOf<T>());
+ }
+ }
+ }
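+
+ // Hypothetical usage sketch for the overload above: from inside this class,
+ // copying 'count' elements between two T[] arrays would read
+ // Memmove(ref dst[0], ref src[0], (nuint)count);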
+
+ // This method has a different signature on x64 than on other platforms, for performance reasons.
+ private static void Memmove(ref byte dest, ref byte src, nuint len)
+ {
+#if AMD64 || (BIT32 && !ARM)
+ const nuint CopyThreshold = 2048;
+#elif ARM64
+#if PLATFORM_WINDOWS
+ // Determined optimal value for Windows.
+ // https://github.com/dotnet/coreclr/issues/13843
+ const nuint CopyThreshold = ulong.MaxValue;
+#else // PLATFORM_WINDOWS
+ // Managed code is currently faster than glibc unoptimized memmove
+ // TODO-ARM64-UNIX-OPT revisit when glibc optimized memmove is in Linux distros
+ // https://github.com/dotnet/coreclr/issues/13844
+ const nuint CopyThreshold = ulong.MaxValue;
+#endif // PLATFORM_WINDOWS
+#else
+ const nuint CopyThreshold = 512;
+#endif // AMD64 || (BIT32 && !ARM)
+
+ // Handle overlapping buffers out of line: a perfect overlap is a no-op; any other overlap falls through to the native P/Invoke.
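+ // Unsafe.ByteOffset plays the same unsigned-wraparound role here as the raw
+ // pointer subtraction in the pointer-based Memmove above; the rest of this
+ // method mirrors that one label for label.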
+ if (((nuint)Unsafe.ByteOffset(ref src, ref dest) < len) || ((nuint)Unsafe.ByteOffset(ref dest, ref src) < len))
+ {
+ goto BuffersOverlap;
+ }
+
+ // Use "(IntPtr)(nint)len" to avoid overflow checking on the explicit cast to IntPtr
+
+ ref byte srcEnd = ref Unsafe.Add(ref src, (IntPtr)(nint)len);
+ ref byte destEnd = ref Unsafe.Add(ref dest, (IntPtr)(nint)len);
+
+ if (len <= 16)
+ goto MCPY02;
+ if (len > 64)
+ goto MCPY05;
+
+ MCPY00:
+ // Copy in 16-byte chunks, leaving the tail for MCPY01 to handle.
+ Debug.Assert(len > 16 && len <= 64);
+#if HAS_CUSTOM_BLOCKS
+ Unsafe.As<byte, Block16>(ref dest) = Unsafe.As<byte, Block16>(ref src); // [0,16]
+#elif BIT64
+ Unsafe.As<byte, long>(ref dest) = Unsafe.As<byte, long>(ref src);
+ Unsafe.As<byte, long>(ref Unsafe.Add(ref dest, 8)) = Unsafe.As<byte, long>(ref Unsafe.Add(ref src, 8)); // [0,16]
+#else
+ Unsafe.As<byte, int>(ref dest) = Unsafe.As<byte, int>(ref src);
+ Unsafe.As<byte, int>(ref Unsafe.Add(ref dest, 4)) = Unsafe.As<byte, int>(ref Unsafe.Add(ref src, 4));
+ Unsafe.As<byte, int>(ref Unsafe.Add(ref dest, 8)) = Unsafe.As<byte, int>(ref Unsafe.Add(ref src, 8));
+ Unsafe.As<byte, int>(ref Unsafe.Add(ref dest, 12)) = Unsafe.As<byte, int>(ref Unsafe.Add(ref src, 12)); // [0,16]
+#endif
+ if (len <= 32)
+ goto MCPY01;
+#if HAS_CUSTOM_BLOCKS
+ Unsafe.As<byte, Block16>(ref Unsafe.Add(ref dest, 16)) = Unsafe.As<byte, Block16>(ref Unsafe.Add(ref src, 16)); // [0,32]
+#elif BIT64
+ Unsafe.As<byte, long>(ref Unsafe.Add(ref dest, 16)) = Unsafe.As<byte, long>(ref Unsafe.Add(ref src, 16));
+ Unsafe.As<byte, long>(ref Unsafe.Add(ref dest, 24)) = Unsafe.As<byte, long>(ref Unsafe.Add(ref src, 24)); // [0,32]
+#else
+ Unsafe.As<byte, int>(ref Unsafe.Add(ref dest, 16)) = Unsafe.As<byte, int>(ref Unsafe.Add(ref src, 16));
+ Unsafe.As<byte, int>(ref Unsafe.Add(ref dest, 20)) = Unsafe.As<byte, int>(ref Unsafe.Add(ref src, 20));
+ Unsafe.As<byte, int>(ref Unsafe.Add(ref dest, 24)) = Unsafe.As<byte, int>(ref Unsafe.Add(ref src, 24));
+ Unsafe.As<byte, int>(ref Unsafe.Add(ref dest, 28)) = Unsafe.As<byte, int>(ref Unsafe.Add(ref src, 28)); // [0,32]
+#endif
+ if (len <= 48)
+ goto MCPY01;
+#if HAS_CUSTOM_BLOCKS
+ Unsafe.As<byte, Block16>(ref Unsafe.Add(ref dest, 32)) = Unsafe.As<byte, Block16>(ref Unsafe.Add(ref src, 32)); // [0,48]
+#elif BIT64
+ Unsafe.As<byte, long>(ref Unsafe.Add(ref dest, 32)) = Unsafe.As<byte, long>(ref Unsafe.Add(ref src, 32));
+ Unsafe.As<byte, long>(ref Unsafe.Add(ref dest, 40)) = Unsafe.As<byte, long>(ref Unsafe.Add(ref src, 40)); // [0,48]
+#else
+ Unsafe.As<byte, int>(ref Unsafe.Add(ref dest, 32)) = Unsafe.As<byte, int>(ref Unsafe.Add(ref src, 32));
+ Unsafe.As<byte, int>(ref Unsafe.Add(ref dest, 36)) = Unsafe.As<byte, int>(ref Unsafe.Add(ref src, 36));
+ Unsafe.As<byte, int>(ref Unsafe.Add(ref dest, 40)) = Unsafe.As<byte, int>(ref Unsafe.Add(ref src, 40));
+ Unsafe.As<byte, int>(ref Unsafe.Add(ref dest, 44)) = Unsafe.As<byte, int>(ref Unsafe.Add(ref src, 44)); // [0,48]
+#endif
+
+ MCPY01:
+ // Unconditionally copy the last 16 bytes using destEnd and srcEnd and return.
+ Debug.Assert(len > 16 && len <= 64);
+#if HAS_CUSTOM_BLOCKS
+ Unsafe.As<byte, Block16>(ref Unsafe.Add(ref destEnd, -16)) = Unsafe.As<byte, Block16>(ref Unsafe.Add(ref srcEnd, -16));
+#elif BIT64
+ Unsafe.As<byte, long>(ref Unsafe.Add(ref destEnd, -16)) = Unsafe.As<byte, long>(ref Unsafe.Add(ref srcEnd, -16));
+ Unsafe.As<byte, long>(ref Unsafe.Add(ref destEnd, -8)) = Unsafe.As<byte, long>(ref Unsafe.Add(ref srcEnd, -8));
+#else
+ Unsafe.As<byte, int>(ref Unsafe.Add(ref destEnd, -16)) = Unsafe.As<byte, int>(ref Unsafe.Add(ref srcEnd, -16));
+ Unsafe.As<byte, int>(ref Unsafe.Add(ref destEnd, -12)) = Unsafe.As<byte, int>(ref Unsafe.Add(ref srcEnd, -12));
+ Unsafe.As<byte, int>(ref Unsafe.Add(ref destEnd, -8)) = Unsafe.As<byte, int>(ref Unsafe.Add(ref srcEnd, -8));
+ Unsafe.As<byte, int>(ref Unsafe.Add(ref destEnd, -4)) = Unsafe.As<byte, int>(ref Unsafe.Add(ref srcEnd, -4));
+#endif
+ return;
+
+ MCPY02:
+ // Copy the first 8 bytes and then unconditionally copy the last 8 bytes and return.
+ if ((len & 24) == 0)
+ goto MCPY03;
+ Debug.Assert(len >= 8 && len <= 16);
+#if BIT64
+ Unsafe.As<byte, long>(ref dest) = Unsafe.As<byte, long>(ref src);
+ Unsafe.As<byte, long>(ref Unsafe.Add(ref destEnd, -8)) = Unsafe.As<byte, long>(ref Unsafe.Add(ref srcEnd, -8));
+#else
+ Unsafe.As<byte, int>(ref dest) = Unsafe.As<byte, int>(ref src);
+ Unsafe.As<byte, int>(ref Unsafe.Add(ref dest, 4)) = Unsafe.As<byte, int>(ref Unsafe.Add(ref src, 4));
+ Unsafe.As<byte, int>(ref Unsafe.Add(ref destEnd, -8)) = Unsafe.As<byte, int>(ref Unsafe.Add(ref srcEnd, -8));
+ Unsafe.As<byte, int>(ref Unsafe.Add(ref destEnd, -4)) = Unsafe.As<byte, int>(ref Unsafe.Add(ref srcEnd, -4));
+#endif
+ return;
+
+ MCPY03:
+ // Copy the first 4 bytes and then unconditionally copy the last 4 bytes and return.
+ if ((len & 4) == 0)
+ goto MCPY04;
+ Debug.Assert(len >= 4 && len < 8);
+ Unsafe.As<byte, int>(ref dest) = Unsafe.As<byte, int>(ref src);
+ Unsafe.As<byte, int>(ref Unsafe.Add(ref destEnd, -4)) = Unsafe.As<byte, int>(ref Unsafe.Add(ref srcEnd, -4));
+ return;
+
+ MCPY04:
+ // Copy the first byte; when 2 or 3 bytes remain, also unconditionally copy the last 2 bytes, then return.
+ Debug.Assert(len < 4);
+ if (len == 0)
+ return;
+ dest = src;
+ if ((len & 2) == 0)
+ return;
+ Unsafe.As<byte, short>(ref Unsafe.Add(ref destEnd, -2)) = Unsafe.As<byte, short>(ref Unsafe.Add(ref srcEnd, -2));
+ return;
+
+ MCPY05:
+ // PInvoke to the native version when the copy length exceeds the threshold.
+ if (len > CopyThreshold)
+ {
+ goto PInvoke;
+ }
+
+ // Copy 64 bytes at a time until the remainder is less than 64.
+ // If the remainder is greater than 16 bytes, jump to MCPY00. Otherwise, unconditionally copy the last 16 bytes and return.
+ Debug.Assert(len > 64 && len <= CopyThreshold);
+ nuint n = len >> 6;
+
+ MCPY06:
+#if HAS_CUSTOM_BLOCKS
+ Unsafe.As<byte, Block64>(ref dest) = Unsafe.As<byte, Block64>(ref src);
+#elif BIT64
+ Unsafe.As<byte, long>(ref dest) = Unsafe.As<byte, long>(ref src);
+ Unsafe.As<byte, long>(ref Unsafe.Add(ref dest, 8)) = Unsafe.As<byte, long>(ref Unsafe.Add(ref src, 8));
+ Unsafe.As<byte, long>(ref Unsafe.Add(ref dest, 16)) = Unsafe.As<byte, long>(ref Unsafe.Add(ref src, 16));
+ Unsafe.As<byte, long>(ref Unsafe.Add(ref dest, 24)) = Unsafe.As<byte, long>(ref Unsafe.Add(ref src, 24));
+ Unsafe.As<byte, long>(ref Unsafe.Add(ref dest, 32)) = Unsafe.As<byte, long>(ref Unsafe.Add(ref src, 32));
+ Unsafe.As<byte, long>(ref Unsafe.Add(ref dest, 40)) = Unsafe.As<byte, long>(ref Unsafe.Add(ref src, 40));
+ Unsafe.As<byte, long>(ref Unsafe.Add(ref dest, 48)) = Unsafe.As<byte, long>(ref Unsafe.Add(ref src, 48));
+ Unsafe.As<byte, long>(ref Unsafe.Add(ref dest, 56)) = Unsafe.As<byte, long>(ref Unsafe.Add(ref src, 56));
+#else
+ Unsafe.As<byte, int>(ref dest) = Unsafe.As<byte, int>(ref src);
+ Unsafe.As<byte, int>(ref Unsafe.Add(ref dest, 4)) = Unsafe.As<byte, int>(ref Unsafe.Add(ref src, 4));
+ Unsafe.As<byte, int>(ref Unsafe.Add(ref dest, 8)) = Unsafe.As<byte, int>(ref Unsafe.Add(ref src, 8));
+ Unsafe.As<byte, int>(ref Unsafe.Add(ref dest, 12)) = Unsafe.As<byte, int>(ref Unsafe.Add(ref src, 12));
+ Unsafe.As<byte, int>(ref Unsafe.Add(ref dest, 16)) = Unsafe.As<byte, int>(ref Unsafe.Add(ref src, 16));
+ Unsafe.As<byte, int>(ref Unsafe.Add(ref dest, 20)) = Unsafe.As<byte, int>(ref Unsafe.Add(ref src, 20));
+ Unsafe.As<byte, int>(ref Unsafe.Add(ref dest, 24)) = Unsafe.As<byte, int>(ref Unsafe.Add(ref src, 24));
+ Unsafe.As<byte, int>(ref Unsafe.Add(ref dest, 28)) = Unsafe.As<byte, int>(ref Unsafe.Add(ref src, 28));
+ Unsafe.As<byte, int>(ref Unsafe.Add(ref dest, 32)) = Unsafe.As<byte, int>(ref Unsafe.Add(ref src, 32));
+ Unsafe.As<byte, int>(ref Unsafe.Add(ref dest, 36)) = Unsafe.As<byte, int>(ref Unsafe.Add(ref src, 36));
+ Unsafe.As<byte, int>(ref Unsafe.Add(ref dest, 40)) = Unsafe.As<byte, int>(ref Unsafe.Add(ref src, 40));
+ Unsafe.As<byte, int>(ref Unsafe.Add(ref dest, 44)) = Unsafe.As<byte, int>(ref Unsafe.Add(ref src, 44));
+ Unsafe.As<byte, int>(ref Unsafe.Add(ref dest, 48)) = Unsafe.As<byte, int>(ref Unsafe.Add(ref src, 48));
+ Unsafe.As<byte, int>(ref Unsafe.Add(ref dest, 52)) = Unsafe.As<byte, int>(ref Unsafe.Add(ref src, 52));
+ Unsafe.As<byte, int>(ref Unsafe.Add(ref dest, 56)) = Unsafe.As<byte, int>(ref Unsafe.Add(ref src, 56));
+ Unsafe.As<byte, int>(ref Unsafe.Add(ref dest, 60)) = Unsafe.As<byte, int>(ref Unsafe.Add(ref src, 60));
+#endif
+ dest = ref Unsafe.Add(ref dest, 64);
+ src = ref Unsafe.Add(ref src, 64);
+ n--;
+ if (n != 0)
+ goto MCPY06;
+
+ len %= 64;
+ if (len > 16)
+ goto MCPY00;
+#if HAS_CUSTOM_BLOCKS
+ Unsafe.As<byte, Block16>(ref Unsafe.Add(ref destEnd, -16)) = Unsafe.As<byte, Block16>(ref Unsafe.Add(ref srcEnd, -16));
+#elif BIT64
+ Unsafe.As<byte, long>(ref Unsafe.Add(ref destEnd, -16)) = Unsafe.As<byte, long>(ref Unsafe.Add(ref srcEnd, -16));
+ Unsafe.As<byte, long>(ref Unsafe.Add(ref destEnd, -8)) = Unsafe.As<byte, long>(ref Unsafe.Add(ref srcEnd, -8));
+#else
+ Unsafe.As<byte, int>(ref Unsafe.Add(ref destEnd, -16)) = Unsafe.As<byte, int>(ref Unsafe.Add(ref srcEnd, -16));
+ Unsafe.As<byte, int>(ref Unsafe.Add(ref destEnd, -12)) = Unsafe.As<byte, int>(ref Unsafe.Add(ref srcEnd, -12));
+ Unsafe.As<byte, int>(ref Unsafe.Add(ref destEnd, -8)) = Unsafe.As<byte, int>(ref Unsafe.Add(ref srcEnd, -8));
+ Unsafe.As<byte, int>(ref Unsafe.Add(ref destEnd, -4)) = Unsafe.As<byte, int>(ref Unsafe.Add(ref srcEnd, -4));
+#endif
+ return;
+
+ BuffersOverlap:
+ // If the buffers overlap perfectly, there's no point in copying the data.
+ if (Unsafe.AreSame(ref dest, ref src))
+ {
+ return;
+ }
+
+ PInvoke:
+ _Memmove(ref dest, ref src, len);
+ }
+
+ // Non-inlinable wrapper around the QCall that avoids polluting the fast path
+ // with P/Invoke prolog/epilog.
+ [MethodImplAttribute(MethodImplOptions.NoInlining)]
+ private static unsafe void _Memmove(byte* dest, byte* src, nuint len)
+ {
+ __Memmove(dest, src, len);
+ }
+
+ // Non-inlinable wrapper around the QCall that avoids polluting the fast path
+ // with P/Invoke prolog/epilog.
+ [MethodImplAttribute(MethodImplOptions.NoInlining)]
+ private static unsafe void _Memmove(ref byte dest, ref byte src, nuint len)
+ {
+ fixed (byte* pDest = &dest)
+ fixed (byte* pSrc = &src)
+ __Memmove(pDest, pSrc, len);
+ }
+
+#if HAS_CUSTOM_BLOCKS
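+ // Empty fixed-size structs: assigning one of these copies 16 or 64 bytes at
+ // once, which the JIT can lower to wide register (e.g. SIMD) moves.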
+ [StructLayout(LayoutKind.Sequential, Size = 16)]
+ private struct Block16 { }
+
+ [StructLayout(LayoutKind.Sequential, Size = 64)]
+ private struct Block64 { }
+#endif // HAS_CUSTOM_BLOCKS
+ }
+}