Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/mono/corert.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAtsushi Kanamori <AtsushiKan@users.noreply.github.com>2017-04-06 21:51:38 +0300
committerGitHub <noreply@github.com>2017-04-06 21:51:38 +0300
commit312865e4a77cef8626cf23a4f9835470d1bc0526 (patch)
tree8c93b3a8f4f149f911d5c70c97685292931ec66c /src/System.Private.CoreLib
parentc1d3fe1c2df77a009b5bcfacd0f93977c08011ba (diff)
Port Marvin hash code to CoreRT (#3233)
Fix https://github.com/dotnet/corert/issues/2588 Port of SymCryptMarvin32() from marvin.cpp. https://github.com/dotnet/coreclr/blob/master/src/vm/marvin32.cpp#L219 - Tested on various seeds and data arrays up to 30 bytes long. (test data obtained by hacking CoreCLR to get test data with deterministic seeds.) - s0/s1 renamed to p0/p1 (for more consistency with algorithm description in patent.) - Verified that NUTC generates fully inlined code for Marving.ComputeHashString() (including reducing _rotl to rol or ror.) - Marvin specifies interpreting bytes in little-endian fashion - if this code ever runs on a big-endian machine, the result is probably not Marvin (though I can see how it's any less useful for the way we use it.) - This is not turned on by default (we don't yet have a switch to turn on randomized string hashing in CoreRT).
Diffstat (limited to 'src/System.Private.CoreLib')
-rw-r--r--src/System.Private.CoreLib/src/System.Private.CoreLib.csproj1
-rw-r--r--src/System.Private.CoreLib/src/System/Marvin.cs130
-rw-r--r--src/System.Private.CoreLib/src/System/String.Comparison.cs5
3 files changed, 136 insertions, 0 deletions
diff --git a/src/System.Private.CoreLib/src/System.Private.CoreLib.csproj b/src/System.Private.CoreLib/src/System.Private.CoreLib.csproj
index 601b17c9d..d6d02e3f7 100644
--- a/src/System.Private.CoreLib/src/System.Private.CoreLib.csproj
+++ b/src/System.Private.CoreLib/src/System.Private.CoreLib.csproj
@@ -300,6 +300,7 @@
<Compile Include="System\Int32.cs" />
<Compile Include="System\Int64.cs" />
<Compile Include="System\IntPtr.cs" />
+ <Compile Include="System\Marvin.cs" />
<Compile Include="System\Math.cs" />
<Compile Include="System\MathF.cs" />
<Compile Include="System\MissingFieldException.cs" />
diff --git a/src/System.Private.CoreLib/src/System/Marvin.cs b/src/System.Private.CoreLib/src/System/Marvin.cs
new file mode 100644
index 000000000..b4ac62be0
--- /dev/null
+++ b/src/System.Private.CoreLib/src/System/Marvin.cs
@@ -0,0 +1,130 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+using System.Diagnostics;
+using System.Runtime.CompilerServices;
+
+namespace System
+{
+ internal static class Marvin
+ {
+ public static uint ComputeStringHash(ref byte data, uint count, ulong seed)
+ {
+ uint p0 = (uint)seed;
+ uint p1 = (uint)(seed >> 32);
+
+ int byteOffset = 0; // declared as signed int so we don't have to cast everywhere (it's passed to Unsafe.Add() and used for nothing else.)
+
+ while (count >= 8)
+ {
+ p0 += Unsafe.As<byte, uint>(ref Unsafe.Add(ref data, byteOffset));
+ Block(ref p0, ref p1);
+
+ p0 += Unsafe.As<byte, uint>(ref Unsafe.Add(ref data, byteOffset + 4));
+ Block(ref p0, ref p1);
+
+ byteOffset += 8;
+ count -= 8;
+ }
+
+ switch (count)
+ {
+ case 4:
+ p0 += Unsafe.As<byte, uint>(ref Unsafe.Add(ref data, byteOffset));
+ Block(ref p0, ref p1);
+ goto case 0;
+
+ case 0:
+ p0 += 0x80u;
+ break;
+
+ case 5:
+ p0 += Unsafe.As<byte, uint>(ref Unsafe.Add(ref data, byteOffset));
+ byteOffset += 4;
+ Block(ref p0, ref p1);
+ goto case 1;
+
+ case 1:
+ p0 += 0x8000u | Unsafe.Add(ref data, byteOffset);
+ break;
+
+ case 6:
+ p0 += Unsafe.As<byte, uint>(ref Unsafe.Add(ref data, byteOffset));
+ byteOffset += 4;
+ Block(ref p0, ref p1);
+ goto case 2;
+
+ case 2:
+ p0 += 0x800000u | Unsafe.As<byte, ushort>(ref Unsafe.Add(ref data, byteOffset));
+ break;
+
+ case 7:
+ p0 += Unsafe.As<byte, uint>(ref Unsafe.Add(ref data, byteOffset));
+ byteOffset += 4;
+ Block(ref p0, ref p1);
+ goto case 3;
+
+ case 3:
+ p0 += 0x80000000u | Unsafe.As<byte, ushort>(ref Unsafe.Add(ref data, byteOffset)) | Unsafe.Add(ref data, byteOffset + 2);
+ break;
+
+ default:
+ Debug.Fail("Should not get here.");
+ throw new InvalidOperationException();
+ }
+
+ Block(ref p0, ref p1);
+ Block(ref p0, ref p1);
+
+ // At this point, p0 and p1 contains the 8-byte Marvin hash result. If we need a general purpose Marvin implementation in the future,
+ // this could be refactored to stop here. For now, String.GetHashCode() is the only user of this function and he wants an 4-byte hash code
+ // so this last step is specific to String.GetHashCode().
+
+ return p0 ^ p1;
+ }
+
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ private static void Block(ref uint rp0, ref uint rp1)
+ {
+ uint p0 = rp0;
+ uint p1 = rp1;
+
+ p1 ^= p0;
+ p0 = _rotl(p0, 20);
+
+ p0 += p1;
+ p1 = _rotl(p1, 9);
+
+ p1 ^= p0;
+ p0 = _rotl(p0, 27);
+
+ p0 += p1;
+ p1 = _rotl(p1, 19);
+
+ rp0 = p0;
+ rp1 = p1;
+ }
+
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ private static uint _rotl(uint value, int shift)
+ {
+ // This is expected to be optimized into a single rol (or ror with negated shift value) instruction
+ return (value << shift) | (value >> (32 - shift));
+ }
+
+ public static ulong DefaultSeed => s_defaultSeed;
+
+ private static ulong s_defaultSeed = GenerateSeed();
+
+ private static ulong GenerateSeed()
+ {
+ ulong seed;
+ unsafe
+ {
+ Interop.GetRandomBytes((byte*)&seed, sizeof(ulong));
+ }
+ return seed;
+ }
+ }
+}
diff --git a/src/System.Private.CoreLib/src/System/String.Comparison.cs b/src/System.Private.CoreLib/src/System/String.Comparison.cs
index aa303881e..8280142cf 100644
--- a/src/System.Private.CoreLib/src/System/String.Comparison.cs
+++ b/src/System.Private.CoreLib/src/System/String.Comparison.cs
@@ -4,6 +4,7 @@
using System.Diagnostics;
using System.Globalization;
+using System.Runtime.CompilerServices;
namespace System
{
@@ -996,6 +997,9 @@ namespace System
// they will return the same hash code.
public override int GetHashCode()
{
+#if FEATURE_RANDOMIZED_STRING_HASHING
+ return (int)Marvin.ComputeStringHash(ref Unsafe.As<char, byte>(ref _firstChar), (uint)(_stringLength * 2), Marvin.DefaultSeed);
+#else
unsafe
{
fixed (char* src = &_firstChar)
@@ -1038,6 +1042,7 @@ namespace System
return hash1 + (hash2 * 1566083941);
}
}
+#endif // FEATURE_RANDOMIZED_STRING_HASHING
}
// Determines whether a specified string is a prefix of the current instance