Welcome to the mirror list, hosted at ThFree Co, Russian Federation.

github.com/mono/corert.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
Diffstat (limited to 'src/System.Private.CoreLib/shared/System/Globalization/TextInfo.cs')
-rw-r--r-- src/System.Private.CoreLib/shared/System/Globalization/TextInfo.cs 315
1 files changed, 236 insertions, 79 deletions
diff --git a/src/System.Private.CoreLib/shared/System/Globalization/TextInfo.cs b/src/System.Private.CoreLib/shared/System/Globalization/TextInfo.cs
index 631c8c0f1..8073b4b56 100644
--- a/src/System.Private.CoreLib/shared/System/Globalization/TextInfo.cs
+++ b/src/System.Private.CoreLib/shared/System/Globalization/TextInfo.cs
@@ -13,6 +13,7 @@
////////////////////////////////////////////////////////////////////////////
using System.Diagnostics;
+using System.Runtime.InteropServices;
using System.Runtime.Serialization;
using System.Text;
@@ -75,22 +76,11 @@ namespace System.Globalization
FinishInitialization();
}
- void IDeserializationCallback.OnDeserialization(Object sender)
+ void IDeserializationCallback.OnDeserialization(object sender)
{
throw new PlatformNotSupportedException();
}
- //
- // Internal ordinal comparison functions
- //
-
- internal static int GetHashCodeOrdinalIgnoreCase(string s)
- {
- // This is the same as an case insensitive hash for Invariant
- // (not necessarily true for sorting, but OK for casing & then we apply normal hash code rules)
- return Invariant.GetCaseInsensitiveHashCode(s);
- }
-
public virtual int ANSICodePage => _cultureData.IDEFAULTANSICODEPAGE;
public virtual int OEMCodePage => _cultureData.IDEFAULTOEMCODEPAGE;
@@ -212,7 +202,224 @@ namespace System.Globalization
return ChangeCase(str, toUpper: false);
}
- private unsafe string ToLowerAsciiInvariant(string s)
+ private unsafe char ChangeCase(char c, bool toUpper) // Cases a single char by forwarding it to the pointer-based ChangeCase overload (declared outside this diff).
+ {
+ Debug.Assert(!_invariantMode); // Debug-only check: this path is not expected to run in invariant mode.
+
+ char dst = default;
+ ChangeCase(&c, 1, &dst, 1, toUpper); // Source and destination are one-char buffers: the parameter and the local above.
+ return dst;
+ }
+
+ private unsafe string ChangeCase(string source, bool toUpper) // Returns source cased per toUpper; tries a managed ASCII fast path before the culture-aware pointer overload.
+ {
+ Debug.Assert(!_invariantMode); // Debug-only check: this path is not expected to run in invariant mode.
+ Debug.Assert(source != null);
+
+ // If the string is empty, we're done.
+ if (source.Length == 0)
+ {
+ return string.Empty;
+ }
+
+ int sourcePos = 0; // Number of leading chars already handled by the fast path.
+ string result = null; // Allocated lazily: only once a char must change, or when falling back.
+
+ // If this culture's casing for ASCII is the same as invariant, try to take
+ // a fast path that'll work in managed code and ASCII rather than calling out
+ // to the OS for culture-aware casing.
+ if (IsAsciiCasingSameAsInvariant)
+ {
+ if (toUpper)
+ {
+ // Loop through each character.
+ for (sourcePos = 0; sourcePos < source.Length; sourcePos++)
+ {
+ // If the character is lower-case, we're going to need to allocate a string.
+ char c = source[sourcePos];
+ if ((uint)(c - 'a') <= 'z' - 'a') // Single unsigned comparison equivalent to 'a' <= c && c <= 'z'.
+ {
+ // Allocate the result string.
+ result = string.FastAllocateString(source.Length);
+ fixed (char* pResult = result)
+ {
+ // Copy over all of the characters examined thus far; none of them needed casing.
+ if (sourcePos > 0)
+ {
+ source.AsSpan(0, sourcePos).CopyTo(new Span<char>(pResult, sourcePos));
+ }
+
+ // And store the current character, upper-cased.
+ char* d = pResult + sourcePos;
+ *d++ = (char)(c & ~0x20); // Clearing bit 0x20 maps 'a'..'z' to 'A'..'Z'.
+ sourcePos++;
+
+ // Then continue looping through the remainder of the characters. If we hit
+ // a non-ASCII character, bail to fall back to culture-aware casing.
+ for (; sourcePos < source.Length; sourcePos++)
+ {
+ c = source[sourcePos];
+ if ((uint)(c - 'a') <= 'z' - 'a')
+ {
+ *d++ = (char)(c & ~0x20);
+ }
+ else if (!IsAscii(c))
+ {
+ break; // Leaves sourcePos at the non-ASCII char so the fallback resumes there.
+ }
+ else
+ {
+ *d++ = c;
+ }
+ }
+ }
+
+ break; // The inner loop already advanced sourcePos; exit the scan without incrementing it again.
+ }
+ else if (!IsAscii(c))
+ {
+ // The character isn't ASCII; bail to fall back to culture-aware casing.
+ break;
+ }
+ }
+ }
+ else // toUpper == false
+ {
+ // Loop through each character.
+ for (sourcePos = 0; sourcePos < source.Length; sourcePos++)
+ {
+ // If the character is upper-case, we're going to need to allocate a string.
+ char c = source[sourcePos];
+ if ((uint)(c - 'A') <= 'Z' - 'A') // Single unsigned comparison equivalent to 'A' <= c && c <= 'Z'.
+ {
+ // Allocate the result string.
+ result = string.FastAllocateString(source.Length);
+ fixed (char* pResult = result)
+ {
+ // Copy over all of the characters examined thus far; none of them needed casing.
+ if (sourcePos > 0)
+ {
+ source.AsSpan(0, sourcePos).CopyTo(new Span<char>(pResult, sourcePos));
+ }
+
+ // And store the current character, lower-cased.
+ char* d = pResult + sourcePos;
+ *d++ = (char)(c | 0x20); // Setting bit 0x20 maps 'A'..'Z' to 'a'..'z'.
+ sourcePos++;
+
+ // Then continue looping through the remainder of the characters. If we hit
+ // a non-ASCII character, bail to fall back to culture-aware casing.
+ for (; sourcePos < source.Length; sourcePos++)
+ {
+ c = source[sourcePos];
+ if ((uint)(c - 'A') <= 'Z' - 'A')
+ {
+ *d++ = (char)(c | 0x20);
+ }
+ else if (!IsAscii(c))
+ {
+ break; // Leaves sourcePos at the non-ASCII char so the fallback resumes there.
+ }
+ else
+ {
+ *d++ = c;
+ }
+ }
+ }
+
+ break; // The inner loop already advanced sourcePos; exit the scan without incrementing it again.
+ }
+ else if (!IsAscii(c))
+ {
+ // The character isn't ASCII; bail to fall back to culture-aware casing.
+ break;
+ }
+ }
+ }
+
+ // If we successfully iterated through all of the characters, we didn't need to fall back
+ // to culture-aware casing. In that case, if we allocated a result string, use it, otherwise
+ // just return the original string, as no modifications were necessary.
+ if (sourcePos == source.Length)
+ {
+ return result ?? source;
+ }
+ }
+
+ // Falling back to culture-aware casing. Make sure we have a result string to write into.
+ // If we need to allocate the result string, we'll also need to copy over to it any
+ // characters already examined.
+ if (result == null)
+ {
+ result = string.FastAllocateString(source.Length);
+ if (sourcePos > 0)
+ {
+ fixed (char* pResult = result)
+ {
+ source.AsSpan(0, sourcePos).CopyTo(new Span<char>(pResult, sourcePos));
+ }
+ }
+ }
+
+ // Do the casing operation on everything after what we already processed.
+ fixed (char* pSource = source)
+ {
+ fixed (char* pResult = result)
+ {
+ ChangeCase(pSource + sourcePos, source.Length - sourcePos, pResult + sourcePos, result.Length - sourcePos, toUpper); // Pointer-based overload, declared outside this diff.
+ }
+ }
+
+ return result;
+ }
+
+ internal unsafe void ChangeCase(ReadOnlySpan<char> source, Span<char> destination, bool toUpper) // Writes the cased form of source into destination; toUpper picks the direction.
+ {
+ Debug.Assert(!_invariantMode); // Debug-only check: this path is not expected to run in invariant mode.
+ Debug.Assert(destination.Length >= source.Length); // Debug-only check: destination must be able to hold every source char.
+
+ if (source.IsEmpty)
+ {
+ return; // Nothing to case; destination is left untouched.
+ }
+
+ fixed (char* pSource = &MemoryMarshal.GetReference(source)) // Pin both spans so they can be walked with raw pointers.
+ fixed (char* pResult = &MemoryMarshal.GetReference(destination))
+ {
+ if (IsAsciiCasingSameAsInvariant)
+ {
+ int length = 0; // Count of leading chars cased by the ASCII loops below.
+ char* a = pSource, b = pResult; // Read cursor and write cursor; they advance in lockstep.
+ if (toUpper)
+ {
+ while (length < source.Length && *a < 0x80) // Stop at the end of source or at the first non-ASCII char.
+ {
+ *b++ = ToUpperAsciiInvariant(*a++);
+ length++;
+ }
+ }
+ else
+ {
+ while (length < source.Length && *a < 0x80) // Stop at the end of source or at the first non-ASCII char.
+ {
+ *b++ = ToLowerAsciiInvariant(*a++);
+ length++;
+ }
+ }
+
+ if (length != source.Length) // Stopped early on a non-ASCII char: pass the remainder to the pointer-based overload.
+ {
+ ChangeCase(a, source.Length - length, b, destination.Length - length, toUpper);
+ }
+ }
+ else
+ {
+ ChangeCase(pSource, source.Length, pResult, destination.Length, toUpper); // No ASCII shortcut for this culture: pass everything to the pointer-based overload.
+ }
+ }
+ }
+
+ private static unsafe string ToLowerAsciiInvariant(string s)
{
if (s.Length == 0)
{
@@ -258,7 +465,7 @@ namespace System.Globalization
}
}
- internal void ToLowerAsciiInvariant(ReadOnlySpan<char> source, Span<char> destination)
+ internal static void ToLowerAsciiInvariant(ReadOnlySpan<char> source, Span<char> destination)
{
Debug.Assert(destination.Length >= source.Length);
@@ -268,7 +475,7 @@ namespace System.Globalization
}
}
- private unsafe string ToUpperAsciiInvariant(string s)
+ private static unsafe string ToUpperAsciiInvariant(string s)
{
if (s.Length == 0)
{
@@ -314,7 +521,7 @@ namespace System.Globalization
}
}
- internal void ToUpperAsciiInvariant(ReadOnlySpan<char> source, Span<char> destination)
+ internal static void ToUpperAsciiInvariant(ReadOnlySpan<char> source, Span<char> destination)
{
Debug.Assert(destination.Length >= source.Length);
@@ -405,7 +612,7 @@ namespace System.Globalization
// or not object refers to the same CultureInfo as the current instance.
//
////////////////////////////////////////////////////////////////////////
- public override bool Equals(Object obj)
+ public override bool Equals(object obj)
{
TextInfo that = obj as TextInfo;
@@ -602,11 +809,20 @@ namespace System.Globalization
{
Debug.Assert(charLen == 1 || charLen == 2, "[TextInfo.AddTitlecaseLetter] CharUnicodeInfo.InternalGetUnicodeCategory returned an unexpected charLen!");
- // for surrogate pairs do a simple ToUpper operation on the substring
if (charLen == 2)
{
- // Surrogate pair
- result.Append(ToUpper(input.Substring(inputIndex, charLen)));
+ // for surrogate pairs do a ToUpper operation on the substring
+ ReadOnlySpan<char> src = input.AsSpan(inputIndex, 2);
+ if (_invariantMode)
+ {
+ result.Append(src); // surrogate pair in invariant mode, so changing case is a nop
+ }
+ else
+ {
+ Span<char> dst = stackalloc char[2];
+ ChangeCase(src, dst, toUpper: true);
+ result.Append(dst);
+ }
inputIndex++;
}
else
@@ -693,64 +909,5 @@ namespace System.Globalization
|| uc == UnicodeCategory.ModifierLetter
|| uc == UnicodeCategory.OtherLetter);
}
-
- //
- // Get case-insensitive hash code for the specified string.
- //
- internal unsafe int GetCaseInsensitiveHashCode(string str)
- {
- // Validate inputs
- if (str == null)
- {
- throw new ArgumentNullException(nameof(str));
- }
-
- // This code assumes that ASCII casing is safe for whatever context is passed in.
- // this is true today, because we only ever call these methods on Invariant. It would be ideal to refactor
- // these methods so they were correct by construction and we could only ever use Invariant.
-
- uint hash = 5381;
- uint c;
-
- // Note: We assume that str contains only ASCII characters until
- // we hit a non-ASCII character to optimize the common case.
- for (int i = 0; i < str.Length; i++)
- {
- c = str[i];
- if (c >= 0x80)
- {
- return GetCaseInsensitiveHashCodeSlow(str);
- }
-
- // If we have a lowercase character, ANDing off 0x20
- // will make it an uppercase character.
- if ((c - 'a') <= ('z' - 'a'))
- {
- c = (uint)((int)c & ~0x20);
- }
-
- hash = ((hash << 5) + hash) ^ c;
- }
-
- return (int)hash;
- }
-
- private unsafe int GetCaseInsensitiveHashCodeSlow(string str)
- {
- Debug.Assert(str != null);
-
- string upper = ToUpper(str);
-
- uint hash = 5381;
- uint c;
-
- for (int i = 0; i < upper.Length; i++)
- {
- c = upper[i];
- hash = ((hash << 5) + hash) ^ c;
- }
-
- return (int)hash;
- }
}
}