diff options
Diffstat (limited to 'src/System.Private.CoreLib/shared/System/Globalization/TextInfo.cs')
-rw-r--r-- | src/System.Private.CoreLib/shared/System/Globalization/TextInfo.cs | 315 |
1 files changed, 236 insertions, 79 deletions
diff --git a/src/System.Private.CoreLib/shared/System/Globalization/TextInfo.cs b/src/System.Private.CoreLib/shared/System/Globalization/TextInfo.cs index 631c8c0f1..8073b4b56 100644 --- a/src/System.Private.CoreLib/shared/System/Globalization/TextInfo.cs +++ b/src/System.Private.CoreLib/shared/System/Globalization/TextInfo.cs @@ -13,6 +13,7 @@ //////////////////////////////////////////////////////////////////////////// using System.Diagnostics; +using System.Runtime.InteropServices; using System.Runtime.Serialization; using System.Text; @@ -75,22 +76,11 @@ namespace System.Globalization FinishInitialization(); } - void IDeserializationCallback.OnDeserialization(Object sender) + void IDeserializationCallback.OnDeserialization(object sender) { throw new PlatformNotSupportedException(); } - // - // Internal ordinal comparison functions - // - - internal static int GetHashCodeOrdinalIgnoreCase(string s) - { - // This is the same as an case insensitive hash for Invariant - // (not necessarily true for sorting, but OK for casing & then we apply normal hash code rules) - return Invariant.GetCaseInsensitiveHashCode(s); - } - public virtual int ANSICodePage => _cultureData.IDEFAULTANSICODEPAGE; public virtual int OEMCodePage => _cultureData.IDEFAULTOEMCODEPAGE; @@ -212,7 +202,224 @@ namespace System.Globalization return ChangeCase(str, toUpper: false); } - private unsafe string ToLowerAsciiInvariant(string s) + private unsafe char ChangeCase(char c, bool toUpper) + { + Debug.Assert(!_invariantMode); + + char dst = default; + ChangeCase(&c, 1, &dst, 1, toUpper); + return dst; + } + + private unsafe string ChangeCase(string source, bool toUpper) + { + Debug.Assert(!_invariantMode); + Debug.Assert(source != null); + + // If the string is empty, we're done. + if (source.Length == 0) + { + return string.Empty; + } + + int sourcePos = 0; + string result = null; + + // If this culture's casing for ASCII is the same as invariant, try to take + // a fast path that'll work in managed code and ASCII rather than calling out + // to the OS for culture-aware casing. + if (IsAsciiCasingSameAsInvariant) + { + if (toUpper) + { + // Loop through each character. + for (sourcePos = 0; sourcePos < source.Length; sourcePos++) + { + // If the character is lower-case, we're going to need to allocate a string. + char c = source[sourcePos]; + if ((uint)(c - 'a') <= 'z' - 'a') + { + // Allocate the result string. + result = string.FastAllocateString(source.Length); + fixed (char* pResult = result) + { + // Store all of characters examined thus far. + if (sourcePos > 0) + { + source.AsSpan(0, sourcePos).CopyTo(new Span<char>(pResult, sourcePos)); + } + + // And store the current character, upper-cased. + char* d = pResult + sourcePos; + *d++ = (char)(c & ~0x20); + sourcePos++; + + // Then continue looping through the remainder of the characters. If we hit + // a non-ASCII character, bail to fall back to culture-aware casing. + for (; sourcePos < source.Length; sourcePos++) + { + c = source[sourcePos]; + if ((uint)(c - 'a') <= 'z' - 'a') + { + *d++ = (char)(c & ~0x20); + } + else if (!IsAscii(c)) + { + break; + } + else + { + *d++ = c; + } + } + } + + break; + } + else if (!IsAscii(c)) + { + // The character isn't ASCII; bail to fall back to a culture-aware casing. + break; + } + } + } + else // toUpper == false + { + // Loop through each character. + for (sourcePos = 0; sourcePos < source.Length; sourcePos++) + { + // If the character is upper-case, we're going to need to allocate a string. + char c = source[sourcePos]; + if ((uint)(c - 'A') <= 'Z' - 'A') + { + // Allocate the result string. + result = string.FastAllocateString(source.Length); + fixed (char* pResult = result) + { + // Store all of characters examined thus far. + if (sourcePos > 0) + { + source.AsSpan(0, sourcePos).CopyTo(new Span<char>(pResult, sourcePos)); + } + + // And store the current character, lower-cased. + char* d = pResult + sourcePos; + *d++ = (char)(c | 0x20); + sourcePos++; + + // Then continue looping through the remainder of the characters. If we hit + // a non-ASCII character, bail to fall back to culture-aware casing. + for (; sourcePos < source.Length; sourcePos++) + { + c = source[sourcePos]; + if ((uint)(c - 'A') <= 'Z' - 'A') + { + *d++ = (char)(c | 0x20); + } + else if (!IsAscii(c)) + { + break; + } + else + { + *d++ = c; + } + } + } + + break; + } + else if (!IsAscii(c)) + { + // The character isn't ASCII; bail to fall back to a culture-aware casing. + break; + } + } + } + + // If we successfully iterated through all of the characters, we didn't need to fall back + // to culture-aware casing. In that case, if we allocated a result string, use it, otherwise + // just return the original string, as no modifications were necessary. + if (sourcePos == source.Length) + { + return result ?? source; + } + } + + // Falling back to culture-aware casing. Make sure we have a result string to write into. + // If we need to allocate the result string, we'll also need to copy over to it any + // characters already examined. + if (result == null) + { + result = string.FastAllocateString(source.Length); + if (sourcePos > 0) + { + fixed (char* pResult = result) + { + source.AsSpan(0, sourcePos).CopyTo(new Span<char>(pResult, sourcePos)); + } + } + } + + // Do the casing operation on everything after what we already processed. + fixed (char* pSource = source) + { + fixed (char* pResult = result) + { + ChangeCase(pSource + sourcePos, source.Length - sourcePos, pResult + sourcePos, result.Length - sourcePos, toUpper); + } + } + + return result; + } + + internal unsafe void ChangeCase(ReadOnlySpan<char> source, Span<char> destination, bool toUpper) + { + Debug.Assert(!_invariantMode); + Debug.Assert(destination.Length >= source.Length); + + if (source.IsEmpty) + { + return; + } + + fixed (char* pSource = &MemoryMarshal.GetReference(source)) + fixed (char* pResult = &MemoryMarshal.GetReference(destination)) + { + if (IsAsciiCasingSameAsInvariant) + { + int length = 0; + char* a = pSource, b = pResult; + if (toUpper) + { + while (length < source.Length && *a < 0x80) + { + *b++ = ToUpperAsciiInvariant(*a++); + length++; + } + } + else + { + while (length < source.Length && *a < 0x80) + { + *b++ = ToLowerAsciiInvariant(*a++); + length++; + } + } + + if (length != source.Length) + { + ChangeCase(a, source.Length - length, b, destination.Length - length, toUpper); + } + } + else + { + ChangeCase(pSource, source.Length, pResult, destination.Length, toUpper); + } + } + } + + private static unsafe string ToLowerAsciiInvariant(string s) { if (s.Length == 0) { @@ -258,7 +465,7 @@ namespace System.Globalization } } - internal void ToLowerAsciiInvariant(ReadOnlySpan<char> source, Span<char> destination) + internal static void ToLowerAsciiInvariant(ReadOnlySpan<char> source, Span<char> destination) { Debug.Assert(destination.Length >= source.Length); @@ -268,7 +475,7 @@ namespace System.Globalization } } - private unsafe string ToUpperAsciiInvariant(string s) + private static unsafe string ToUpperAsciiInvariant(string s) { if (s.Length == 0) { @@ -314,7 +521,7 @@ namespace System.Globalization } } - internal void ToUpperAsciiInvariant(ReadOnlySpan<char> source, Span<char> destination) + internal static void ToUpperAsciiInvariant(ReadOnlySpan<char> source, Span<char> destination) { Debug.Assert(destination.Length >= source.Length); @@ -405,7 +612,7 @@ namespace System.Globalization // or not object refers to the same CultureInfo as the current instance. // //////////////////////////////////////////////////////////////////////// - public override bool Equals(Object obj) + public override bool Equals(object obj) { TextInfo that = obj as TextInfo; @@ -602,11 +809,20 @@ namespace System.Globalization { Debug.Assert(charLen == 1 || charLen == 2, "[TextInfo.AddTitlecaseLetter] CharUnicodeInfo.InternalGetUnicodeCategory returned an unexpected charLen!"); - // for surrogate pairs do a simple ToUpper operation on the substring if (charLen == 2) { - // Surrogate pair - result.Append(ToUpper(input.Substring(inputIndex, charLen))); + // for surrogate pairs do a ToUpper operation on the substring + ReadOnlySpan<char> src = input.AsSpan(inputIndex, 2); + if (_invariantMode) + { + result.Append(src); // surrogate pair in invariant mode, so changing case is a nop + } + else + { + Span<char> dst = stackalloc char[2]; + ChangeCase(src, dst, toUpper: true); + result.Append(dst); + } inputIndex++; } else @@ -693,64 +909,5 @@ namespace System.Globalization || uc == UnicodeCategory.ModifierLetter || uc == UnicodeCategory.OtherLetter); } - - // - // Get case-insensitive hash code for the specified string. - // - internal unsafe int GetCaseInsensitiveHashCode(string str) - { - // Validate inputs - if (str == null) - { - throw new ArgumentNullException(nameof(str)); - } - - // This code assumes that ASCII casing is safe for whatever context is passed in. - // this is true today, because we only ever call these methods on Invariant. It would be ideal to refactor - // these methods so they were correct by construction and we could only ever use Invariant. - - uint hash = 5381; - uint c; - - // Note: We assume that str contains only ASCII characters until - // we hit a non-ASCII character to optimize the common case. - for (int i = 0; i < str.Length; i++) - { - c = str[i]; - if (c >= 0x80) - { - return GetCaseInsensitiveHashCodeSlow(str); - } - - // If we have a lowercase character, ANDing off 0x20 - // will make it an uppercase character. - if ((c - 'a') <= ('z' - 'a')) - { - c = (uint)((int)c & ~0x20); - } - - hash = ((hash << 5) + hash) ^ c; - } - - return (int)hash; - } - - private unsafe int GetCaseInsensitiveHashCodeSlow(string str) - { - Debug.Assert(str != null); - - string upper = ToUpper(str); - - uint hash = 5381; - uint c; - - for (int i = 0; i < upper.Length; i++) - { - c = upper[i]; - hash = ((hash << 5) + hash) ^ c; - } - - return (int)hash; - } } } |