Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/mono/mono.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAtsushi Eno <atsushieno@gmail.com>2005-06-28 11:23:06 +0400
committerAtsushi Eno <atsushieno@gmail.com>2005-06-28 11:23:06 +0400
commitdd9256ba20952bebea867eef89ad855d6d7fb802 (patch)
tree96dab187514b86df30a4983535e3706a7c22f838 /mcs/class/corlib/Mono.Globalization.Unicode
parent8d1f1e6c468c236a911ccfaf5702b5149b33fe96 (diff)
2005-06-28 Atsushi Enomoto <atsushi@ximian.com>
* create-mscompat-collation-table.cs : Fixed 0x8 category characters. Added hack for need-to-be-fixed characters to fall into 0xA category. * create-collation-element-table.cs : previous checkin seem failed :( * README: updated a bit. svn path=/trunk/mcs/; revision=46624
Diffstat (limited to 'mcs/class/corlib/Mono.Globalization.Unicode')
-rw-r--r--mcs/class/corlib/Mono.Globalization.Unicode/ChangeLog7
-rw-r--r--mcs/class/corlib/Mono.Globalization.Unicode/README17
-rw-r--r--mcs/class/corlib/Mono.Globalization.Unicode/create-collation-element-table.cs27
-rw-r--r--mcs/class/corlib/Mono.Globalization.Unicode/create-mscompat-collation-table.cs84
4 files changed, 105 insertions, 30 deletions
diff --git a/mcs/class/corlib/Mono.Globalization.Unicode/ChangeLog b/mcs/class/corlib/Mono.Globalization.Unicode/ChangeLog
index dbaee2fe603..e78193f8409 100644
--- a/mcs/class/corlib/Mono.Globalization.Unicode/ChangeLog
+++ b/mcs/class/corlib/Mono.Globalization.Unicode/ChangeLog
@@ -1,3 +1,10 @@
+2005-06-28 Atsushi Enomoto <atsushi@ximian.com>
+
+ * create-mscompat-collation-table.cs : Fixed 0x8 category characters.
+ Added hack for need-to-be-fixed characters to fall into 0xA category.
+ * create-collation-element-table.cs : previous checkin seem failed :(
+ * README: updated a bit.
+
2005-06-24 Atsushi Enomoto <atsushi@ximian.com>
* CodePointIndexer.cs :
diff --git a/mcs/class/corlib/Mono.Globalization.Unicode/README b/mcs/class/corlib/Mono.Globalization.Unicode/README
index be5168547f0..8a00092f51b 100644
--- a/mcs/class/corlib/Mono.Globalization.Unicode/README
+++ b/mcs/class/corlib/Mono.Globalization.Unicode/README
@@ -1,13 +1,20 @@
This directory contains support for Unicode normalization and collation.
-This branch contains Normalization.cs and CollationElementTable.cs which
-can be built from generator executables and Unicode Character Database found
-at unicode.org (see Makefile for details).
+This directory contains several files to be autogenerated:
+
+ - MSCompatUnicodeTable.cs : support for CompareInfo.
+ - Normalization.cs : support for String.Normalize()
+ - CollationElementTable.cs : used in the code generators which generate
+ the above two sources.
+
+Targets in the Makefile often download dependency files, such as Unicode
+Character Database files. They are fed to source generators, which are
+written in C#.
Many of the files in this directory are subject to change (for example,
-being rewritten to generate C header like char-conversions.h and
-culture-info-table.h).
+being rewritten to generate resource files and possibly C headers like
+char-conversions.h and culture-info-table.h at a later stage).
Atsushi Eno <atsushi@ximian.com>
diff --git a/mcs/class/corlib/Mono.Globalization.Unicode/create-collation-element-table.cs b/mcs/class/corlib/Mono.Globalization.Unicode/create-collation-element-table.cs
index 3e8694c6836..4bb4134d506 100644
--- a/mcs/class/corlib/Mono.Globalization.Unicode/create-collation-element-table.cs
+++ b/mcs/class/corlib/Mono.Globalization.Unicode/create-collation-element-table.cs
@@ -56,13 +56,13 @@ namespace Mono.Globalization.Unicode
private void Serialize ()
{
- Console.WriteLine ("static readonly short [] collElem = new short [] {");
+ Console.WriteLine ("static readonly int [] collElem = new int [] {");
DumpArray (collElem, CollationElementTableUtil.Indexer.TotalCount, true);
Console.WriteLine ("};");
Console.WriteLine ("static readonly SortKeyValue [] keyValues = new SortKeyValue [] {");
for (int i = 0; i < keyCount; i++) {
SortKeyValue s = keyValues [i];
- Console.WriteLine (" new SortKeyValue ({0}, {1}, {2}, {3}, {4}),",
+ Console.WriteLine (" new SortKeyValue ({0}, 0x{1:X04}, 0x{2:X04}, 0x{3:X04}, 0x{4:X04}),",
s.Alt ? "true" : "false", s.Primary, s.Secondary,
s.Thirtiary, s.Quarternary);
}
@@ -87,7 +87,7 @@ namespace Mono.Globalization.Unicode
private void Parse ()
{
- int [] v = new int [4];
+ ushort [] v = new ushort [4];
TextReader reader = Console.In;
while (reader.Peek () != -1) {
@@ -111,20 +111,29 @@ namespace Mono.Globalization.Unicode
line = line.Substring (line.IndexOf (';') + 1).Trim ();
// count entries in a line
int entryPerLine = 0;
- for (int e = 0; (e = line.IndexOf ('[', e + 1)) >= 0;)
+ for (int e = 0; (e = line.IndexOf ('[', e) + 1) > 0;)
entryPerLine++;
int start = 0;
for (int e = 0; e < entryPerLine; e++) {
start = line.IndexOf ('[', start) + 1;
- string s = line.Substring (start, line.IndexOf (']', start) - start - 1);
+ string s = line.Substring (start, line.IndexOf (']', start) - start);
bool alt = false;
if (s [0] == '*')
alt = true;
string [] vslist = s.Substring (1).Split ('.');
- for (int i = 0; i < 4; i++)
- v [i] = int.Parse (vslist [i], NumberStyles.HexNumber);
+ bool skip = false;
+ for (int i = 0; i < 4; i++) {
+ if (vslist [i].Length > 4)
+ skip = true;
+ else
+ v [i] = ushort.Parse (vslist [i], NumberStyles.HexNumber);
+ }
+ if (skip) {
+// Console.Error.WriteLine ("WARNING: skipped entry {0:X}", cp);
+ continue;
+ }
idx = keyCount;
if (entryPerLine == 1) {
// idx = 0 means "no matching entry", so here we start from 1
@@ -151,7 +160,7 @@ namespace Mono.Globalization.Unicode
reader.Close ();
}
- private void AddEntry (bool alt, int [] v)
+ private void AddEntry (bool alt, ushort [] v)
{
if (keyCount == keyValues.Length) {
SortKeyValue [] tmp = new SortKeyValue [keyCount * 2];
@@ -160,7 +169,7 @@ namespace Mono.Globalization.Unicode
}
keyValues [keyCount] =
new SortKeyValue (alt,
- v [0], v [1], v [2], v [3]);
+ v [0], (byte) v [1], (byte) v [2], v [3]);
keyCount++;
}
}
diff --git a/mcs/class/corlib/Mono.Globalization.Unicode/create-mscompat-collation-table.cs b/mcs/class/corlib/Mono.Globalization.Unicode/create-mscompat-collation-table.cs
index 1a42cf15752..6c2ba5615c2 100644
--- a/mcs/class/corlib/Mono.Globalization.Unicode/create-mscompat-collation-table.cs
+++ b/mcs/class/corlib/Mono.Globalization.Unicode/create-mscompat-collation-table.cs
@@ -1450,7 +1450,7 @@ sw.Close ();
AddCharMap ('\u2423', 0x7, 1, 0); // open box
#endregion
- // FIXME: 09 should be more complete.
+ // category 09 - continued symbols from 08
fillIndex [0x9] = 2;
// misc tech mark
for (int cp = 0x2300; cp <= 0x237A; cp++)
@@ -1649,6 +1649,9 @@ sw.Close ();
// but inside a-to-z range.
// 3.there are some expanded characters that
// are not part of Unicode Standard NFKD.
+ // 4. some characters are letters according to
+ // IsLetter but not in sortkeys (maybe a Unicode
+ // version difference caused it).
switch (i) {
// 1. skipping them does not make sense
// case 0xD0: case 0xF0: case 0x131: case 0x138:
@@ -1666,11 +1669,12 @@ sw.Close ();
case 0xFE: // Icelandic Thorn
case 0xDF: // German ss
case 0xFF: // Latin small letter y with diaeresis
+ // 4.
+ case 0x1C0: case 0x1C1: case 0x1C2: case 0x1C3:
// not classified yet
// case 0x1A6: case 0x1A7: case 0x1A8: case 0x1A9:
// case 0x1AA: case 0x1B1: case 0x1B7: case 0x1B8:
// case 0x1B9: case 0x1BA: case 0x1BB: case 0x1BF:
-// case 0x1C0: case 0x1C1: case 0x1C2: case 0x1C3:
// case 0x1DD:
continue;
}
@@ -1848,8 +1852,8 @@ Console.Error.WriteLine ("----- {0:x04}", (int) orderedCyrillic [i]);
AddCharMap ('\u0BD7', 0x19, 0);
fillIndex [0x19] = 0xA;
// vowels
- for (int i = 0x0BD7; i < 0x0B94; i++)
- if (Char.IsLetter ((char) i))
+ for (int i = 0x0B82; i < 0x0B94; i++)
+ if (!IsIgnorable ((char) i))
AddCharMap ((char) i, 0x19, 2);
// special vowel
fillIndex [0x19] = 0x24;
@@ -2242,6 +2246,7 @@ Console.Error.WriteLine ("----- {0:x04}", (int) orderedCyrillic [i]);
switch (i) {
case 0xAB: // 08
case 0xB7: // 0A
+ case 0xBB: // 08
case 0x2329: // 09
case 0x232A: // 09
continue;
@@ -2272,14 +2277,6 @@ Console.Error.WriteLine ("----- {0:x04}", (int) orderedCyrillic [i]);
// FIXME: for 07 xx we need more love.
- // FIXME: 08 should be more complete.
- fillIndex [0x8] = 2;
- for (int cp = 0; cp < char.MaxValue; cp++)
- if (!map [cp].Defined &&
- Char.GetUnicodeCategory ((char) cp) ==
- UnicodeCategory.MathSymbol)
- AddCharMapGroup ((char) cp, 0x8, 1, 0);
-
// Characters w/ diacritical marks (NFKD)
for (int i = 0; i <= char.MaxValue; i++) {
if (map [i].Defined || IsIgnorable (i))
@@ -2318,6 +2315,54 @@ Console.Error.WriteLine ("----- {0:x04}", (int) orderedCyrillic [i]);
}
+ // category 08 - symbols
+ fillIndex [0x8] = 2;
+ // Here the Windows mapping is not straightforward. It
+ // does not seem to be computed; it looks manually sorted.
+ AddCharMapGroup ('+', 0x8, 1, 0); // plus
+ AddCharMapGroup ('\u2212', 0x8, 1, 0); // minus
+ AddCharMapGroup ('\u229D', 0x8, 1, 0); // minus
+ AddCharMapGroup ('\u2297', 0x8, 1, 0); // mul
+ AddCharMapGroup ('\u2044', 0x8, 1, 0); // div
+ AddCharMapGroup ('\u2215', 0x8, 1, 0); // div
+ AddCharMapGroup ('\u2217', 0x8, 1, 0); // mul
+ AddCharMapGroup ('\u2218', 0x8, 1, 0); // ring
+ AddCharMapGroup ('\u2219', 0x8, 1, 0); // bullet
+ AddCharMapGroup ('\u2213', 0x8, 1, 0); // minus-or-plus
+ AddCharMapGroup ('\u003C', 0x8, 1, 0); // <
+ AddCharMapGroup ('\u227A', 0x8, 1, 0); // precedes relation
+ AddCharMapGroup ('\u22B0', 0x8, 1, 0); // precedes under relation
+
+ for (int cp = 0; cp < 0x2300; cp++) {
+ if (cp == 0x200)
+ cp = 0x2200; // skip to 2200
+ if (cp == 0xAC) // SPECIAL CASE: skip
+ continue;
+ if (!map [cp].Defined &&
+// Char.GetUnicodeCategory ((char) cp) ==
+// UnicodeCategory.MathSymbol)
+ Char.IsSymbol ((char) cp))
+ AddCharMapGroup ((char) cp, 0x8, 1, 0);
+ // SPECIAL CASES: no idea why Windows sorts as such
+ switch (cp) {
+ case 0x3E:
+ AddCharMap ('\u227B', 0x8, 1, 0);
+ AddCharMap ('\u22B1', 0x8, 1, 0);
+ break;
+ case 0xB1:
+ AddCharMapGroup ('\u00AB', 0x8, 1, 0);
+ AddCharMapGroup ('\u226A', 0x8, 1, 0);
+ AddCharMapGroup ('\u00BB', 0x8, 1, 0);
+ AddCharMapGroup ('\u226B', 0x8, 1, 0);
+ break;
+ case 0xF7:
+ AddCharMap ('\u01C0', 0x8, 1, 0);
+ AddCharMap ('\u01C1', 0x8, 1, 0);
+ AddCharMap ('\u01C2', 0x8, 1, 0);
+ break;
+ }
+ }
+
#region Level2 adjustment
// Arabic Hamzah
diacritical [0x624] = 0x5;
@@ -2328,7 +2373,6 @@ Console.Error.WriteLine ("----- {0:x04}", (int) orderedCyrillic [i]);
diacritical [0x649] = 0x5; // 'alif maqs.uurah
diacritical [0x64A] = 0x7; // Yaa'
-
for (int i = 0; i < char.MaxValue; i++) {
byte mod = 0;
byte cat = map [i].Category;
@@ -2350,15 +2394,23 @@ Console.Error.WriteLine ("----- {0:x04}", (int) orderedCyrillic [i]);
}
#endregion
- // FIXME: this is hack but those which are
- // NonSpacingMark characters and still undefined
- // are likely to be nonspacing.
+ // FIXME: this is a hack, but NonSpacingMark
+ // characters that are still undefined are likely
+ // to be nonspacing.
for (int i = 0; i < char.MaxValue; i++)
if (!map [i].Defined &&
!IsIgnorable (i) &&
Char.GetUnicodeCategory ((char) i) ==
UnicodeCategory.NonSpacingMark)
AddCharMap ((char) i, 1, 1);
+
+ // FIXME: this is a hack, but Symbol characters that are
+ // still undefined are likely to fall into the 0xA category.
+ for (int i = 0; i < char.MaxValue; i++)
+ if (!map [i].Defined &&
+ !IsIgnorable (i) &&
+ Char.IsSymbol ((char) i))
+ AddCharMap ((char) i, 0xA, 1);
}
private void IncrementSequentialIndex (ref byte hangulCat)