diff options
author | resurrexit <resurrexit@users.noreply.github.com> | 2017-12-17 13:45:21 +0300 |
---|---|---|
committer | Marek Safar <marek.safar@gmail.com> | 2017-12-17 13:45:21 +0300 |
commit | 4a8adbd8dc4233d5bcdc0e19fe451aa94dd28745 (patch) | |
tree | 36fdcafe9a1086c3d25f84f303b7e7290fad45e1 /mcs/class/I18N | |
parent | 6d018681eefe6277d7df066aab275d92be472269 (diff) |
[I18N] Include missing subset of shift_jis characters (#6232)
Diffstat (limited to 'mcs/class/I18N')
-rw-r--r-- | mcs/class/I18N/CJK/CP51932.cs | 7 | ||||
-rw-r--r-- | mcs/class/I18N/CJK/Test/I18N.CJK.Test.cs | 9 | ||||
-rw-r--r-- | mcs/class/I18N/CJK/jis.table | bin | 101572 -> 101572 bytes | |||
-rw-r--r-- | mcs/class/I18N/tools/uni2tab.c | 22 |
4 files changed, 28 insertions, 10 deletions
diff --git a/mcs/class/I18N/CJK/CP51932.cs b/mcs/class/I18N/CJK/CP51932.cs index c5b11e4aaef..188b58e164a 100644 --- a/mcs/class/I18N/CJK/CP51932.cs +++ b/mcs/class/I18N/CJK/CP51932.cs @@ -33,10 +33,9 @@ /* - Well, there looks no jis.table source. Thus, it seems like it is - generated from text files from Unicode Home Page such like - ftp://ftp.unicode.org/Public/MAPPINGS/OBSOLETE/EASTASIA/JIS/JIS0208.TXT - However, it is non-normative and in Japan it is contains many problem. + jis.table is generated from uni2tab.c, which requires CP932.TXT and + Unihan.txt as input files. However, it is non-normative and in Japan it is + contains many problem. FIXME: Some characters such as 0xFF0B (wide "plus") are missing in that table. diff --git a/mcs/class/I18N/CJK/Test/I18N.CJK.Test.cs b/mcs/class/I18N/CJK/Test/I18N.CJK.Test.cs index 12a9d47bb62..2c045c450a8 100644 --- a/mcs/class/I18N/CJK/Test/I18N.CJK.Test.cs +++ b/mcs/class/I18N/CJK/Test/I18N.CJK.Test.cs @@ -161,6 +161,15 @@ namespace MonoTests.I18N.CJK AssertDecode ("Test/texts/japanese-utf8.txt", "Test/texts/japanese-932.txt", 932); } + [Test] + public void Bug28321 () + { + var expected = "\u00D7\u00B4\u00B1\u00F7\u00B6\u00B0\u00A8\u00A7"; + var text = Manager.GetEncoding ("shift_jis").GetString (Convert.FromBase64String ("gX6BTIF9gYCB94GLgU6BmA==")); + + Assert.AreEqual (expected, text); + } + // EUC-JP [Test] diff --git a/mcs/class/I18N/CJK/jis.table b/mcs/class/I18N/CJK/jis.table Binary files differ index 4d28d82d457..f35d9d5eef0 100644 --- a/mcs/class/I18N/CJK/jis.table +++ b/mcs/class/I18N/CJK/jis.table diff --git a/mcs/class/I18N/tools/uni2tab.c b/mcs/class/I18N/tools/uni2tab.c index a740131974e..0de297513f9 100644 --- a/mcs/class/I18N/tools/uni2tab.c +++ b/mcs/class/I18N/tools/uni2tab.c @@ -23,12 +23,15 @@ */ /* - -Usage: uni2tab - -Required files from ftp.unicode.org: Unihan.txt, CP932.TXT - -*/ + * + * Usage: uni2tab + * + * Required files from ftp.unicode.org: Unihan.txt, CP932.TXT + * + * Unihan.txt and 
CP932.TXT can be found at: + * ftp://www.unicode.org/Public/5.0.0/ucd/Unihan.txt + * ftp://ftp.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP932.TXT + */ #include <stdio.h> #include <string.h> @@ -321,6 +324,13 @@ static void convertSJISLine(char *buf) /* Non-CJK characters within JIS */ processJis0208(code, (offset / 94) + 1, (offset % 94) + 1); } + else if(code >= 0x00A7 && code <= 0x00F7) + { + /* Non-CJK characters within JIS for which unicodeToJis should not be + * edited. In addition to this, do not track lowJis and highJis. */ + jisx0208ToUnicode[offset] = (unsigned short)(code & 0xFF); + jisx0208ToUnicode[offset + 1] = (((unsigned short)(code & 0x00FF)) >> 8); + } } /* |