Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/mono/mono.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorresurrexit <resurrexit@users.noreply.github.com>2017-12-17 13:45:21 +0300
committerMarek Safar <marek.safar@gmail.com>2017-12-17 13:45:21 +0300
commit4a8adbd8dc4233d5bcdc0e19fe451aa94dd28745 (patch)
tree36fdcafe9a1086c3d25f84f303b7e7290fad45e1 /mcs/class/I18N
parent6d018681eefe6277d7df066aab275d92be472269 (diff)
[I18N] Include missing subset of shift_jis characters (#6232)
Diffstat (limited to 'mcs/class/I18N')
-rw-r--r--mcs/class/I18N/CJK/CP51932.cs7
-rw-r--r--mcs/class/I18N/CJK/Test/I18N.CJK.Test.cs9
-rw-r--r--mcs/class/I18N/CJK/jis.tablebin101572 -> 101572 bytes
-rw-r--r--mcs/class/I18N/tools/uni2tab.c22
4 files changed, 28 insertions, 10 deletions
diff --git a/mcs/class/I18N/CJK/CP51932.cs b/mcs/class/I18N/CJK/CP51932.cs
index c5b11e4aaef..188b58e164a 100644
--- a/mcs/class/I18N/CJK/CP51932.cs
+++ b/mcs/class/I18N/CJK/CP51932.cs
@@ -33,10 +33,9 @@
/*
- Well, there looks no jis.table source. Thus, it seems like it is
- generated from text files from Unicode Home Page such like
- ftp://ftp.unicode.org/Public/MAPPINGS/OBSOLETE/EASTASIA/JIS/JIS0208.TXT
- However, it is non-normative and in Japan it is contains many problem.
+ jis.table is generated by uni2tab.c, which requires CP932.TXT and
+ Unihan.txt as input files. However, it is non-normative and in Japan it
+ contains many problems.
FIXME: Some characters such as 0xFF0B (wide "plus") are missing in
that table.
diff --git a/mcs/class/I18N/CJK/Test/I18N.CJK.Test.cs b/mcs/class/I18N/CJK/Test/I18N.CJK.Test.cs
index 12a9d47bb62..2c045c450a8 100644
--- a/mcs/class/I18N/CJK/Test/I18N.CJK.Test.cs
+++ b/mcs/class/I18N/CJK/Test/I18N.CJK.Test.cs
@@ -161,6 +161,15 @@ namespace MonoTests.I18N.CJK
AssertDecode ("Test/texts/japanese-utf8.txt", "Test/texts/japanese-932.txt", 932);
}
+ [Test]
+ public void Bug28321 ()
+ {
+ var expected = "\u00D7\u00B4\u00B1\u00F7\u00B6\u00B0\u00A8\u00A7";
+ var text = Manager.GetEncoding ("shift_jis").GetString (Convert.FromBase64String ("gX6BTIF9gYCB94GLgU6BmA=="));
+
+ Assert.AreEqual (expected, text);
+ }
+
// EUC-JP
[Test]
diff --git a/mcs/class/I18N/CJK/jis.table b/mcs/class/I18N/CJK/jis.table
index 4d28d82d457..f35d9d5eef0 100644
--- a/mcs/class/I18N/CJK/jis.table
+++ b/mcs/class/I18N/CJK/jis.table
Binary files differ
diff --git a/mcs/class/I18N/tools/uni2tab.c b/mcs/class/I18N/tools/uni2tab.c
index a740131974e..0de297513f9 100644
--- a/mcs/class/I18N/tools/uni2tab.c
+++ b/mcs/class/I18N/tools/uni2tab.c
@@ -23,12 +23,15 @@
*/
/*
-
-Usage: uni2tab
-
-Required files from ftp.unicode.org: Unihan.txt, CP932.TXT
-
-*/
+ *
+ * Usage: uni2tab
+ *
+ * Required files from ftp.unicode.org: Unihan.txt, CP932.TXT
+ *
+ * Unihan.txt and CP932.TXT can be found at:
+ * ftp://www.unicode.org/Public/5.0.0/ucd/Unihan.txt
+ * ftp://ftp.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP932.TXT
+ */
#include <stdio.h>
#include <string.h>
@@ -321,6 +324,13 @@ static void convertSJISLine(char *buf)
/* Non-CJK characters within JIS */
processJis0208(code, (offset / 94) + 1, (offset % 94) + 1);
}
+ else if(code >= 0x00A7 && code <= 0x00F7)
+ {
+ /* Non-CJK characters within JIS for which unicodeToJis should not be
+ * edited. In addition to this, do not track lowJis and highJis. */
+ jisx0208ToUnicode[offset] = (unsigned short)(code & 0xFF);
+ jisx0208ToUnicode[offset + 1] = (((unsigned short)(code & 0x00FF)) >> 8);
+ }
}
/*