From 1ed3c86dd72ba76b9a1048bbd4b756899f803302 Mon Sep 17 00:00:00 2001 From: Jeff Johnston Date: Fri, 27 Sep 2002 20:17:52 +0000 Subject: 2002-09-27 Jeff Johnston * libc/ctype/jp2uc.c: Change to use multiple arrays in jp2uc.h. Also convert to EUCJP before using arrays. For values not in the conversion arrays, return WEOF. * libc/ctype/jp2uc.h: Change from one array to a number of arrays to account for the fact that the originating table is not contiguous for the input values since some are invalid. --- newlib/libc/ctype/jp2uc.c | 99 ++++++++--- newlib/libc/ctype/jp2uc.h | 444 +++++++++++++++++++++------------------------- 2 files changed, 283 insertions(+), 260 deletions(-) (limited to 'newlib/libc/ctype') diff --git a/newlib/libc/ctype/jp2uc.c b/newlib/libc/ctype/jp2uc.c index 1e64b6974..044c76b86 100644 --- a/newlib/libc/ctype/jp2uc.c +++ b/newlib/libc/ctype/jp2uc.c @@ -40,47 +40,104 @@ wint_t _DEFUN (__jp2uc, (c, type), wint_t c _AND int type) { int index, adj; - unsigned char byte1, byte2, adj_byte1, adj_byte2; + unsigned char byte1, byte2; + wint_t ret; - /* we actually use a table of JIS to Unicode. For SJIS, we simply - note that SJIS is essentially JIS with the top bits on in each - byte. For EUCJP, we essentially do a translation to JIS before - accessing the table. */ + /* we actually use tables of EUCJP to Unicode. For JIS, we simply + note that EUCJP is essentially JIS with the top bits on in each + byte and translate to EUCJP. For SJIS, we do a translation to EUCJP before + accessing the tables. */ switch (type) { case JP_JIS: - index = ((c >> 8) - 0x21) * 0xfe + ((c & 0xff) - 0x21); - break; - case JP_SJIS: - index = ((c >> 8) - 0xa1) * 0xfe + ((c & 0xff) - 0xa1); + byte1 = (c >> 8) + 0x80; + byte2 = (c & 0xff) + 0x80; break; case JP_EUCJP: + byte1 = (c >> 8); + byte2 = (c & 0xff); + break; + case JP_SJIS: byte1 = c >> 8; byte2 = c & 0xff; - if (byte2 <= 0x7e || (byte2 & 0x1)) + if (byte2 <= 0x9e) { - adj = -0x22; - adj_byte2 = (byte2 & 0xfe) - 31; + adj = 0xa1 - 0x22; + byte2 = (byte2 - 31) + 0xa1; } else { - adj = -0x21; - adj_byte2 = byte2 - (0x7e + 0x21); + adj = 0xa1 - 0x21; + byte2 = (byte2 - 126) + 0xa1; } if (byte1 <= 0x9f) - adj_byte1 = ((byte1 - 112) >> 1) + adj; + byte1 = ((byte1 - 112) << 1) + adj; else - adj_byte1 = ((byte1 - 112) >> 1) + adj; - index = adj_byte1 * 0xfe + adj_byte2; + byte1 = ((byte1 - 176) << 1) + adj; + break; + default: + return WEOF; + } + + /* find conversion in jp2uc arrays */ + + /* handle larger ranges first */ + if (byte1 >= 0xb0 && byte1 <= 0xcf && c <= 0xcfd3) + { + index = (byte1 - 0xb0) * 0xfe + (byte2 - 0xa1); + return b02cf[index]; + } + else if (byte1 >= 0xd0 && byte1 <= 0xf4 && c <= 0xf4a6) + { + index = (byte1 - 0xd0) * 0xfe + (byte2 - 0xa1); + return d02f4[index]; + } + + /* handle smaller ranges here */ + switch (byte1) + { + case 0xA1: + return (wint_t)a1[byte2 - 0xa1]; + case 0xA2: + ret = a2[byte2 - 0xa1]; + if (ret != 0) + return (wint_t)ret; + break; + case 0xA3: + if (a3[byte2 - 0xa1]) + return (wint_t)(0xff00 + (byte2 - 0xa0)); + break; + case 0xA4: + if (byte2 <= 0xf3) + return (wint_t)(0x3000 + (byte2 - 0x60)); + break; + case 0xA5: + if (byte2 <= 0xf6) + return (wint_t)(0x3000 + byte2); + break; + case 0xA6: + ret = 0; + if (byte2 <= 0xd8) + ret = (wint_t)a6[byte2 - 0xa1]; + if (ret != 0) + return ret; + break; + case 0xA7: + ret = 0; + if (byte2 <= 0xf1) + ret = (wint_t)a7[byte2 - 0xa1]; + if (ret != 0) + return ret; + break; + case 0xA8: + if (byte2 <= 0xc0) + return (wint_t)a8[byte2 - 0xa1]; break; default: return WEOF; } - if (index < 0 || index > (sizeof(jp2uc) / sizeof(unsigned short))) - return WEOF; - - return (wint_t)jp2uc[index]; + return WEOF; } #endif /* MB_CAPABLE */ diff --git a/newlib/libc/ctype/jp2uc.h b/newlib/libc/ctype/jp2uc.h index a38009d02..04eb67d29 100644 --- a/newlib/libc/ctype/jp2uc.h +++ b/newlib/libc/ctype/jp2uc.h @@ -1,6 +1,7 @@ /* based on eucjp-208A.txt */ -static unsigned short jp2uc[] = { +/* a1 is contiguous from a1a1 to a1fe */ +static unsigned short a1[] = { 0x3000, 0x3001, 0x3002, @@ -94,7 +95,11 @@ static unsigned short jp2uc[] = { 0x25CB, 0x25CF, 0x25CE, - 0x25C7, + 0x25C7 +}; + +/* a2 has a number of holes between a2a1 and a2fe which we fill with 0x0000 */ +static unsigned short a2[] = { 0x25C6, 0x25A1, 0x25A0, @@ -109,6 +114,17 @@ static unsigned short jp2uc[] = { 0x2191, 0x2193, 0x3013, + 0x0000, + 0x0000, + 0x0000, + 0x0000, + 0x0000, + 0x0000, + 0x0000, + 0x0000, + 0x0000, + 0x0000, + 0x0000, 0x2208, 0x220B, 0x2286, @@ -117,6 +133,14 @@ static unsigned short jp2uc[] = { 0x2283, 0x222A, 0x2229, + 0x0000, + 0x0000, + 0x0000, + 0x0000, + 0x0000, + 0x0000, + 0x0000, + 0x0000, 0x2227, 0x2228, 0x00AC, @@ -124,6 +148,17 @@ static unsigned short jp2uc[] = { 0x21D4, 0x2200, 0x2203, + 0x2229, + 0x0000, + 0x0000, + 0x0000, + 0x0000, + 0x0000, + 0x0000, + 0x0000, + 0x0000, + 0x0000, + 0x0000, 0x2220, 0x22A5, 0x2312, @@ -139,6 +174,13 @@ static unsigned short jp2uc[] = { 0x2235, 0x222B, 0x222C, + 0x0000, + 0x0000, + 0x0000, + 0x0000, + 0x0000, + 0x0000, + 0x0000, 0x212B, 0x2030, 0x266F, @@ -147,238 +189,118 @@ static unsigned short jp2uc[] = { 0x2020, 0x2021, 0x00B6, - 0x25EF, - 0xFF10, - 0xFF11, - 0xFF12, - 0xFF13, - 0xFF14, - 0xFF15, - 0xFF16, - 0xFF17, - 0xFF18, - 0xFF19, - 0xFF21, - 0xFF22, - 0xFF23, - 0xFF24, - 0xFF25, - 0xFF26, - 0xFF27, - 0xFF28, - 0xFF29, - 0xFF2A, - 0xFF2B, - 0xFF2C, - 0xFF2D, - 0xFF2E, - 0xFF2F, - 0xFF30, - 0xFF31, - 0xFF32, - 0xFF33, - 0xFF34, - 0xFF35, - 0xFF36, - 0xFF37, - 0xFF38, - 0xFF39, - 0xFF3A, - 0xFF41, - 0xFF42, - 0xFF43, - 0xFF44, - 0xFF45, - 0xFF46, - 0xFF47, - 0xFF48, - 0xFF49, - 0xFF4A, - 0xFF4B, - 0xFF4C, - 0xFF4D, - 0xFF4E, - 0xFF4F, - 0xFF50, - 0xFF51, - 0xFF52, - 0xFF53, - 0xFF54, - 0xFF55, - 0xFF56, - 0xFF57, - 0xFF58, - 0xFF59, - 0xFF5A, - 0x3041, - 0x3042, - 0x3043, - 0x3044, - 0x3045, - 0x3046, - 0x3047, - 0x3048, - 0x3049, - 0x304A, - 0x304B, - 0x304C, - 0x304D, - 0x304E, - 0x304F, - 0x3050, - 0x3051, - 0x3052, - 0x3053, - 0x3054, - 0x3055, - 0x3056, - 0x3057, - 0x3058, - 0x3059, - 0x305A, - 0x305B, - 0x305C, - 0x305D, - 0x305E, - 0x305F, - 0x3060, - 0x3061, - 0x3062, - 0x3063, - 0x3064, - 0x3065, - 0x3066, - 0x3067, - 0x3068, - 0x3069, - 0x306A, - 0x306B, - 0x306C, - 0x306D, - 0x306E, - 0x306F, - 0x3070, - 0x3071, - 0x3072, - 0x3073, - 0x3074, - 0x3075, - 0x3076, - 0x3077, - 0x3078, - 0x3079, - 0x307A, - 0x307B, - 0x307C, - 0x307D, - 0x307E, - 0x307F, - 0x3080, - 0x3081, - 0x3082, - 0x3083, - 0x3084, - 0x3085, - 0x3086, - 0x3087, - 0x3088, - 0x3089, - 0x308A, - 0x308B, - 0x308C, - 0x308D, - 0x308E, - 0x308F, - 0x3090, - 0x3091, - 0x3092, - 0x3093, - 0x30A1, - 0x30A2, - 0x30A3, - 0x30A4, - 0x30A5, - 0x30A6, - 0x30A7, - 0x30A8, - 0x30A9, - 0x30AA, - 0x30AB, - 0x30AC, - 0x30AD, - 0x30AE, - 0x30AF, - 0x30B0, - 0x30B1, - 0x30B2, - 0x30B3, - 0x30B4, - 0x30B5, - 0x30B6, - 0x30B7, - 0x30B8, - 0x30B9, - 0x30BA, - 0x30BB, - 0x30BC, - 0x30BD, - 0x30BE, - 0x30BF, - 0x30C0, - 0x30C1, - 0x30C2, - 0x30C3, - 0x30C4, - 0x30C5, - 0x30C6, - 0x30C7, - 0x30C8, - 0x30C9, - 0x30CA, - 0x30CB, - 0x30CC, - 0x30CD, - 0x30CE, - 0x30CF, - 0x30D0, - 0x30D1, - 0x30D2, - 0x30D3, - 0x30D4, - 0x30D5, - 0x30D6, - 0x30D7, - 0x30D8, - 0x30D9, - 0x30DA, - 0x30DB, - 0x30DC, - 0x30DD, - 0x30DE, - 0x30DF, - 0x30E0, - 0x30E1, - 0x30E2, - 0x30E3, - 0x30E4, - 0x30E5, - 0x30E6, - 0x30E7, - 0x30E8, - 0x30E9, - 0x30EA, - 0x30EB, - 0x30EC, - 0x30ED, - 0x30EE, - 0x30EF, - 0x30F0, - 0x30F1, - 0x30F2, - 0x30F3, - 0x30F4, - 0x30F5, - 0x30F6, + 0x222C, + 0x0000, + 0x0000, + 0x0000, + 0x25EF +}; + + +/* a3a1 to a3fe is mostly contiguous. Conversion output values are + of the form 0xFFxx where xx is (yy - 0xA0) where the input is 0xA3yy */ +static unsigned char a3[] = { + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1 +}; + +/* a4 is contiguous from a4a1 to a4f3 */ +/* transform = 0x30xx where xx = last byte - 0x60 */ + +/* a5 is contiguous from a5a1 to a5f6 */ +/* transform = 0x30xx where xx = last byte */ + +/* a6 is mostly contiguous from a6a1 to a6d8 */ +static unsigned short a6[] = { 0x0391, 0x0392, 0x0393, @@ -403,6 +325,14 @@ static unsigned short jp2uc[] = { 0x03A7, 0x03A8, 0x03A9, + 0x0000, + 0x0000, + 0x0000, + 0x0000, + 0x0000, + 0x0000, + 0x0000, + 0x0000, 0x03B1, 0x03B2, 0x03B3, @@ -426,7 +356,11 @@ static unsigned short jp2uc[] = { 0x03C6, 0x03C7, 0x03C8, - 0x03C9, + 0x03C9 +}; + +/* a7 is mostly contiguous from a7a1 to a7f1 */ +static unsigned short a7[] = { 0x0410, 0x0411, 0x0412, @@ -460,6 +394,21 @@ static unsigned short jp2uc[] = { 0x042D, 0x042E, 0x042F, + 0x0000, + 0x0000, + 0x0000, + 0x0000, + 0x0000, + 0x0000, + 0x0000, + 0x0000, + 0x0000, + 0x0000, + 0x0000, + 0x0000, + 0x0000, + 0x0000, + 0x0000, 0x0430, 0x0431, 0x0432, @@ -492,7 +441,11 @@ static unsigned short jp2uc[] = { 0x044C, 0x044D, 0x044E, - 0x044F, + 0x044F +}; + +/* a8 is contiguous from a8a1 to a8c0 */ +static unsigned short a8[] = { 0x2500, 0x2502, 0x250C, @@ -524,7 +477,13 @@ static unsigned short jp2uc[] = { 0x2530, 0x2525, 0x2538, - 0x2542, + 0x2542 +}; + +/* no conversion a9 to af */ + +/* b0a1 to cfd3 is contiguous except for illegal sequences with 0xfe */ +static unsigned short b02cf[] = { 0x4E9C, 0x5516, 0x5A03, @@ -3489,7 +3448,12 @@ static unsigned short jp2uc[] = { 0x6900, 0x6E7E, 0x7897, - 0x8155, + 0x8155 +}; + +/* d0a1 to f4a6 is contiguous */ + +static unsigned short d02f4[] = { 0x5F0C, 0x4E10, 0x4E15, @@ -6879,5 +6843,7 @@ static unsigned short jp2uc[] = { 0x9059, 0x7464, 0x51DC, - 0x7199 + 0x7199 }; + +/* f5 to fe is non-existent */ -- cgit v1.2.3