diff options
Diffstat (limited to 'newlib/libc/iconv/ces/utf-8.c')
-rw-r--r-- | newlib/libc/iconv/ces/utf-8.c | 193 |
1 files changed, 193 insertions, 0 deletions
diff --git a/newlib/libc/iconv/ces/utf-8.c b/newlib/libc/iconv/ces/utf-8.c new file mode 100644 index 000000000..c11ce6166 --- /dev/null +++ b/newlib/libc/iconv/ces/utf-8.c @@ -0,0 +1,193 @@ +/*- + * Copyright (c) 1999,2000 + * Konstantin Chuguev. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * iconv (Charset Conversion Library) v2.0 + */ +#ifdef ENABLE_ICONV + +#include "../lib/deps.h" + +#ifdef ICONV_CONVERTER_UTF_8 + +#include <_ansi.h> +#include "../lib/local.h" + +#define cont_byte(b) (((b) & 0x3F) | 0x80) + +static ssize_t +_DEFUN(convert_from_ucs, (ces, in, outbuf, outbytesleft), + struct iconv_ces *ces _AND + ucs_t in _AND + unsigned char **outbuf _AND + size_t *outbytesleft) +{ + unsigned char *cp; + int n; + if (in == UCS_CHAR_NONE) + return 1; /* No state reinitialization for table charsets */ + if (in < 0x80) { + n = 1; + } else if (in < 0x800) { + n = 2; + } else if (in < 0x10000) { + n = 3; + } else if (in < 0x200000) { + n = 4; + } else if (in < 0x4000000) { + n = 5; + } else if (in <= 0x7FFFFFFF) { + n = 6; + } else + return -1; + if (*outbytesleft < n) + return 0; + cp = *outbuf; + switch (n) { + case 1: + *cp = (unsigned char)in; + break; + case 2: + *cp++ = (unsigned char)((in >> 6) | 0xC0); + *cp++ = (unsigned char)cont_byte(in); + break; + case 3: + *cp++ = (unsigned char)((in >> 12) | 0xE0); + *cp++ = (unsigned char)cont_byte(in >> 6); + *cp++ = (unsigned char)cont_byte(in); + break; + case 4: + *cp++ = (unsigned char)((in >> 18) | 0xF0); + *cp++ = (unsigned char)cont_byte(in >> 12); + *cp++ = (unsigned char)cont_byte(in >> 6); + *cp++ = (unsigned char)cont_byte(in); + break; + case 5: + *cp++ = (unsigned char)((in >> 24) | 0xF8); + *cp++ = (unsigned char)cont_byte(in >> 18); + *cp++ = (unsigned char)cont_byte(in >> 12); + *cp++ = (unsigned char)cont_byte(in >> 6); + *cp++ = (unsigned char)cont_byte(in); + break; + case 6: + *cp++ = (unsigned char)((in >> 30) | 0xFC); + *cp++ = (unsigned char)cont_byte(in >> 24); + *cp++ = (unsigned char)cont_byte(in >> 18); + *cp++ = (unsigned char)cont_byte(in >> 12); + *cp++ = (unsigned char)cont_byte(in >> 6); + *cp++ = (unsigned char)cont_byte(in); + break; + } + (*outbytesleft) -= n; + (*outbuf) += n; + return 1; +} + +static ucs_t +_DEFUN(convert_to_ucs, (ces, inbuf, inbytesleft), + struct iconv_ces *ces _AND + _CONST unsigned char **inbuf _AND + size_t *inbytesleft) +{ + _CONST unsigned char *in = *inbuf; + unsigned char byte = *in++; + ucs_t res = byte; + + if (byte >= 0xC0) { + if (byte < 0xE0) { + if (*inbytesleft < 2) + return UCS_CHAR_NONE; + if (((byte & ~0x1F) == 0xC0) + && ((in[0] & 0xC0) == 0x80)) { + res = ((byte & 0x1F) << 6) | (*in++ & 0x3F); + } else + res = UCS_CHAR_INVALID; + } else if (byte < 0xF0) { + if (*inbytesleft < 3) + return UCS_CHAR_NONE; + if (((byte & ~0x0F) == 0xE0) + && ((in[0] & 0xC0) == 0x80) + && ((in[1] & 0xC0) == 0x80)) { + res = ((byte & 0x0F) << 12) | ((in[0] & 0x3F) << 6) + | (in[1] & 0x3F); + in += 2; + } else + res = UCS_CHAR_INVALID; + } else if (byte < 0xF8) { + if (*inbytesleft < 4) + return UCS_CHAR_NONE; + if (((byte & ~0x7) == 0xF0) + && ((in[0] & 0xC0) == 0x80) + && ((in[1] & 0xC0) == 0x80) + && ((in[2] & 0xC0) == 0x80)) { + res = ((byte & 0x7) << 18) | ((in[0] & 0x3F) << 12) + | ((in[1] & 0x3F) << 6) | (in[2] & 0x3F); + in += 3; + } else + res = UCS_CHAR_INVALID; + } else if (byte < 0xFC) { + if (*inbytesleft < 5) + return UCS_CHAR_NONE; + if (((byte & ~0x3) == 0xF8) + && ((in[0] & 0xC0) == 0x80) + && ((in[1] & 0xC0) == 0x80) + && ((in[2] & 0xC0) == 0x80) + && ((in[3] & 0xC0) == 0x80)) { + res = ((byte & 0x3) << 24) | ((in[0] & 0x3F) << 18) + | ((in[1] & 0x3F) << 12) | ((in[2] & 0x3F) << 8) + | (in[3] & 0x3F); + in += 4; + } else + res = UCS_CHAR_INVALID; + } else if (byte <= 0xFD) { + if (*inbytesleft < 6) + return UCS_CHAR_NONE; + if (((byte & ~0x1) == 0xFC) + && ((in[0] & 0xC0) == 0x80) + && ((in[1] & 0xC0) == 0x80) + && ((in[2] & 0xC0) == 0x80) + && ((in[3] & 0xC0) == 0x80) + && ((in[4] & 0xC0) == 0x80)) { + res = ((byte & 0x1) << 30) | ((in[0] & 0x3F) << 24) + | ((in[1] & 0x3F) << 18) | ((in[2] & 0x3F) << 12) + | ((in[3] & 0x3F) << 8) | (in[4] & 0x3F); + in += 5; + } else + res = UCS_CHAR_INVALID; + } else + res = UCS_CHAR_INVALID; + } else if (byte & 0x80) + res = UCS_CHAR_INVALID; + + (*inbytesleft) -= (in - *inbuf); + *inbuf = in; + return res; +} + +ICONV_CES_STATELESS_MODULE_DECL(utf_8); + +#endif /* #ifdef ICONV_CONVERTER_UTF_8 */ + +#endif /* #ifdef ENABLE_ICONV */ + |