diff options
Diffstat (limited to 'newlib/libc/iconv/ces/utf-16.c')
-rw-r--r-- | newlib/libc/iconv/ces/utf-16.c | 144 |
1 files changed, 144 insertions, 0 deletions
diff --git a/newlib/libc/iconv/ces/utf-16.c b/newlib/libc/iconv/ces/utf-16.c new file mode 100644 index 000000000..a12c979bc --- /dev/null +++ b/newlib/libc/iconv/ces/utf-16.c @@ -0,0 +1,144 @@ +/*- + * Copyright (c) 1999,2000 + * Konstantin Chuguev. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * iconv (Charset Conversion Library) v2.0 + */ +#ifdef ENABLE_ICONV + +#include "../lib/deps.h" + +#ifdef ICONV_CONVERTER_UTF_16 + +#include <_ansi.h> +#include <string.h> +#include "../lib/local.h" + +static ssize_t +_DEFUN(convert_from_ucs, (ces, in, outbuf, outbytesleft), + struct iconv_ces *ces _AND + ucs_t in _AND + unsigned char **outbuf _AND + size_t *outbytesleft) +{ + unsigned char *cp; + int *state; + int bytes; + + if (in == UCS_CHAR_NONE) + return 1; /* No state reinitialization for table charsets */ + if (in > 0x10FFFF) + return -1; + bytes = *(state = (int *)(ces->data)) ? 2 : 4; + if (in > 0xFFFF) + bytes += 2; + if (*outbytesleft < bytes) + return 0; /* No space in the output buffer */ + cp = *outbuf; + if (*state == 0) { + *cp++ = 0xFE; + *cp++ = 0xFF; + *state = 1; + } + if (in > 0xFFFF) { + *cp++ = ((in -= 0x10000) >> 18) | 0xD8; + *cp++ = (in >> 10) & 0xFF; + *cp++ = ((in >> 8) & 3) | 0xDC; + } else + *cp++ = (in >> 8) & 0xFF; + *cp++ = in & 0xFF; + (*outbuf) += bytes; + *outbytesleft -= bytes; + return 1; +} + +static __inline ucs_t +_DEFUN(msb, (buf), _CONST unsigned char *buf) +{ + return (buf[0] << 8) | buf[1]; +} + +static ucs_t +_DEFUN(convert_to_ucs, (ces, inbuf, inbytesleft), + struct iconv_ces *ces _AND + _CONST unsigned char **inbuf _AND + size_t *inbytesleft) +{ + ucs_t res, res2; + int *state, mark; + + if (*inbytesleft < 2) + return UCS_CHAR_NONE; /* Not enough bytes in the input buffer */ + state = (int *)(ces->data); + res = msb(*inbuf); + switch (res) { + case UCS_CHAR_ZERO_WIDTH_NBSP: + if (*state == 0) + *state = 1; + mark = 1; + break; + case UCS_CHAR_INVALID: + if (*state == 0) + *state = 2; + mark = 1; + break; + default: + if (*state == 0) + *state = 1; + mark = 0; + } + if (mark) { + if (*inbytesleft < 4) + return UCS_CHAR_NONE; /* Not enough bytes in the input buffer */ + *inbytesleft -= 2; + res = msb((*inbuf) += 2); + } + if (*state == 2) { /* LSB order */ + res = (*(*inbuf) ++); + res |= (*(*inbuf) ++) << 8; + } else + *inbuf += 2; + *inbytesleft -= 2; + if ((res & 0xFC00) != 0xD800) /* Non-surrogate character */ + return res; + if (*inbytesleft < 2) + return UCS_CHAR_NONE; /* Not enough bytes in the input buffer */ + if (*state == 2) { + res2 = (*inbuf)[0]; + res2 |= (*inbuf)[1] << 8; + } else + res2 = msb(*inbuf); + if ((res2 & 0xFC00) != 0xDC00) /* Broken surrogate pair */ + return -1; + (*inbuf) += 2; + (*inbytesleft) -= 2; + return (((res & 0x3FF) << 10) | (res2 & 0x3FF)) + 0x10000; +} + +ICONV_CES_STATEFUL_MODULE_DECL(utf_16); + +#endif /* #ifdef ICONV_CONVERTER_UTF_16 */ + +#endif /* #ifdef ENABLE_ICONV */ + |