Welcome to the mirror list, hosted at ThFree Co, Russian Federation.

cygwin.com/git/newlib-cygwin.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
Diffstat (limited to 'newlib/libc/iconv/ces/utf-16.c')
-rw-r--r-- newlib/libc/iconv/ces/utf-16.c 364
1 files changed, 98 insertions, 266 deletions
diff --git a/newlib/libc/iconv/ces/utf-16.c b/newlib/libc/iconv/ces/utf-16.c
index 4b2ff77a1..fae4ed5f0 100644
--- a/newlib/libc/iconv/ces/utf-16.c
+++ b/newlib/libc/iconv/ces/utf-16.c
@@ -1,6 +1,6 @@
-/*
- * Copyright (c) 2003-2004, Artem B. Bityuckiy
- * Copyright (c) 1999,2000, Konstantin Chuguev. All rights reserved.
+/*-
+ * Copyright (c) 1999,2000
+ * Konstantin Chuguev. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -22,285 +22,117 @@
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
+ *
+ * iconv (Charset Conversion Library) v2.0
*/
-#include "cesbi.h"
-
-#if defined (ICONV_TO_UCS_CES_UTF_16) \
- || defined (ICONV_FROM_UCS_CES_UTF_16)
+#include "../lib/deps.h"
-#include <_ansi.h>
-#include <reent.h>
-#include <sys/types.h>
-#include <stdlib.h>
+#ifdef _ICONV_CONVERTER_UTF_16
#include <string.h>
-#include <wchar.h>
#include "../lib/local.h"
-#include "../lib/ucsconv.h"
-#include "../lib/endian.h"
-
-/*
- * On input UTF-16 converter interpret BOM and uses Big Endian byte order if BOM
- * is absent. UTF-16 converter outputs in System Endian and adds correspondent
- * BOM as first code. UTF-16LE and UTF-16BE converters ignore BOM on input and
- * don't output BOM.
- */
-
-#define UTF16_UNDEFINED 0x00
-#define UTF16_BIG_ENDIAN 0x01
-#define UTF16_LITTLE_ENDIAN 0x02
-#define UTF16_SYSTEM_ENDIAN 0x04
-#define UTF16_BOM_WRITTEN 0x08
-
-#define UTF16_BOM 0xFEFF
-
-#define UTF_16 "utf_16"
-#define UTF_16BE "utf_16be"
-#define UTF_16LE "utf_16le"
-
-static size_t
-_DEFUN(utf_16_close, (rptr, data),
- struct _reent *rptr _AND
- _VOID_PTR data)
-{
- _free_r(rptr, data);
- return 0;
-}
-#if defined (ICONV_FROM_UCS_CES_UTF_16)
-static _VOID_PTR
-_DEFUN(utf_16_init_from_ucs, (rptr, encoding),
- struct _reent *rptr _AND
- _CONST char *encoding)
+static ssize_t
+_DEFUN(convert_from_ucs, (ces, in, outbuf, outbytesleft),
+ struct iconv_ces *ces _AND
+ ucs_t in _AND
+ unsigned char **outbuf _AND
+ size_t *outbytesleft)
{
- int *data;
-
- if ((data = (int *)_malloc_r (rptr, sizeof (int))) == NULL)
- return (_VOID_PTR)NULL;
-
- if (strcmp (encoding, UTF_16LE) == 0)
- *data = UTF16_LITTLE_ENDIAN;
- else if (strcmp (encoding, UTF_16BE) == 0)
- *data = UTF16_BIG_ENDIAN;
- else
- *data = UTF16_SYSTEM_ENDIAN;
-
- return (_VOID_PTR)data;
-}
-
-static size_t
-_DEFUN(utf_16_convert_from_ucs, (data, in, outbuf, outbytesleft),
- _VOID_PTR data _AND
- register ucs4_t in _AND
- unsigned char **outbuf _AND
- size_t *outbytesleft)
-{
- register ucs2_t *cp;
- register size_t bytes;
- register int *state;
-
- if (in > 0x0010FFFF || (in >= 0x0000D800 && in <= 0x0000DFFF)
- || in == 0x0000FFFF || in == 0x0000FFFE)
- return (size_t)ICONV_CES_INVALID_CHARACTER;
-
- state = (int *)data;
- bytes = (*state == UTF16_SYSTEM_ENDIAN) ? sizeof (ucs2_t) * 2
- : sizeof (ucs2_t);
-
- if (in > 0x0000FFFF)
- bytes += sizeof (ucs2_t);
-
- if (*outbytesleft < bytes)
- return (size_t)ICONV_CES_NOSPACE;
-
- cp = (ucs2_t *)*outbuf;
-
- if (*state == UTF16_SYSTEM_ENDIAN)
- {
- *cp++ = UTF16_BOM;
- *state |= UTF16_BOM_WRITTEN;
+ unsigned char *cp;
+ int *state;
+ int bytes;
+
+ if (in == UCS_CHAR_NONE)
+ return 1; /* No state reinitialization for table charsets */
+ if (in > 0x10FFFF)
+ return -1;
+ bytes = *(state = (int *)(ces->data)) ? 2 : 4;
+ if (in > 0xFFFF)
+ bytes += 2;
+ if (*outbytesleft < bytes)
+ return 0; /* No space in the output buffer */
+ cp = *outbuf;
+ if (*state == 0) {
+ *cp++ = 0xFE;
+ *cp++ = 0xFF;
+ *state = 1;
}
-
- if (in < 0x00010000)
- {
- switch (*state)
- {
- case UTF16_LITTLE_ENDIAN:
- *cp = ICONV_HTOLES ((ucs2_t)in);
- break;
- case UTF16_BIG_ENDIAN:
- *cp = ICONV_HTOBES ((ucs2_t)in);
- break;
- case (UTF16_SYSTEM_ENDIAN | UTF16_BOM_WRITTEN):
- *cp = (ucs2_t)in;
- break;
- }
- }
- else
- {
- ucs2_t w1, w2;
-
- /* Process surrogate pair */
- in -= 0x00010000;
- w1 = ((ucs2_t)((in >> 10)) & 0x03FF) | 0xD800;
- w2 = (ucs2_t)(in & 0x000003FF) | 0xDC00;
-
- switch (*state)
- {
- case UTF16_LITTLE_ENDIAN:
- *cp++ = ICONV_HTOLES (w1);
- *cp = ICONV_HTOLES (w2);
- break;
- case UTF16_BIG_ENDIAN:
- *cp++ = ICONV_HTOBES (w1);
- *cp = ICONV_HTOBES (w2);
- break;
- case (UTF16_SYSTEM_ENDIAN | UTF16_BOM_WRITTEN):
- *cp++ = w1;
- *cp = w2;
- break;
- }
- }
-
- *outbuf += bytes;
- *outbytesleft -= bytes;
-
- return bytes;
+ if (in > 0xFFFF) {
+ *cp++ = ((in -= 0x10000) >> 18) | 0xD8;
+ *cp++ = (in >> 10) & 0xFF;
+ *cp++ = ((in >> 8) & 3) | 0xDC;
+ } else
+ *cp++ = (in >> 8) & 0xFF;
+ *cp++ = in & 0xFF;
+ (*outbuf) += bytes;
+ *outbytesleft -= bytes;
+ return 1;
}
-#endif /* ICONV_FROM_UCS_CES_UTF_16 */
-#if defined (ICONV_TO_UCS_CES_UTF_16)
-static _VOID_PTR
-_DEFUN(utf_16_init_to_ucs, (rptr, encoding),
- struct _reent *rptr _AND
- _CONST char *encoding)
+static __inline ucs_t
+_DEFUN(msb, (buf), _CONST unsigned char *buf)
{
- int *data;
-
- if ((data = (int *)_malloc_r (rptr, sizeof (int))) == NULL)
- return (_VOID_PTR)NULL;
-
- if (strcmp (encoding, UTF_16BE) == 0)
- *data = UTF16_BIG_ENDIAN;
- else if (strcmp (encoding, UTF_16LE) == 0)
- *data = UTF16_LITTLE_ENDIAN;
- else
- *data = UTF16_UNDEFINED;
-
- return (_VOID_PTR)data;
+ return (buf[0] << 8) | buf[1];
}
-static ucs4_t
-_DEFUN(utf_16_convert_to_ucs, (data, inbuf, inbytesleft),
- _VOID_PTR data _AND
- _CONST unsigned char **inbuf _AND
- size_t *inbytesleft)
+static ucs_t
+_DEFUN(convert_to_ucs, (ces, inbuf, inbytesleft),
+ struct iconv_ces *ces _AND
+ _CONST unsigned char **inbuf _AND
+ size_t *inbytesleft)
{
- register ucs2_t w1;
- register ucs2_t w2;
- register ucs2_t *cp;
- int *state;
- ucs4_t res;
- int bytes = sizeof (ucs2_t);
-
- if (*inbytesleft < bytes)
- return (ucs4_t)ICONV_CES_BAD_SEQUENCE;
-
- state = (int *)data;
- cp = ((ucs2_t *)*inbuf);
-
- if (*state == UTF16_UNDEFINED)
- {
- if (*cp == ICONV_HTOLES(UTF16_BOM))
- *state = UTF16_LITTLE_ENDIAN;
- else
- *state = UTF16_BIG_ENDIAN;
-
- if ( *cp == ICONV_HTOBES (UTF16_BOM)
- || *cp == ICONV_HTOLES (UTF16_BOM))
- {
- if (*inbytesleft < (bytes += sizeof (ucs2_t)))
- return (ucs4_t)ICONV_CES_BAD_SEQUENCE;
- cp += 1;
- }
- }
-
- if (*state == UTF16_LITTLE_ENDIAN)
- w1 = ICONV_LETOHS (*cp);
- else
- w1 = ICONV_BETOHS (*cp);
-
- if (w1 < 0xD800 || w1 > 0xDFFF)
- {
- if (w1 == 0xFFFF || w1 == 0xFFFE)
- return (ucs4_t)ICONV_CES_INVALID_CHARACTER;
- res = (ucs4_t)w1;
+ ucs_t res, res2;
+ int *state, mark;
+
+ if (*inbytesleft < 2)
+ return UCS_CHAR_NONE; /* Not enough bytes in the input buffer */
+ state = (int *)(ces->data);
+ res = msb(*inbuf);
+ switch (res) {
+ case UCS_CHAR_ZERO_WIDTH_NBSP:
+ if (*state == 0)
+ *state = 1;
+ mark = 1;
+ break;
+ case UCS_CHAR_INVALID:
+ if (*state == 0)
+ *state = 2;
+ mark = 1;
+ break;
+ default:
+ if (*state == 0)
+ *state = 1;
+ mark = 0;
}
- else
- {
- /* Process surrogate pair */
- if (*inbytesleft < (bytes += 2))
- return (ucs4_t)ICONV_CES_BAD_SEQUENCE;
-
- if (w1 > 0xDBFF)
- /* Broken surrogate character */
- return (ucs4_t)ICONV_CES_INVALID_CHARACTER;
-
- cp += 1;
-
- if (*state == UTF16_LITTLE_ENDIAN)
- w2 = ICONV_LETOHS (*cp);
- else
- w2 = ICONV_BETOHS (*cp);
-
- if (w2 < 0xDC00 || w2 > 0xDFFF)
- /* Broken surrogate character */
- return (ucs4_t)ICONV_CES_INVALID_CHARACTER;
-
- res = (ucs4_t)(w2 & 0x03FF) | ((ucs4_t)(w1 & 0x03FF) << 10);
- res += 0x00010000;
+ if (mark) {
+ if (*inbytesleft < 4)
+ return UCS_CHAR_NONE; /* Not enough bytes in the input buffer */
+ *inbytesleft -= 2;
+ res = msb((*inbuf) += 2);
}
-
- *inbuf += bytes;
- *inbytesleft -= bytes;
-
- return res;
+ if (*state == 2) { /* LSB order */
+ res = (*(*inbuf) ++);
+ res |= (*(*inbuf) ++) << 8;
+ } else
+ *inbuf += 2;
+ *inbytesleft -= 2;
+ if ((res & 0xFC00) != 0xD800) /* Non-surrogate character */
+ return res;
+ if (*inbytesleft < 2)
+ return UCS_CHAR_NONE; /* Not enough bytes in the input buffer */
+ if (*state == 2) {
+ res2 = (*inbuf)[0];
+ res2 |= (*inbuf)[1] << 8;
+ } else
+ res2 = msb(*inbuf);
+ if ((res2 & 0xFC00) != 0xDC00) /* Broken surrogate pair */
+ return -1;
+ (*inbuf) += 2;
+ (*inbytesleft) -= 2;
+ return (((res & 0x3FF) << 10) | (res2 & 0x3FF)) + 0x10000;
}
-#endif /* ICONV_TO_UCS_CES_UTF_16 */
-static int
-_DEFUN(utf_16_get_mb_cur_max, (data),
- _VOID_PTR data)
-{
- return 6;
-}
-
-#if defined (ICONV_TO_UCS_CES_UTF_16)
-_CONST iconv_to_ucs_ces_handlers_t
-_iconv_to_ucs_ces_handlers_utf_16 =
-{
- utf_16_init_to_ucs,
- utf_16_close,
- utf_16_get_mb_cur_max,
- NULL,
- NULL,
- NULL,
- utf_16_convert_to_ucs
-};
-#endif
-
-#if defined (ICONV_FROM_UCS_CES_UTF_16)
-_CONST iconv_from_ucs_ces_handlers_t
-_iconv_from_ucs_ces_handlers_utf_16 =
-{
- utf_16_init_from_ucs,
- utf_16_close,
- utf_16_get_mb_cur_max,
- NULL,
- NULL,
- NULL,
- utf_16_convert_from_ucs
-};
-#endif
+ICONV_CES_STATEFUL_MODULE_DECL(utf_16);
-#endif /* ICONV_TO_UCS_CES_UTF_16 || ICONV_FROM_UCS_CES_UTF_16 */
+#endif /* #ifdef _ICONV_CONVERTER_UTF_16 */