Welcome to mirror list, hosted at ThFree Co, Russian Federation.

cygwin.com/git/newlib-cygwin.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Corinna Vinschen <corinna@vinschen.de> 2010-02-12 20:46:39 +0300
committer: Corinna Vinschen <corinna@vinschen.de> 2010-02-12 20:46:39 +0300
commit: 7bd2296c8384f80585ace60b9d67cff4cab2cc6f (patch)
tree: 3d0527ee5a1c2c135d57d7c6151087d7c5bed5e6 /winsup/cygwin
parent: e122c47112e7830e156fffb469ed389ecde0151a (diff)
* regex/regcomp.c (xwcrtomb): New function to convert wide chars
outside of the base plane to UTF-8. Call throughout instead of
wcrtomb.
(wgetnext): Handle surrogate pairs on UTF-16 systems.
* regex/regexec.c (xmbrtowc): Ditto.
Diffstat (limited to 'winsup/cygwin')
-rw-r--r-- winsup/cygwin/ChangeLog 8
-rw-r--r-- winsup/cygwin/regex/regcomp.c 41
-rw-r--r-- winsup/cygwin/regex/regexec.c 18
3 files changed, 62 insertions, 5 deletions
diff --git a/winsup/cygwin/ChangeLog b/winsup/cygwin/ChangeLog
index 955e256b3..3a841c876 100644
--- a/winsup/cygwin/ChangeLog
+++ b/winsup/cygwin/ChangeLog
@@ -1,5 +1,13 @@
2010-02-12 Corinna Vinschen <corinna@vinschen.de>
+ * regex/regcomp.c (xwcrtomb): New function to convert wide chars
+ outside of the base plane to UTF-8. Call throughout instead of
+ wcrtomb.
+ (wgetnext): Handle surrogate pairs on UTF-16 systems.
+ * regex/regexec.c (xmbrtowc): Ditto.
+
+2010-02-12 Corinna Vinschen <corinna@vinschen.de>
+
* sec_auth.cc (get_user_local_groups): Retrieve name of well known
builtin group from system. Explain why.
* sec_helper.cc (well_known_builtin_sid): New SID for BUILTIN group.
diff --git a/winsup/cygwin/regex/regcomp.c b/winsup/cygwin/regex/regcomp.c
index a7a48e023..c5e68a2b5 100644
--- a/winsup/cygwin/regex/regcomp.c
+++ b/winsup/cygwin/regex/regcomp.c
@@ -140,6 +140,7 @@ static void computejumps(struct parse *p, struct re_guts *g);
static void computematchjumps(struct parse *p, struct re_guts *g);
static sopno pluscount(struct parse *p, struct re_guts *g);
static wint_t wgetnext(struct parse *p);
+static size_t xwcrtomb (char *s, wint_t wc, mbstate_t *ps);
#ifdef __cplusplus
}
@@ -994,7 +995,7 @@ bothcases(struct parse *p, wint_t ch)
assert(othercase(ch) != ch); /* p_bracket() would recurse */
p->next = bracket;
memset(&mbs, 0, sizeof(mbs));
- n = wcrtomb(bracket, ch, &mbs);
+ n = xwcrtomb(bracket, ch, &mbs);
assert(n != (size_t)-1);
bracket[n] = ']';
bracket[n + 1] = '\0';
@@ -1136,6 +1137,7 @@ wgetnext(struct parse *p)
{
mbstate_t mbs;
wchar_t wc;
+ wint_t ret;
size_t n;
memset(&mbs, 0, sizeof(mbs));
@@ -1144,12 +1146,43 @@ wgetnext(struct parse *p)
SETERROR(REG_ILLSEQ);
return (0);
}
+ ret = wc;
if (n == 0)
n = 1;
+ else if (sizeof (wchar_t) == 2 && wc >= 0xd800 && wc <= 0xdbff) {
+ /* UTF-16 surrogate pair. Fetch second half and
+ compute UTF-32 value */
+ int n2 = mbrtowc(&wc, p->next + n, p->end - p->next - n, &mbs);
+ if (n2 == 0 || n2 == (size_t)-1 || n2 == (size_t)-2) {
+ SETERROR(REG_ILLSEQ);
+ return (0);
+ }
+ ret = (((ret & 0x3ff) << 10) | (wc & 0x3ff))
+ + 0x10000;
+ n += n2;
+ }
p->next += n;
- return (wc);
+ return (ret);
}
+static size_t
+xwcrtomb (char *s, wint_t wc, mbstate_t *ps)
+{
+ if (sizeof (wchar_t) == 2 && wc >= 0x10000)
+ {
+ /* UTF-16 systems can't handle these values directly. Since the
+ rest of the code isn't surrogate pair aware, we handle this here,
+ invisible for the rest of the code. */
+ *s++ = 0xf0 | ((wc & 0x1c0000) >> 18);
+ *s++ = 0x80 | ((wc & 0x3f000) >> 12);
+ *s++ = 0x80 | ((wc & 0xfc0) >> 6);
+ *s = 0x80 | (wc & 0x3f);
+ return 4;
+ }
+ return wcrtomb (s, wc, ps);
+}
+
+
/*
- seterr - set an error condition
== static int seterr(struct parse *p, int e);
@@ -1490,7 +1523,7 @@ findmust(struct parse *p, struct re_guts *g)
memset(&mbs, 0, sizeof(mbs));
newstart = scan - 1;
}
- clen = wcrtomb(buf, OPND(s), &mbs);
+ clen = xwcrtomb(buf, OPND(s), &mbs);
if (clen == (size_t)-1)
goto toohard;
newlen += clen;
@@ -1609,7 +1642,7 @@ findmust(struct parse *p, struct re_guts *g)
while (cp < g->must + g->mlen) {
while (OP(s = *scan++) != OCHAR)
continue;
- clen = wcrtomb(cp, OPND(s), &mbs);
+ clen = xwcrtomb(cp, OPND(s), &mbs);
assert(clen != (size_t)-1);
cp += clen;
}
diff --git a/winsup/cygwin/regex/regexec.c b/winsup/cygwin/regex/regexec.c
index 6195e508c..788ef5eeb 100644
--- a/winsup/cygwin/regex/regexec.c
+++ b/winsup/cygwin/regex/regexec.c
@@ -84,8 +84,24 @@ xmbrtowc(wint_t *wi, const char *s, size_t n, mbstate_t *mbs, wint_t dummy)
if (wi != NULL)
*wi = dummy;
return (1);
- } else
+ } else {
+ if (sizeof (wchar_t) == 2 && wc >= 0xd800 && wc <= 0xdbff) {
+ /* UTF-16 surrogate pair. Fetch second half and
+ compute UTF-32 value */
+ int n2 = mbrtowc(&wc, s + nr, n - nr, mbs);
+ if (n2 == 0 || n2 == (size_t)-1 || n2 == (size_t)-2) {
+ memset(mbs, 0, sizeof(*mbs));
+ if (wi != NULL)
+ *wi = dummy;
+ return (1);
+ }
+ if (wi != NULL)
+ *wi = (((*wi & 0x3ff) << 10) | (wc & 0x3ff))
+ + 0x10000;
+ nr += n2;
+ }
return (nr);
+ }
}
static __inline size_t