Welcome to mirror list, hosted at ThFree Co, Russian Federation.

cygwin.com/git/newlib-cygwin.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
Diffstat (limited to 'winsup/cygwin/regex/engine.c')
-rw-r--r--winsup/cygwin/regex/engine.c552
1 files changed, 365 insertions, 187 deletions
diff --git a/winsup/cygwin/regex/engine.c b/winsup/cygwin/regex/engine.c
index 42c91dc0f..ca24cc50c 100644
--- a/winsup/cygwin/regex/engine.c
+++ b/winsup/cygwin/regex/engine.c
@@ -1,3 +1,41 @@
+/*-
+ * Copyright (c) 1992, 1993, 1994 Henry Spencer.
+ * Copyright (c) 1992, 1993, 1994
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Henry Spencer.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)engine.c 8.5 (Berkeley) 3/20/94
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD: src/lib/libc/regex/engine.c,v 1.23 2009/09/16 06:32:23 dds Exp $");
+
/*
* The matching engine and friends. This file is #included by regexec.c
* after suitable #defines of a variety of macros used herein, so that
@@ -27,25 +65,77 @@
#define at lat
#define match lmat
#endif
+#ifdef MNAMES
+#define matcher mmatcher
+#define fast mfast
+#define slow mslow
+#define dissect mdissect
+#define backref mbackref
+#define step mstep
+#define print mprint
+#define at mat
+#define match mmat
+#endif
/* another structure passed up and down to avoid zillions of parameters */
struct match {
struct re_guts *g;
int eflags;
regmatch_t *pmatch; /* [nsub+1] (0 element unused) */
- char *offp; /* offsets work from here */
- char *beginp; /* start of string -- virtual NUL precedes */
- char *endp; /* end of string -- virtual NUL here */
- char *coldp; /* can be no match starting before here */
- char **lastpos; /* [nplus+1] */
+ const char *offp; /* offsets work from here */
+ const char *beginp; /* start of string -- virtual NUL precedes */
+ const char *endp; /* end of string -- virtual NUL here */
+ const char *coldp; /* can be no match starting before here */
+ const char **lastpos; /* [nplus+1] */
STATEVARS;
states st; /* current states */
states fresh; /* states for a fresh start */
states tmp; /* temporary */
states empty; /* empty set of states */
+ mbstate_t mbs; /* multibyte conversion state */
};
-#include "engine.ih"
+/* ========= begin header generated by ./mkh ========= */
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* === engine.c === */
+static int matcher(struct re_guts *g, const char *string, size_t nmatch, regmatch_t pmatch[], int eflags);
+static const char *dissect(struct match *m, const char *start, const char *stop, sopno startst, sopno stopst);
+static const char *backref(struct match *m, const char *start, const char *stop, sopno startst, sopno stopst, sopno lev, int);
+static const char *fast(struct match *m, const char *start, const char *stop, sopno startst, sopno stopst);
+static const char *slow(struct match *m, const char *start, const char *stop, sopno startst, sopno stopst);
+static states step(struct re_guts *g, sopno start, sopno stop, states bef, wint_t ch, states aft);
+#define MAX_RECURSION 100
+#define BOL (OUT-1)
+#define EOL (BOL-1)
+#define BOLEOL (BOL-2)
+#define NOTHING (BOL-3)
+#define BOW (BOL-4)
+#define EOW (BOL-5)
+#define BADCHAR (BOL-6)
+#ifdef __CYGWIN__
+/* In contrast to BSD, wint_t on Cygwin is unsigned. This breaks this test,
+ unless the compared values are casted to signed. */
+#define NONCHAR(c) ((int)(c) <= (int)OUT)
+#else
+#define NONCHAR(c) ((c) <= OUT)
+#endif
+#ifdef REDEBUG
+static void print(struct match *m, const char *caption, states st, int ch, FILE *d);
+#endif
+#ifdef REDEBUG
+static void at(struct match *m, const char *title, const char *start, const char *stop, sopno startst, sopno stopst);
+#endif
+#ifdef REDEBUG
+static const char *pchar(int ch);
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+/* ========= end header generated by ./mkh ========= */
#ifdef REDEBUG
#define SP(t, s, c) print(m, t, s, c, stdout)
@@ -59,26 +149,32 @@ struct match {
/*
- matcher - the actual matching engine
- == static int matcher(register struct re_guts *g, char *string, \
+ == static int matcher(struct re_guts *g, const char *string, \
== size_t nmatch, regmatch_t pmatch[], int eflags);
*/
static int /* 0 success, REG_NOMATCH failure */
-matcher(g, string, nmatch, pmatch, eflags)
-register struct re_guts *g;
-char *string;
-size_t nmatch;
-regmatch_t pmatch[];
-int eflags;
+matcher(struct re_guts *g,
+ const char *string,
+ size_t nmatch,
+ regmatch_t pmatch[],
+ int eflags)
{
- register char *endp;
- register size_t i;
+ const char *endp;
+ int i;
struct match mv;
- register struct match *m = &mv;
- register char *dp;
- const register sopno gf = g->firststate+1; /* +1 for OEND */
- const register sopno gl = g->laststate;
- char *start;
- char *stop;
+ struct match *m = &mv;
+ const char *dp;
+ const sopno gf = g->firststate+1; /* +1 for OEND */
+ const sopno gl = g->laststate;
+ const char *start;
+ const char *stop;
+ /* Boyer-Moore algorithms variables */
+ const char *pp;
+ int cj, mj;
+ const char *mustfirst;
+ const char *mustlast;
+ int *matchjump;
+ int *charjump;
/* simplify the situation where possible */
if (g->cflags&REG_NOSUB)
@@ -95,12 +191,46 @@ int eflags;
/* prescreening; this does wonders for this rather slow code */
if (g->must != NULL) {
- for (dp = start; dp < stop; dp++)
- if (*dp == g->must[0] && stop - dp >= g->mlen &&
- memcmp(dp, g->must, (size_t)g->mlen) == 0)
- break;
- if (dp == stop) /* we didn't find g->must */
- return(REG_NOMATCH);
+ if (g->charjump != NULL && g->matchjump != NULL) {
+ mustfirst = g->must;
+ mustlast = g->must + g->mlen - 1;
+ charjump = g->charjump;
+ matchjump = g->matchjump;
+ pp = mustlast;
+ for (dp = start+g->mlen-1; dp < stop;) {
+ /* Fast skip non-matches */
+ while (dp < stop && charjump[(int)*dp])
+ dp += charjump[(int)*dp];
+
+ if (dp >= stop)
+ break;
+
+ /* Greedy matcher */
+ /* We depend on not being used for
+ * for strings of length 1
+ */
+ while (*--dp == *--pp && pp != mustfirst);
+
+ if (*dp == *pp)
+ break;
+
+ /* Jump to next possible match */
+ mj = matchjump[pp - mustfirst];
+ cj = charjump[(int)*dp];
+ dp += (cj < mj ? mj : cj);
+ pp = mustlast;
+ }
+ if (pp != mustfirst)
+ return(REG_NOMATCH);
+ } else {
+ for (dp = start; dp < stop; dp++)
+ if (*dp == g->must[0] &&
+ stop - dp >= g->mlen &&
+ memcmp(dp, g->must, (size_t)g->mlen) == 0)
+ break;
+ if (dp == stop) /* we didn't find g->must */
+ return(REG_NOMATCH);
+ }
}
/* match struct setup */
@@ -117,11 +247,22 @@ int eflags;
SETUP(m->tmp);
SETUP(m->empty);
CLEAR(m->empty);
+ ZAPSTATE(&m->mbs);
+
+ /* Adjust start according to moffset, to speed things up */
+ if (g->moffset > -1)
+ start = ((dp - g->moffset) < start) ? start : dp - g->moffset;
+
+ SP("mloop", m->st, *start);
/* this loop does only one repetition except for backrefs */
for (;;) {
endp = fast(m, start, stop, gf, gl);
if (endp == NULL) { /* a miss */
+ if (m->pmatch != NULL)
+ free((char *)m->pmatch);
+ if (m->lastpos != NULL)
+ free((char *)m->lastpos);
STATETEARDOWN(m);
return(REG_NOMATCH);
}
@@ -136,7 +277,8 @@ int eflags;
if (endp != NULL)
break;
assert(m->coldp < m->endp);
- m->coldp++;
+ m->coldp += XMBRTOWC(NULL, m->coldp,
+ m->endp - m->coldp, &m->mbs, 0);
}
if (nmatch == 1 && !g->backrefs)
break; /* no further info needed */
@@ -156,15 +298,15 @@ int eflags;
dp = dissect(m, m->coldp, endp, gf, gl);
} else {
if (g->nplus > 0 && m->lastpos == NULL)
- m->lastpos = (char **)malloc((g->nplus+1) *
- sizeof(char *));
+ m->lastpos = malloc((g->nplus+1) *
+ sizeof(const char *));
if (g->nplus > 0 && m->lastpos == NULL) {
free(m->pmatch);
STATETEARDOWN(m);
return(REG_ESPACE);
}
NOTE("backref dissect");
- dp = backref(m, m->coldp, endp, gf, gl, (sopno)0);
+ dp = backref(m, m->coldp, endp, gf, gl, (sopno)0, 0);
}
if (dp != NULL)
break;
@@ -187,7 +329,7 @@ int eflags;
}
#endif
NOTE("backoff dissect");
- dp = backref(m, m->coldp, endp, gf, gl, (sopno)0);
+ dp = backref(m, m->coldp, endp, gf, gl, (sopno)0, 0);
}
assert(dp == NULL || dp == endp);
if (dp != NULL) /* found a shorter one */
@@ -195,7 +337,9 @@ int eflags;
/* despite initial appearances, there is no match here */
NOTE("false alarm");
- start = m->coldp + 1; /* recycle starting later */
+ /* recycle starting later */
+ start = m->coldp + XMBRTOWC(NULL, m->coldp,
+ stop - m->coldp, &m->mbs, 0);
assert(start <= stop);
}
@@ -225,30 +369,29 @@ int eflags;
/*
- dissect - figure out what matched what, no back references
- == static char *dissect(register struct match *m, char *start, \
- == char *stop, sopno startst, sopno stopst);
+ == static const char *dissect(struct match *m, const char *start, \
+ == const char *stop, sopno startst, sopno stopst);
*/
-static char * /* == stop (success) always */
-dissect(m, start, stop, startst, stopst)
-register struct match *m;
-char *start;
-char *stop;
-sopno startst;
-sopno stopst;
+static const char * /* == stop (success) always */
+dissect(struct match *m,
+ const char *start,
+ const char *stop,
+ sopno startst,
+ sopno stopst)
{
- register int i;
- register sopno ss; /* start sop of current subRE */
- register sopno es; /* end sop of current subRE */
- register char *sp; /* start of string matched by it */
- register char *stp; /* string matched by it cannot pass here */
- register char *rest; /* start of rest of string */
- register char *tail; /* string unmatched by rest of RE */
- register sopno ssub; /* start sop of subsubRE */
- register sopno esub; /* end sop of subsubRE */
- register char *ssp; /* start of string matched by subsubRE */
- register char *sep; /* end of string matched by subsubRE */
- register char *oldssp; /* previous ssp */
- register char *dp;
+ int i;
+ sopno ss; /* start sop of current subRE */
+ sopno es; /* end sop of current subRE */
+ const char *sp; /* start of string matched by it */
+ const char *stp; /* string matched by it cannot pass here */
+ const char *rest; /* start of rest of string */
+ const char *tail; /* string unmatched by rest of RE */
+ sopno ssub; /* start sop of subsubRE */
+ sopno esub; /* end sop of subsubRE */
+ const char *ssp; /* start of string matched by subsubRE */
+ const char *sep; /* end of string matched by subsubRE */
+ const char *oldssp; /* previous ssp */
+ const char *dp;
AT("diss", start, stop, startst, stopst);
sp = start;
@@ -273,7 +416,7 @@ sopno stopst;
assert(nope);
break;
case OCHAR:
- sp++;
+ sp += XMBRTOWC(NULL, sp, stop - start, &m->mbs, 0);
break;
case OBOL:
case OEOL:
@@ -282,7 +425,7 @@ sopno stopst;
break;
case OANY:
case OANYOF:
- sp++;
+ sp += XMBRTOWC(NULL, sp, stop - start, &m->mbs, 0);
break;
case OBACK_:
case O_BACK:
@@ -413,30 +556,31 @@ sopno stopst;
/*
- backref - figure out what matched what, figuring in back references
- == static char *backref(register struct match *m, char *start, \
- == char *stop, sopno startst, sopno stopst, sopno lev);
+ == static const char *backref(struct match *m, const char *start, \
+ == const char *stop, sopno startst, sopno stopst, sopno lev);
*/
-static char * /* == stop (success) or NULL (failure) */
-backref(m, start, stop, startst, stopst, lev)
-register struct match *m;
-char *start;
-char *stop;
-sopno startst;
-sopno stopst;
-sopno lev; /* PLUS nesting level */
+static const char * /* == stop (success) or NULL (failure) */
+backref(struct match *m,
+ const char *start,
+ const char *stop,
+ sopno startst,
+ sopno stopst,
+ sopno lev, /* PLUS nesting level */
+ int rec)
{
- register int i;
- register sopno ss; /* start sop of current subRE */
- register char *sp; /* start of string matched by it */
- register sopno ssub; /* start sop of subsubRE */
- register sopno esub; /* end sop of subsubRE */
- register char *ssp; /* start of string matched by subsubRE */
- register char *dp;
- register size_t len;
- register int hard;
- register sop s;
- register regoff_t offsave;
- register cset *cs;
+ int i;
+ sopno ss; /* start sop of current subRE */
+ const char *sp; /* start of string matched by it */
+ sopno ssub; /* start sop of subsubRE */
+ sopno esub; /* end sop of subsubRE */
+ const char *ssp; /* start of string matched by subsubRE */
+ const char *dp;
+ size_t len;
+ int hard;
+ sop s;
+ regoff_t offsave;
+ cset *cs;
+ wint_t wc;
AT("back", start, stop, startst, stopst);
sp = start;
@@ -446,17 +590,25 @@ sopno lev; /* PLUS nesting level */
for (ss = startst; !hard && ss < stopst; ss++)
switch (OP(s = m->g->strip[ss])) {
case OCHAR:
- if (sp == stop || *sp++ != (char)OPND(s))
+ if (sp == stop)
+ return(NULL);
+ sp += XMBRTOWC(&wc, sp, stop - sp, &m->mbs, BADCHAR);
+ if (wc != OPND(s))
return(NULL);
break;
case OANY:
if (sp == stop)
return(NULL);
- sp++;
+ sp += XMBRTOWC(&wc, sp, stop - sp, &m->mbs, BADCHAR);
+ if (wc == BADCHAR)
+ return (NULL);
break;
case OANYOF:
+ if (sp == stop)
+ return (NULL);
cs = &m->g->sets[OPND(s)];
- if (sp == stop || !CHIN(cs, *sp++))
+ sp += XMBRTOWC(&wc, sp, stop - sp, &m->mbs, BADCHAR);
+ if (wc == BADCHAR || !CHIN(cs, wc))
return(NULL);
break;
case OBOL:
@@ -529,6 +681,8 @@ sopno lev; /* PLUS nesting level */
return(NULL);
assert(m->pmatch[i].rm_so != -1);
len = m->pmatch[i].rm_eo - m->pmatch[i].rm_so;
+ if (len == 0 && rec++ > MAX_RECURSION)
+ return(NULL);
assert(stop - m->beginp >= len);
if (sp > stop - len)
return(NULL); /* not enough left to match */
@@ -537,28 +691,28 @@ sopno lev; /* PLUS nesting level */
return(NULL);
while (m->g->strip[ss] != SOP(O_BACK, i))
ss++;
- return(backref(m, sp+len, stop, ss+1, stopst, lev));
+ return(backref(m, sp+len, stop, ss+1, stopst, lev, rec));
break;
case OQUEST_: /* to null or not */
- dp = backref(m, sp, stop, ss+1, stopst, lev);
+ dp = backref(m, sp, stop, ss+1, stopst, lev, rec);
if (dp != NULL)
return(dp); /* not */
- return(backref(m, sp, stop, ss+OPND(s)+1, stopst, lev));
+ return(backref(m, sp, stop, ss+OPND(s)+1, stopst, lev, rec));
break;
case OPLUS_:
assert(m->lastpos != NULL);
assert(lev+1 <= m->g->nplus);
m->lastpos[lev+1] = sp;
- return(backref(m, sp, stop, ss+1, stopst, lev+1));
+ return(backref(m, sp, stop, ss+1, stopst, lev+1, rec));
break;
case O_PLUS:
if (sp == m->lastpos[lev]) /* last pass matched null */
- return(backref(m, sp, stop, ss+1, stopst, lev-1));
+ return(backref(m, sp, stop, ss+1, stopst, lev-1, rec));
/* try another pass */
m->lastpos[lev] = sp;
- dp = backref(m, sp, stop, ss-OPND(s)+1, stopst, lev);
+ dp = backref(m, sp, stop, ss-OPND(s)+1, stopst, lev, rec);
if (dp == NULL)
- return(backref(m, sp, stop, ss+1, stopst, lev-1));
+ return(backref(m, sp, stop, ss+1, stopst, lev-1, rec));
else
return(dp);
break;
@@ -567,7 +721,7 @@ sopno lev; /* PLUS nesting level */
esub = ss + OPND(s) - 1;
assert(OP(m->g->strip[esub]) == OOR1);
for (;;) { /* find first matching branch */
- dp = backref(m, sp, stop, ssub, esub, lev);
+ dp = backref(m, sp, stop, ssub, esub, lev, rec);
if (dp != NULL)
return(dp);
/* that one missed, try next one */
@@ -588,7 +742,7 @@ sopno lev; /* PLUS nesting level */
assert(0 < i && i <= m->g->nsub);
offsave = m->pmatch[i].rm_so;
m->pmatch[i].rm_so = sp - m->offp;
- dp = backref(m, sp, stop, ss+1, stopst, lev);
+ dp = backref(m, sp, stop, ss+1, stopst, lev, rec);
if (dp != NULL)
return(dp);
m->pmatch[i].rm_so = offsave;
@@ -599,7 +753,7 @@ sopno lev; /* PLUS nesting level */
assert(0 < i && i <= m->g->nsub);
offsave = m->pmatch[i].rm_eo;
m->pmatch[i].rm_eo = sp - m->offp;
- dp = backref(m, sp, stop, ss+1, stopst, lev);
+ dp = backref(m, sp, stop, ss+1, stopst, lev, rec);
if (dp != NULL)
return(dp);
m->pmatch[i].rm_eo = offsave;
@@ -613,42 +767,57 @@ sopno lev; /* PLUS nesting level */
/* "can't happen" */
assert(nope);
/* NOTREACHED */
- return((char *)NULL); /* dummy */
+ return "shut up gcc";
}
/*
- fast - step through the string at top speed
- == static char *fast(register struct match *m, char *start, \
- == char *stop, sopno startst, sopno stopst);
+ == static const char *fast(struct match *m, const char *start, \
+ == const char *stop, sopno startst, sopno stopst);
*/
-static char * /* where tentative match ended, or NULL */
-fast(m, start, stop, startst, stopst)
-register struct match *m;
-char *start;
-char *stop;
-sopno startst;
-sopno stopst;
+static const char * /* where tentative match ended, or NULL */
+fast( struct match *m,
+ const char *start,
+ const char *stop,
+ sopno startst,
+ sopno stopst)
{
- register states st = m->st;
- register states fresh = m->fresh;
- register states tmp = m->tmp;
- register char *p = start;
- register int c = (start == m->beginp) ? OUT : *(start-1);
- register int lastc; /* previous c */
- register int flagch;
- register int i;
- register char *coldp; /* last p after which no match was underway */
+ states st = m->st;
+ states fresh = m->fresh;
+ states tmp = m->tmp;
+ const char *p = start;
+ wint_t c;
+ wint_t lastc; /* previous c */
+ wint_t flagch;
+ int i;
+ const char *coldp; /* last p after which no match was underway */
+ size_t clen;
CLEAR(st);
SET1(st, startst);
+ SP("fast", st, *p);
st = step(m->g, startst, stopst, st, NOTHING, st);
ASSIGN(fresh, st);
SP("start", st, *p);
coldp = NULL;
+ if (start == m->beginp)
+ c = OUT;
+ else {
+ /*
+ * XXX Wrong if the previous character was multi-byte.
+ * Newline never is (in encodings supported by FreeBSD),
+ * so this only breaks the ISWORD tests below.
+ */
+ c = (uch)*(start - 1);
+ }
for (;;) {
/* next character */
lastc = c;
- c = (p == m->endp) ? OUT : *p;
+ if (p == m->endp) {
+ clen = 0;
+ c = OUT;
+ } else
+ clen = XMBRTOWC(&c, p, m->endp - p, &m->mbs, BADCHAR);
if (EQ(st, fresh))
coldp = p;
@@ -686,7 +855,7 @@ sopno stopst;
}
/* are we done? */
- if (ISSET(st, stopst) || p == stop)
+ if (ISSET(st, stopst) || p == stop || clen > stop - p)
break; /* NOTE BREAK OUT */
/* no, we must deal with this character */
@@ -696,39 +865,39 @@ sopno stopst;
st = step(m->g, startst, stopst, tmp, c, st);
SP("aft", st, c);
assert(EQ(step(m->g, startst, stopst, st, NOTHING, st), st));
- p++;
+ p += clen;
}
assert(coldp != NULL);
m->coldp = coldp;
if (ISSET(st, stopst))
- return(p+1);
+ return(p+XMBRTOWC(NULL, p, stop - p, &m->mbs, 0));
else
return(NULL);
}
/*
- slow - step through the string more deliberately
- == static char *slow(register struct match *m, char *start, \
- == char *stop, sopno startst, sopno stopst);
+ == static const char *slow(struct match *m, const char *start, \
+ == const char *stop, sopno startst, sopno stopst);
*/
-static char * /* where it ended */
-slow(m, start, stop, startst, stopst)
-register struct match *m;
-char *start;
-char *stop;
-sopno startst;
-sopno stopst;
+static const char * /* where it ended */
+slow( struct match *m,
+ const char *start,
+ const char *stop,
+ sopno startst,
+ sopno stopst)
{
- register states st = m->st;
- register states empty = m->empty;
- register states tmp = m->tmp;
- register char *p = start;
- register int c = (start == m->beginp) ? OUT : *(start-1);
- register int lastc; /* previous c */
- register int flagch;
- register int i;
- register char *matchp; /* last p at which a match ended */
+ states st = m->st;
+ states empty = m->empty;
+ states tmp = m->tmp;
+ const char *p = start;
+ wint_t c;
+ wint_t lastc; /* previous c */
+ wint_t flagch;
+ int i;
+ const char *matchp; /* last p at which a match ended */
+ size_t clen;
AT("slow", start, stop, startst, stopst);
CLEAR(st);
@@ -736,10 +905,24 @@ sopno stopst;
SP("sstart", st, *p);
st = step(m->g, startst, stopst, st, NOTHING, st);
matchp = NULL;
+ if (start == m->beginp)
+ c = OUT;
+ else {
+ /*
+ * XXX Wrong if the previous character was multi-byte.
+ * Newline never is (in encodings supported by FreeBSD),
+ * so this only breaks the ISWORD tests below.
+ */
+ c = (uch)*(start - 1);
+ }
for (;;) {
/* next character */
lastc = c;
- c = (p == m->endp) ? OUT : *p;
+ if (p == m->endp) {
+ c = OUT;
+ clen = 0;
+ } else
+ clen = XMBRTOWC(&c, p, m->endp - p, &m->mbs, BADCHAR);
/* is there an EOL and/or BOL between lastc and c? */
flagch = '\0';
@@ -777,7 +960,7 @@ sopno stopst;
/* are we done? */
if (ISSET(st, stopst))
matchp = p;
- if (EQ(st, empty) || p == stop)
+ if (EQ(st, empty) || p == stop || clen > stop - p)
break; /* NOTE BREAK OUT */
/* no, we must deal with this character */
@@ -787,7 +970,7 @@ sopno stopst;
st = step(m->g, startst, stopst, tmp, c, st);
SP("saft", st, c);
assert(EQ(step(m->g, startst, stopst, st, NOTHING, st), st));
- p++;
+ p += clen;
}
return(matchp);
@@ -796,33 +979,31 @@ sopno stopst;
/*
- step - map set of states reachable before char to set reachable after
- == static states step(register struct re_guts *g, sopno start, sopno stop, \
- == register states bef, int ch, register states aft);
- == #define BOL (OUT+1)
- == #define EOL (BOL+1)
- == #define BOLEOL (BOL+2)
- == #define NOTHING (BOL+3)
- == #define BOW (BOL+4)
- == #define EOW (BOL+5)
- == #define CODEMAX (BOL+5) // highest code used
- == #define NONCHAR(c) ((c) > CHAR_MAX)
- == #define NNONCHAR (CODEMAX-CHAR_MAX)
+ == static states step(struct re_guts *g, sopno start, sopno stop, \
+ == states bef, int ch, states aft);
+ == #define BOL (OUT-1)
+ == #define EOL (BOL-1)
+ == #define BOLEOL (BOL-2)
+ == #define NOTHING (BOL-3)
+ == #define BOW (BOL-4)
+ == #define EOW (BOL-5)
+ == #define BADCHAR (BOL-6)
+ == #define NONCHAR(c) ((c) <= OUT)
*/
static states
-step(g, start, stop, bef, ch, aft)
-register struct re_guts *g;
-sopno start; /* start state within strip */
-sopno stop; /* state after stop state within strip */
-register states bef; /* states reachable before */
-int ch; /* character or NONCHAR code */
-register states aft; /* states already known reachable after */
+step(struct re_guts *g,
+ sopno start, /* start state within strip */
+ sopno stop, /* state after stop state within strip */
+ states bef, /* states reachable before */
+ wint_t ch, /* character or NONCHAR code */
+ states aft) /* states already known reachable after */
{
- register cset *cs;
- register sop s;
- register sopno pc;
- register onestate here; /* note, macros know this name */
- register sopno look;
- register long i;
+ cset *cs;
+ sop s;
+ sopno pc;
+ onestate here; /* note, macros know this name */
+ sopno look;
+ int i;
for (pc = start, INIT(here, pc); pc != stop; pc++, INC(here)) {
s = g->strip[pc];
@@ -832,8 +1013,8 @@ register states aft; /* states already known reachable after */
break;
case OCHAR:
/* only characters can match */
- assert(!NONCHAR(ch) || ch != (char)OPND(s));
- if (ch == (char)OPND(s))
+ assert(!NONCHAR(ch) || ch != OPND(s));
+ if (ch == OPND(s))
FWD(aft, bef, 1);
break;
case OBOL:
@@ -900,7 +1081,7 @@ register states aft; /* states already known reachable after */
OP(s = g->strip[pc+look]) != O_CH;
look += OPND(s))
assert(OP(s) == OOR2);
- FWD(aft, aft, look);
+ FWD(aft, aft, look + 1);
}
break;
case OOR2: /* propagate OCH_'s marking */
@@ -926,21 +1107,20 @@ register states aft; /* states already known reachable after */
/*
- print - print a set of states
== #ifdef REDEBUG
- == static void print(struct match *m, char *caption, states st, \
+ == static void print(struct match *m, const char *caption, states st, \
== int ch, FILE *d);
== #endif
*/
static void
-print(m, caption, st, ch, d)
-struct match *m;
-char *caption;
-states st;
-int ch;
-FILE *d;
+print(struct match *m,
+ const char *caption,
+ states st,
+ int ch,
+ FILE *d)
{
- register struct re_guts *g = m->g;
- register int i;
- register int first = 1;
+ struct re_guts *g = m->g;
+ int i;
+ int first = 1;
if (!(m->eflags&REG_TRACE))
return;
@@ -959,18 +1139,17 @@ FILE *d;
/*
- at - print current situation
== #ifdef REDEBUG
- == static void at(struct match *m, char *title, char *start, char *stop, \
- == sopno startst, sopno stopst);
+ == static void at(struct match *m, const char *title, const char *start, \
+ == const char *stop, sopno startst, sopno stopst);
== #endif
*/
static void
-at(m, title, start, stop, startst, stopst)
-struct match *m;
-char *title;
-char *start;
-char *stop;
-sopno startst;
-sopno stopst;
+at( struct match *m,
+ const char *title,
+ const char *start,
+ const char *stop,
+ sopno startst,
+ sopno stopst)
{
if (!(m->eflags&REG_TRACE))
return;
@@ -985,7 +1164,7 @@ sopno stopst;
/*
- pchar - make a character printable
== #ifdef REDEBUG
- == static char *pchar(int ch);
+ == static const char *pchar(int ch);
== #endif
*
* Is this identical to regchar() over in debug.c? Well, yes. But a
@@ -993,13 +1172,12 @@ sopno stopst;
* a matching debug.o, and this is convenient. It all disappears in
* the non-debug compilation anyway, so it doesn't matter much.
*/
-static char * /* -> representation */
-pchar(ch)
-int ch;
+static const char * /* -> representation */
+pchar(int ch)
{
static char pbuf[10];
- if (isprint(ch) || ch == ' ')
+ if (isprint((uch)ch) || ch == ' ')
sprintf(pbuf, "%c", ch);
else
sprintf(pbuf, "\\%o", ch);