summaryrefslogtreecommitdiffstats
path: root/lib/libc/regex/engine.c
diff options
context:
space:
mode:
Diffstat (limited to 'lib/libc/regex/engine.c')
-rw-r--r--lib/libc/regex/engine.c132
1 files changed, 92 insertions, 40 deletions
diff --git a/lib/libc/regex/engine.c b/lib/libc/regex/engine.c
index 9d69c1e..e6484ef 100644
--- a/lib/libc/regex/engine.c
+++ b/lib/libc/regex/engine.c
@@ -69,6 +69,17 @@ __FBSDID("$FreeBSD$");
#define at lat
#define match lmat
#endif
+#ifdef MNAMES
+#define matcher mmatcher
+#define fast mfast
+#define slow mslow
+#define dissect mdissect
+#define backref mbackref
+#define step mstep
+#define print mprint
+#define at mat
+#define match mmat
+#endif
/* another structure passed up and down to avoid zillions of parameters */
struct match {
@@ -85,6 +96,7 @@ struct match {
states fresh; /* states for a fresh start */
states tmp; /* temporary */
states empty; /* empty set of states */
+ mbstate_t mbs; /* multibyte conversion state */
};
/* ========= begin header generated by ./mkh ========= */
@@ -98,16 +110,15 @@ static char *dissect(struct match *m, char *start, char *stop, sopno startst, so
static char *backref(struct match *m, char *start, char *stop, sopno startst, sopno stopst, sopno lev);
static char *fast(struct match *m, char *start, char *stop, sopno startst, sopno stopst);
static char *slow(struct match *m, char *start, char *stop, sopno startst, sopno stopst);
-static states step(struct re_guts *g, sopno start, sopno stop, states bef, int ch, states aft);
-#define BOL (OUT+1)
-#define EOL (BOL+1)
-#define BOLEOL (BOL+2)
-#define NOTHING (BOL+3)
-#define BOW (BOL+4)
-#define EOW (BOL+5)
-#define CODEMAX (BOL+5) /* highest code used */
-#define NONCHAR(c) ((c) > CHAR_MAX)
-#define NNONCHAR (CODEMAX-CHAR_MAX)
+static states step(struct re_guts *g, sopno start, sopno stop, states bef, wint_t ch, states aft);
+#define BOL (OUT-1)
+#define EOL (BOL-1)
+#define BOLEOL (BOL-2)
+#define NOTHING (BOL-3)
+#define BOW (BOL-4)
+#define EOW (BOL-5)
+#define BADCHAR (BOL-6)
+#define NONCHAR(c) ((c) <= OUT)
#ifdef REDEBUG
static void print(struct match *m, char *caption, states st, int ch, FILE *d);
#endif
@@ -234,6 +245,7 @@ int eflags;
SETUP(m->tmp);
SETUP(m->empty);
CLEAR(m->empty);
+ ZAPSTATE(&m->mbs);
/* Adjust start according to moffset, to speed things up */
if (g->moffset > -1)
@@ -257,7 +269,8 @@ int eflags;
if (endp != NULL)
break;
assert(m->coldp < m->endp);
- m->coldp++;
+ m->coldp += XMBRTOWC(NULL, m->coldp,
+ m->endp - m->coldp, &m->mbs, 0);
}
if (nmatch == 1 && !g->backrefs)
break; /* no further info needed */
@@ -316,7 +329,9 @@ int eflags;
/* despite initial appearances, there is no match here */
NOTE("false alarm");
- start = m->coldp + 1; /* recycle starting later */
+ /* recycle starting later */
+ start = m->coldp + XMBRTOWC(NULL, m->coldp,
+ m->endp - m->coldp, &m->mbs, 0);
assert(start <= stop);
}
@@ -394,7 +409,7 @@ sopno stopst;
assert(nope);
break;
case OCHAR:
- sp++;
+ sp += XMBRTOWC(NULL, sp, stop - start, &m->mbs, 0);
break;
case OBOL:
case OEOL:
@@ -403,7 +418,7 @@ sopno stopst;
break;
case OANY:
case OANYOF:
- sp++;
+ sp += XMBRTOWC(NULL, sp, stop - start, &m->mbs, 0);
break;
case OBACK_:
case O_BACK:
@@ -558,6 +573,7 @@ sopno lev; /* PLUS nesting level */
sop s;
regoff_t offsave;
cset *cs;
+ wint_t wc;
AT("back", start, stop, startst, stopst);
sp = start;
@@ -567,17 +583,25 @@ sopno lev; /* PLUS nesting level */
for (ss = startst; !hard && ss < stopst; ss++)
switch (OP(s = m->g->strip[ss])) {
case OCHAR:
- if (sp == stop || *sp++ != (char)OPND(s))
+ if (sp == stop)
+ return(NULL);
+ sp += XMBRTOWC(&wc, sp, stop - sp, &m->mbs, BADCHAR);
+ if (wc != OPND(s))
return(NULL);
break;
case OANY:
if (sp == stop)
return(NULL);
- sp++;
+ sp += XMBRTOWC(&wc, sp, stop - sp, &m->mbs, BADCHAR);
+ if (wc == BADCHAR)
+ return (NULL);
break;
case OANYOF:
+ if (sp == stop)
+ return (NULL);
cs = &m->g->sets[OPND(s)];
- if (sp == stop || !CHIN(cs, *sp++))
+ sp += XMBRTOWC(&wc, sp, stop - sp, &m->mbs, BADCHAR);
+ if (wc == BADCHAR || !CHIN(cs, wc))
return(NULL);
break;
case OBOL:
@@ -754,11 +778,12 @@ sopno stopst;
states fresh = m->fresh;
states tmp = m->tmp;
char *p = start;
- int c = (start == m->beginp) ? OUT : *(start-1);
- int lastc; /* previous c */
- int flagch;
+ wint_t c;
+ wint_t lastc; /* previous c */
+ wint_t flagch;
int i;
char *coldp; /* last p after which no match was underway */
+ size_t clen;
CLEAR(st);
SET1(st, startst);
@@ -766,10 +791,23 @@ sopno stopst;
ASSIGN(fresh, st);
SP("start", st, *p);
coldp = NULL;
+ if (start == m->beginp)
+ c = OUT;
+ else {
+ /*
+ * XXX Wrong if the previous character was multi-byte.
+ * Newline never is (in encodings supported by FreeBSD),
+ * so this only breaks the ISWORD tests below.
+ */
+ c = (uch)*(start - 1);
+ }
for (;;) {
/* next character */
lastc = c;
- c = (p == m->endp) ? OUT : *p;
+ if (p == m->endp)
+ c = OUT;
+ else
+ clen = XMBRTOWC(&c, p, m->endp - p, &m->mbs, BADCHAR);
if (EQ(st, fresh))
coldp = p;
@@ -817,13 +855,13 @@ sopno stopst;
st = step(m->g, startst, stopst, tmp, c, st);
SP("aft", st, c);
assert(EQ(step(m->g, startst, stopst, st, NOTHING, st), st));
- p++;
+ p += clen;
}
assert(coldp != NULL);
m->coldp = coldp;
if (ISSET(st, stopst))
- return(p+1);
+ return(p+XMBRTOWC(NULL, p, m->endp - p, &m->mbs, 0));
else
return(NULL);
}
@@ -845,11 +883,12 @@ sopno stopst;
states empty = m->empty;
states tmp = m->tmp;
char *p = start;
- int c = (start == m->beginp) ? OUT : *(start-1);
- int lastc; /* previous c */
- int flagch;
+ wint_t c;
+ wint_t lastc; /* previous c */
+ wint_t flagch;
int i;
char *matchp; /* last p at which a match ended */
+ size_t clen;
AT("slow", start, stop, startst, stopst);
CLEAR(st);
@@ -857,10 +896,24 @@ sopno stopst;
SP("sstart", st, *p);
st = step(m->g, startst, stopst, st, NOTHING, st);
matchp = NULL;
+ if (start == m->beginp)
+ c = OUT;
+ else {
+ /*
+ * XXX Wrong if the previous character was multi-byte.
+ * Newline never is (in encodings supported by FreeBSD),
+ * so this only breaks the ISWORD tests below.
+ */
+ c = (uch)*(start - 1);
+ }
for (;;) {
/* next character */
lastc = c;
- c = (p == m->endp) ? OUT : *p;
+ if (p == m->endp) {
+ c = OUT;
+ clen = 0;
+ } else
+ clen = XMBRTOWC(&c, p, m->endp - p, &m->mbs, BADCHAR);
/* is there an EOL and/or BOL between lastc and c? */
flagch = '\0';
@@ -908,7 +961,7 @@ sopno stopst;
st = step(m->g, startst, stopst, tmp, c, st);
SP("saft", st, c);
assert(EQ(step(m->g, startst, stopst, st, NOTHING, st), st));
- p++;
+ p += clen;
}
return(matchp);
@@ -919,15 +972,14 @@ sopno stopst;
- step - map set of states reachable before char to set reachable after
== static states step(struct re_guts *g, sopno start, sopno stop, \
== states bef, int ch, states aft);
- == #define BOL (OUT+1)
- == #define EOL (BOL+1)
- == #define BOLEOL (BOL+2)
- == #define NOTHING (BOL+3)
- == #define BOW (BOL+4)
- == #define EOW (BOL+5)
- == #define CODEMAX (BOL+5) // highest code used
- == #define NONCHAR(c) ((c) > CHAR_MAX)
- == #define NNONCHAR (CODEMAX-CHAR_MAX)
+ == #define BOL (OUT-1)
+ == #define EOL (BOL-1)
+ == #define BOLEOL (BOL-2)
+ == #define NOTHING (BOL-3)
+ == #define BOW (BOL-4)
+ == #define EOW (BOL-5)
+ == #define BADCHAR (BOL-6)
+ == #define NONCHAR(c) ((c) <= OUT)
*/
static states
step(g, start, stop, bef, ch, aft)
@@ -935,7 +987,7 @@ struct re_guts *g;
sopno start; /* start state within strip */
sopno stop; /* state after stop state within strip */
states bef; /* states reachable before */
-int ch; /* character or NONCHAR code */
+wint_t ch; /* character or NONCHAR code */
states aft; /* states already known reachable after */
{
cset *cs;
@@ -953,8 +1005,8 @@ states aft; /* states already known reachable after */
break;
case OCHAR:
/* only characters can match */
- assert(!NONCHAR(ch) || ch != (char)OPND(s));
- if (ch == (char)OPND(s))
+ assert(!NONCHAR(ch) || ch != OPND(s));
+ if (ch == OPND(s))
FWD(aft, bef, 1);
break;
case OBOL:
OpenPOWER on IntegriCloud