diff options
Diffstat (limited to 'lib/libc/regex')
-rw-r--r-- | lib/libc/regex/engine.c | 19 | ||||
-rw-r--r-- | lib/libc/regex/regcomp.c | 169 | ||||
-rw-r--r-- | lib/libc/regex/regex2.h | 1 |
3 files changed, 177 insertions, 12 deletions
diff --git a/lib/libc/regex/engine.c b/lib/libc/regex/engine.c index 9d11f54..8bc3de8 100644 --- a/lib/libc/regex/engine.c +++ b/lib/libc/regex/engine.c @@ -180,7 +180,7 @@ int eflags; /* prescreening; this does wonders for this rather slow code */ if (g->must != NULL) { - if(g->charjump != NULL && g->matchjump != NULL) { + if (g->charjump != NULL && g->matchjump != NULL) { mustlen = -g->mlen; mustfirst = g->must; mustlast = g->must + g->mlen - 1; @@ -188,21 +188,21 @@ int eflags; matchjump = g->matchjump; bmps = stop; pp = mustlast; - for(bmp = start+g->mlen-1; bmp < bmps;) { + for (bmp = start+g->mlen-1; bmp < bmps;) { /* Fast skip non-matches */ - while(bmp < bmps && charjump[*bmp]) + while (bmp < bmps && charjump[*bmp]) bmp += charjump[*bmp]; - if(bmp >= bmps) + if (bmp >= bmps) break; /* Greedy matcher */ /* We depend on not being used for * for strings of length 1 */ - while(*--bmp == *--pp && pp != mustfirst); + while (*--bmp == *--pp && pp != mustfirst); - if(*bmp == *pp) + if (*bmp == *pp) break; /* Jump to next possible match */ @@ -211,8 +211,9 @@ int eflags; bmp += (cj < mj ? mj : cj); pp = mustlast; } - if(pp != mustfirst) + if (pp != mustfirst) return(REG_NOMATCH); + dp = bmp; } else { for (dp = start; dp < stop; dp++) if (*dp == g->must[0] && @@ -239,6 +240,10 @@ int eflags; SETUP(m->empty); CLEAR(m->empty); + /* Adjust start according to moffset, to speed things up */ + if (g->moffset > -1) + start = dp - g->moffset; + /* this loop does only one repetition except for backrefs */ for (;;) { endp = fast(m, start, stop, gf, gl); diff --git a/lib/libc/regex/regcomp.c b/lib/libc/regex/regcomp.c index bb8f677..fc50924 100644 --- a/lib/libc/regex/regcomp.c +++ b/lib/libc/regex/regcomp.c @@ -124,6 +124,7 @@ static void dofwd __P((struct parse *p, sopno pos, sop value)); static void enlarge __P((struct parse *p, sopno size)); static void stripsnug __P((struct parse *p, struct re_guts *g)); static void findmust __P((struct parse *p, struct re_guts *g)); +static int altoffset __P((sop *scan, int offset, int mccs)); static void computejumps __P((struct parse *p, struct re_guts *g)); static void computematchjumps __P((struct parse *p, struct re_guts *g)); static sopno pluscount __P((struct parse *p, struct re_guts *g)); @@ -246,6 +247,7 @@ int cflags; g->nbol = 0; g->neol = 0; g->must = NULL; + g->moffset = -1; g->charjump = NULL; g->matchjump = NULL; g->mlen = 0; @@ -1695,13 +1697,23 @@ register struct re_guts *g; register sop s; register char *cp; register sopno i; + int offset; + int cs, mccs; /* avoid making error situations worse */ if (p->error != 0) return; + /* Find out if we can handle OANYOF or not */ + mccs = 0; + for (cs = 0; cs < g->ncsets; cs++) + if (g->sets[cs].multis != NULL) + mccs = 1; + /* find the longest OCHAR sequence in strip */ newlen = 0; + offset = 0; + g->moffset = 0; scan = g->strip + 1; do { s = *scan++; @@ -1717,6 +1729,7 @@ register struct re_guts *g; break; case OQUEST_: /* things that must be skipped */ case OCH_: + offset = altoffset(scan, offset, mccs); scan--; do { scan += OPND(s); @@ -1729,23 +1742,97 @@ register struct re_guts *g; } } while (OP(s) != O_QUEST && OP(s) != O_CH); /* fallthrough */ - default: /* things that break a sequence */ + case OBOW: /* things that break a sequence */ + case OEOW: + case OBOL: + case OEOL: + case O_QUEST: + case O_CH: + case OEND: if (newlen > g->mlen) { /* ends one */ start = newstart; g->mlen = newlen; + if (offset > -1) { + g->moffset += offset; + offset = newlen; + } else + g->moffset = offset; + } else { + if (offset > -1) + offset += newlen; } newlen = 0; break; + case OANY: + if (newlen > g->mlen) { /* ends one */ + start = newstart; + g->mlen = newlen; + if (offset > -1) { + g->moffset += offset; + offset = newlen; + } else + g->moffset = offset; + } else { + if (offset > -1) + offset += newlen; + } + if (offset > -1) + offset++; + newlen = 0; + break; + case OANYOF: /* may or may not invalidate offset */ + /* First, everything as OANY */ + if (newlen > g->mlen) { /* ends one */ + start = newstart; + g->mlen = newlen; + if (offset > -1) { + g->moffset += offset; + offset = newlen; + } else + g->moffset = offset; + } else { + if (offset > -1) + offset += newlen; + } + if (offset > -1) + offset++; + newlen = 0; + /* And, now, if we found out we can't deal with + * it, make offset = -1. + */ + if (mccs) + offset = -1; + break; + default: + /* Anything here makes it impossible or too hard + * to calculate the offset -- so we give up; + * save the last known good offset, in case the + * must sequence doesn't occur later. + */ + if (newlen > g->mlen) { /* ends one */ + start = newstart; + g->mlen = newlen; + if (offset > -1) + g->moffset += offset; + else + g->moffset = offset; + } + offset = -1; + newlen = 0; + break; } } while (OP(s) != OEND); - if (g->mlen == 0) /* there isn't one */ + if (g->mlen == 0) { /* there isn't one */ + g->moffset = -1; return; + } /* turn it into a character string */ g->must = malloc((size_t)g->mlen + 1); if (g->must == NULL) { /* argh; just forget it */ g->mlen = 0; + g->moffset = -1; return; } cp = g->must; @@ -1761,6 +1848,78 @@ register struct re_guts *g; } /* + - altoffset - choose biggest offset among multiple choices + = static int altoffset(sop *scan, int offset, int mccs); + * + * Compute, recursively if necessary, the largest offset among multiple + * re paths. + */ +static int +altoffset(scan, offset, mccs) +sop *scan; +int offset; +int mccs; +{ + int largest; + int try; + sop s; + + /* If we gave up already on offsets, return */ + if (offset == -1) + return -1; + + largest = 0; + try = 0; + s = *scan++; + while (OP(s) != O_QUEST && OP(s) != O_CH) { + switch (OP(s)) { + case OOR1: + if (try > largest) + largest = try; + try = 0; + break; + case OQUEST_: + case OCH_: + try = altoffset(scan, try, mccs); + if (try == -1) + return -1; + scan--; + do { + scan += OPND(s); + s = *scan; + if (OP(s) != O_QUEST && OP(s) != O_CH && + OP(s) != OOR2) + return -1; + } while (OP(s) != O_QUEST && OP(s) != O_CH); + break; + case OANYOF: + if (mccs) + return -1; + case OCHAR: + case OANY: + try++; + case OBOW: + case OEOW: + case OLPAREN: + case ORPAREN: + case OOR2: + break; + default: + try = -1; + break; + } + if (try == -1) + return -1; + s = *scan++; + } + + if (try > largest) + largest = try; + + return largest+offset; +} + +/* - computejumps - compute char jumps for BM scan == static void computejumps(register struct parse *p, register struct re_guts *g); * @@ -1783,7 +1942,7 @@ struct re_guts *g; if (p->error != 0) return; - g->charjump = malloc(256 * sizeof(int)); + g->charjump = malloc((UCHAR_MAX+1) * sizeof(int)); if (g->charjump == NULL) /* Not a fatal error */ return; @@ -1874,8 +2033,8 @@ struct re_guts *g; g->mlen + suffix - mindex); ssuffix = pmatches[suffix]; - while(suffix < g->mlen) { - while(suffix <= ssuffix) { + while (suffix < g->mlen) { + while (suffix <= ssuffix) { g->matchjump[suffix] = MIN(g->matchjump[suffix], g->mlen + ssuffix - suffix); suffix++; diff --git a/lib/libc/regex/regex2.h b/lib/libc/regex/regex2.h index af9cc58..cd634db 100644 --- a/lib/libc/regex/regex2.h +++ b/lib/libc/regex/regex2.h @@ -162,6 +162,7 @@ struct re_guts { int ncategories; /* how many character categories */ cat_t *categories; /* ->catspace[-CHAR_MIN] */ char *must; /* match must contain this string */ + int moffset; /* latest point at which must may be located */ int *charjump; /* Boyer-Moore char jump table */ int *matchjump; /* Boyer-Moore match jump table */ int mlen; /* length of must */ |