summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--lib/libc/regex/engine.c60
-rw-r--r--lib/libc/regex/regcomp.c144
-rw-r--r--lib/libc/regex/regex2.h4
3 files changed, 202 insertions, 6 deletions
diff --git a/lib/libc/regex/engine.c b/lib/libc/regex/engine.c
index be569b1..9d11f54 100644
--- a/lib/libc/regex/engine.c
+++ b/lib/libc/regex/engine.c
@@ -35,6 +35,8 @@
* SUCH DAMAGE.
*
* @(#)engine.c 8.5 (Berkeley) 3/20/94
+ *
+ * $FreeBSD$
*/
/*
@@ -152,6 +154,16 @@ int eflags;
register const sopno gl = g->laststate;
char *start;
char *stop;
+ /* Boyer-Moore algorithms variables */
+ register unsigned char *pp;
+ int cj, mj;
+ register unsigned char *mustfirst;
+ register unsigned char *mustlast;
+ register int mustlen;
+ register int *matchjump;
+ register int *charjump;
+ register unsigned char *bmp;
+ register unsigned char *bmps;
/* simplify the situation where possible */
if (g->cflags&REG_NOSUB)
@@ -168,12 +180,48 @@ int eflags;
/* prescreening; this does wonders for this rather slow code */
if (g->must != NULL) {
- for (dp = start; dp < stop; dp++)
- if (*dp == g->must[0] && stop - dp >= g->mlen &&
- memcmp(dp, g->must, (size_t)g->mlen) == 0)
- break;
- if (dp == stop) /* we didn't find g->must */
- return(REG_NOMATCH);
+ if(g->charjump != NULL && g->matchjump != NULL) {
+ mustlen = -g->mlen;
+ mustfirst = g->must;
+ mustlast = g->must + g->mlen - 1;
+ charjump = g->charjump;
+ matchjump = g->matchjump;
+ bmps = stop;
+ pp = mustlast;
+ for(bmp = start+g->mlen-1; bmp < bmps;) {
+ /* Fast skip non-matches */
+ while(bmp < bmps && charjump[*bmp])
+ bmp += charjump[*bmp];
+
+ if(bmp >= bmps)
+ break;
+
+ /* Greedy matcher */
+ /* We depend on not being used for
+ * for strings of length 1
+ */
+ while(*--bmp == *--pp && pp != mustfirst);
+
+ if(*bmp == *pp)
+ break;
+
+ /* Jump to next possible match */
+ mj = matchjump[pp - mustfirst];
+ cj = charjump[*bmp];
+ bmp += (cj < mj ? mj : cj);
+ pp = mustlast;
+ }
+ if(pp != mustfirst)
+ return(REG_NOMATCH);
+ } else {
+ for (dp = start; dp < stop; dp++)
+ if (*dp == g->must[0] &&
+ stop - dp >= g->mlen &&
+ memcmp(dp, g->must, (size_t)g->mlen) == 0)
+ break;
+ if (dp == stop) /* we didn't find g->must */
+ return(REG_NOMATCH);
+ }
}
/* match struct setup */
diff --git a/lib/libc/regex/regcomp.c b/lib/libc/regex/regcomp.c
index a4e9e1d..f71d63c 100644
--- a/lib/libc/regex/regcomp.c
+++ b/lib/libc/regex/regcomp.c
@@ -35,6 +35,8 @@
* SUCH DAMAGE.
*
* @(#)regcomp.c 8.5 (Berkeley) 3/20/94
+ *
+ * $FreeBSD$
*/
#if defined(LIBC_SCCS) && !defined(lint)
@@ -122,6 +124,8 @@ static void dofwd __P((struct parse *p, sopno pos, sop value));
static void enlarge __P((struct parse *p, sopno size));
static void stripsnug __P((struct parse *p, struct re_guts *g));
static void findmust __P((struct parse *p, struct re_guts *g));
+static void computejumps __P((struct parse *p, struct re_guts *g));
+static void computematchjumps __P((struct parse *p, struct re_guts *g));
static sopno pluscount __P((struct parse *p, struct re_guts *g));
#ifdef __cplusplus
@@ -167,6 +171,9 @@ static int never = 0; /* for use in asserts; shuts lint up */
#define never 0 /* some <assert.h>s have bugs too */
#endif
+/* Macro used by computejump()/computematchjump() */
+#define MIN(a,b) ((a)<(b)?(a):(b))
+
/*
- regcomp - interface for parser and compilation
= extern int regcomp(regex_t *, const char *, int);
@@ -262,6 +269,17 @@ int cflags;
categorize(p, g);
stripsnug(p, g);
findmust(p, g);
+ /* only use Boyer-Moore algorithm if the pattern is bigger
+ * than three characters
+ */
+ if(g->mlen > 3) {
+ computejumps(p, g);
+ computematchjumps(p, g);
+ if(g->matchjump == NULL) {
+ free(g->charjump);
+ g->charjump = NULL;
+ }
+ }
g->nplus = pluscount(p, g);
g->magic = MAGIC2;
preg->re_nsub = g->nsub;
@@ -1741,6 +1759,132 @@ register struct re_guts *g;
}
/*
+ - computejumps - compute char jumps for BM scan
+ == static void computejumps(register struct parse *p, register struct re_guts *g);
+ *
+ * This algorithm assumes g->must exists and is has size greater than
+ * zero. It's based on the algorithm found on Computer Algorithms by
+ * Sara Baase.
+ *
+ * A char jump is the number of characters one needs to jump based on
+ * the value of the character from the text that was mismatched.
+ */
+static void
+computejumps(p, g)
+struct parse *p;
+struct re_guts *g;
+{
+ int ch;
+ int mindex;
+
+ /* Avoid making errors worse */
+ if (p->error != 0)
+ return;
+
+ g->charjump = malloc(256 * sizeof(int));
+ if (g->charjump == NULL) /* Not a fatal error */
+ return;
+
+ /* If the character does not exist in the pattern, the jump
+ * is equal to the number of characters in the pattern.
+ */
+ for (ch = 0; ch < 256; ch++)
+ g->charjump[ch] = g->mlen;
+
+ /* If the character does exist, compute the jump that would
+ * take us to the last character in the pattern equal to it
+ * (notice that we match right to left, so that last character
+ * is the first one that would be matched).
+ */
+ for (mindex = 0; mindex < g->mlen; mindex++)
+ g->charjump[g->must[mindex]] = g->mlen - mindex - 1;
+}
+
+/*
+ - computematchjumps - compute match jumps for BM scan
+ == static void computematchjumps(register struct parse *p, register struct re_guts *g);
+ *
+ * This algorithm assumes g->must exists and is has size greater than
+ * zero. It's based on the algorithm found on Computer Algorithms by
+ * Sara Baase.
+ *
+ * A match jump is the number of characters one needs to advance based
+ * on the already-matched suffix.
+ * Notice that all values here are minus (g->mlen-1), because of the way
+ * the search algorithm works.
+ */
+static void
+computematchjumps(p, g)
+struct parse *p;
+struct re_guts *g;
+{
+ int mindex; /* General "must" iterator */
+ int suffix; /* Keeps track of matching suffix */
+ int ssuffix; /* Keeps track of suffixes' suffix */
+ int* pmatches; /* pmatches[k] points to the next i
+ * such that i+1...mlen is a substring
+ * of k+1...k+mlen-i-1
+ */
+
+ /* Avoid making errors worse */
+ if (p->error != 0)
+ return;
+
+ pmatches = malloc(g->mlen * sizeof(unsigned int));
+ if (pmatches == NULL) {
+ g->matchjump = NULL;
+ return;
+ }
+
+ g->matchjump = malloc(g->mlen * sizeof(unsigned int));
+ if (g->matchjump == NULL) /* Not a fatal error */
+ return;
+
+ /* Set maximum possible jump for each character in the pattern */
+ for (mindex = 0; mindex < g->mlen; mindex++)
+ g->matchjump[mindex] = 2*g->mlen - mindex - 1;
+
+ /* Compute pmatches[] */
+ for (mindex = g->mlen - 1, suffix = g->mlen; mindex >= 0;
+ mindex--, suffix--) {
+ pmatches[mindex] = suffix;
+
+ /* If a mismatch is found, interrupting the substring,
+ * compute the matchjump for that position. If no
+ * mismatch is found, then a text substring mismatched
+ * against the suffix will also mismatch against the
+ * substring.
+ */
+ while (suffix < g->mlen
+ && g->must[mindex] != g->must[suffix]) {
+ g->matchjump[suffix] = MIN(g->matchjump[suffix],
+ g->mlen - mindex - 1);
+ suffix = pmatches[suffix];
+ }
+ }
+
+ /* Compute the matchjump up to the last substring found to jump
+ * to the beginning of the largest must pattern prefix matching
+ * it's own suffix.
+ */
+ for (mindex = 0; mindex <= suffix; mindex++)
+ g->matchjump[mindex] = MIN(g->matchjump[mindex],
+ g->mlen + suffix - mindex);
+
+ ssuffix = pmatches[suffix];
+ while(suffix < g->mlen) {
+ while(suffix <= ssuffix) {
+ g->matchjump[suffix] = MIN(g->matchjump[suffix],
+ g->mlen + ssuffix - suffix);
+ suffix++;
+ }
+ ssuffix = pmatches[ssuffix];
+ }
+
+ free(pmatches);
+}
+
+/*
- pluscount - count + nesting
== static sopno pluscount(register struct parse *p, register struct re_guts *g);
*/
diff --git a/lib/libc/regex/regex2.h b/lib/libc/regex/regex2.h
index cd7b962..af9cc58 100644
--- a/lib/libc/regex/regex2.h
+++ b/lib/libc/regex/regex2.h
@@ -35,6 +35,8 @@
* SUCH DAMAGE.
*
* @(#)regex2.h 8.4 (Berkeley) 3/20/94
+ *
+ * $FreeBSD$
*/
/*
@@ -160,6 +162,8 @@ struct re_guts {
int ncategories; /* how many character categories */
cat_t *categories; /* ->catspace[-CHAR_MIN] */
char *must; /* match must contain this string */
+ int *charjump; /* Boyer-Moore char jump table */
+ int *matchjump; /* Boyer-Moore match jump table */
int mlen; /* length of must */
size_t nsub; /* copy of re_nsub */
int backrefs; /* does it use back references? */
OpenPOWER on IntegriCloud