summaryrefslogtreecommitdiffstats
path: root/lib/libc/regex/regcomp.c
diff options
context:
space:
mode:
Diffstat (limited to 'lib/libc/regex/regcomp.c')
-rw-r--r--lib/libc/regex/regcomp.c144
1 files changed, 144 insertions, 0 deletions
diff --git a/lib/libc/regex/regcomp.c b/lib/libc/regex/regcomp.c
index a4e9e1d..f71d63c 100644
--- a/lib/libc/regex/regcomp.c
+++ b/lib/libc/regex/regcomp.c
@@ -35,6 +35,8 @@
* SUCH DAMAGE.
*
* @(#)regcomp.c 8.5 (Berkeley) 3/20/94
+ *
+ * $FreeBSD$
*/
#if defined(LIBC_SCCS) && !defined(lint)
@@ -122,6 +124,8 @@ static void dofwd __P((struct parse *p, sopno pos, sop value));
static void enlarge __P((struct parse *p, sopno size));
static void stripsnug __P((struct parse *p, struct re_guts *g));
static void findmust __P((struct parse *p, struct re_guts *g));
+static void computejumps __P((struct parse *p, struct re_guts *g));
+static void computematchjumps __P((struct parse *p, struct re_guts *g));
static sopno pluscount __P((struct parse *p, struct re_guts *g));
#ifdef __cplusplus
@@ -167,6 +171,9 @@ static int never = 0; /* for use in asserts; shuts lint up */
#define never 0 /* some <assert.h>s have bugs too */
#endif
+/* Macro used by computejump()/computematchjump() */
+#define MIN(a,b) ((a)<(b)?(a):(b))
+
/*
- regcomp - interface for parser and compilation
= extern int regcomp(regex_t *, const char *, int);
@@ -262,6 +269,17 @@ int cflags;
categorize(p, g);
stripsnug(p, g);
findmust(p, g);
+ /* only use Boyer-Moore algorithm if the pattern is bigger
+ * than three characters
+ */
+ if(g->mlen > 3) {
+ computejumps(p, g);
+ computematchjumps(p, g);
+ if(g->matchjump == NULL) {
+ free(g->charjump);
+ g->charjump = NULL;
+ }
+ }
g->nplus = pluscount(p, g);
g->magic = MAGIC2;
preg->re_nsub = g->nsub;
@@ -1741,6 +1759,132 @@ register struct re_guts *g;
}
/*
+ - computejumps - compute char jumps for BM scan
+ == static void computejumps(register struct parse *p, register struct re_guts *g);
+ *
+ * This algorithm assumes g->must exists and is has size greater than
+ * zero. It's based on the algorithm found on Computer Algorithms by
+ * Sara Baase.
+ *
+ * A char jump is the number of characters one needs to jump based on
+ * the value of the character from the text that was mismatched.
+ */
+static void
+computejumps(p, g)
+struct parse *p;
+struct re_guts *g;
+{
+ int ch;
+ int mindex;
+
+ /* Avoid making errors worse */
+ if (p->error != 0)
+ return;
+
+ g->charjump = malloc(256 * sizeof(int));
+ if (g->charjump == NULL) /* Not a fatal error */
+ return;
+
+ /* If the character does not exist in the pattern, the jump
+ * is equal to the number of characters in the pattern.
+ */
+ for (ch = 0; ch < 256; ch++)
+ g->charjump[ch] = g->mlen;
+
+ /* If the character does exist, compute the jump that would
+ * take us to the last character in the pattern equal to it
+ * (notice that we match right to left, so that last character
+ * is the first one that would be matched).
+ */
+ for (mindex = 0; mindex < g->mlen; mindex++)
+ g->charjump[g->must[mindex]] = g->mlen - mindex - 1;
+}
+
+/*
+ - computematchjumps - compute match jumps for BM scan
+ == static void computematchjumps(register struct parse *p, register struct re_guts *g);
+ *
+ * This algorithm assumes g->must exists and is has size greater than
+ * zero. It's based on the algorithm found on Computer Algorithms by
+ * Sara Baase.
+ *
+ * A match jump is the number of characters one needs to advance based
+ * on the already-matched suffix.
+ * Notice that all values here are minus (g->mlen-1), because of the way
+ * the search algorithm works.
+ */
+static void
+computematchjumps(p, g)
+struct parse *p;
+struct re_guts *g;
+{
+ int mindex; /* General "must" iterator */
+ int suffix; /* Keeps track of matching suffix */
+ int ssuffix; /* Keeps track of suffixes' suffix */
+ int* pmatches; /* pmatches[k] points to the next i
+ * such that i+1...mlen is a substring
+ * of k+1...k+mlen-i-1
+ */
+
+ /* Avoid making errors worse */
+ if (p->error != 0)
+ return;
+
+ pmatches = malloc(g->mlen * sizeof(unsigned int));
+ if (pmatches == NULL) {
+ g->matchjump = NULL;
+ return;
+ }
+
+ g->matchjump = malloc(g->mlen * sizeof(unsigned int));
+ if (g->matchjump == NULL) /* Not a fatal error */
+ return;
+
+ /* Set maximum possible jump for each character in the pattern */
+ for (mindex = 0; mindex < g->mlen; mindex++)
+ g->matchjump[mindex] = 2*g->mlen - mindex - 1;
+
+ /* Compute pmatches[] */
+ for (mindex = g->mlen - 1, suffix = g->mlen; mindex >= 0;
+ mindex--, suffix--) {
+ pmatches[mindex] = suffix;
+
+ /* If a mismatch is found, interrupting the substring,
+ * compute the matchjump for that position. If no
+ * mismatch is found, then a text substring mismatched
+ * against the suffix will also mismatch against the
+ * substring.
+ */
+ while (suffix < g->mlen
+ && g->must[mindex] != g->must[suffix]) {
+ g->matchjump[suffix] = MIN(g->matchjump[suffix],
+ g->mlen - mindex - 1);
+ suffix = pmatches[suffix];
+ }
+ }
+
+ /* Compute the matchjump up to the last substring found to jump
+ * to the beginning of the largest must pattern prefix matching
+ * it's own suffix.
+ */
+ for (mindex = 0; mindex <= suffix; mindex++)
+ g->matchjump[mindex] = MIN(g->matchjump[mindex],
+ g->mlen + suffix - mindex);
+
+ ssuffix = pmatches[suffix];
+ while(suffix < g->mlen) {
+ while(suffix <= ssuffix) {
+ g->matchjump[suffix] = MIN(g->matchjump[suffix],
+ g->mlen + ssuffix - suffix);
+ suffix++;
+ }
+ ssuffix = pmatches[ssuffix];
+ }
+
+ free(pmatches);
+}
+
+/*
- pluscount - count + nesting
== static sopno pluscount(register struct parse *p, register struct re_guts *g);
*/
OpenPOWER on IntegriCloud