summary refs log tree commit diff stats
path: root/lib/libc/regex
diff options
context:
space:
mode:
author tjr <tjr@FreeBSD.org> 2004-07-11 05:58:31 +0000
committer tjr <tjr@FreeBSD.org> 2004-07-11 05:58:31 +0000
commit db66ea27a00160961db238cc97c52107e5bc0e83 (patch)
tree 03ee4b5f714c812347f96fe7409d21cb197a628a /lib/libc/regex
parent 5fd437f0d8c2d245eacc18886dab3b6f4462cf07 (diff)
download FreeBSD-src-db66ea27a00160961db238cc97c52107e5bc0e83.zip
FreeBSD-src-db66ea27a00160961db238cc97c52107e5bc0e83.tar.gz
Remove incomplete support for multi-character collating elements. Remove
unused character category calculations.
Diffstat (limited to 'lib/libc/regex')
-rw-r--r-- lib/libc/regex/regcomp.c 253
-rw-r--r-- lib/libc/regex/regex2.h 17
2 files changed, 8 insertions, 262 deletions
diff --git a/lib/libc/regex/regcomp.c b/lib/libc/regex/regcomp.c
index e342fda..95a2874 100644
--- a/lib/libc/regex/regcomp.c
+++ b/lib/libc/regex/regcomp.c
@@ -106,17 +106,6 @@ static void freeset(struct parse *p, cset *cs);
static int freezeset(struct parse *p, cset *cs);
static int firstch(struct parse *p, cset *cs);
static int nch(struct parse *p, cset *cs);
-static void mcadd(struct parse *p, cset *cs, char *cp) __unused;
-#if used
-static void mcsub(cset *cs, char *cp);
-static int mcin(cset *cs, char *cp);
-static char *mcfind(cset *cs, char *cp);
-#endif
-static void mcinvert(struct parse *p, cset *cs);
-static void mccase(struct parse *p, cset *cs);
-static int isinsets(struct re_guts *g, int c);
-static int samesets(struct re_guts *g, int c1, int c2);
-static void categorize(struct parse *p, struct re_guts *g);
static sopno dupl(struct parse *p, sopno start, sopno finish);
static void doemit(struct parse *p, sop op, size_t opnd);
static void doinsert(struct parse *p, sop op, size_t opnd, sopno pos);
@@ -124,7 +113,7 @@ static void dofwd(struct parse *p, sopno pos, sop value);
static void enlarge(struct parse *p, sopno size);
static void stripsnug(struct parse *p, struct re_guts *g);
static void findmust(struct parse *p, struct re_guts *g);
-static int altoffset(sop *scan, int offset, int mccs);
+static int altoffset(sop *scan, int offset);
static void computejumps(struct parse *p, struct re_guts *g);
static void computematchjumps(struct parse *p, struct re_guts *g);
static sopno pluscount(struct parse *p, struct re_guts *g);
@@ -216,8 +205,7 @@ int cflags;
len = strlen((char *)pattern);
/* do the mallocs early so failure handling is easy */
- g = (struct re_guts *)malloc(sizeof(struct re_guts) +
- (NC-1)*sizeof(cat_t));
+ g = (struct re_guts *)malloc(sizeof(struct re_guts));
if (g == NULL)
return(REG_ESPACE);
p->ssize = len/(size_t)2*(size_t)3 + (size_t)1; /* ugh */
@@ -252,9 +240,6 @@ int cflags;
g->matchjump = NULL;
g->mlen = 0;
g->nsub = 0;
- g->ncategories = 1; /* category 0 is "everything else" */
- g->categories = &g->catspace[-(CHAR_MIN)];
- (void) memset((char *)g->catspace, 0, NC*sizeof(cat_t));
g->backrefs = 0;
/* do it */
@@ -270,7 +255,6 @@ int cflags;
g->laststate = THERE();
/* tidy up loose ends and fill things in */
- categorize(p, g);
stripsnug(p, g);
findmust(p, g);
/* only use Boyer-Moore algorithm if the pattern is bigger
@@ -516,9 +500,7 @@ struct parse *p;
* Giving end1 as OUT essentially eliminates the end1/end2 check.
*
* This implementation is a bit of a kludge, in that a trailing $ is first
- * taken as an ordinary character and then revised to be an anchor. The
- * only undesirable side effect is that '$' gets included as a character
- * category in such cases. This is fairly harmless; not worth fixing.
+ * taken as an ordinary character and then revised to be an anchor.
* The amount of lookahead needed to avoid this kludge is excessive.
*/
static void
@@ -739,8 +721,6 @@ struct parse *p;
if (ci != i)
CHadd(cs, ci);
}
- if (cs->multis != NULL)
- mccase(p, cs);
}
if (invert) {
int i;
@@ -752,12 +732,8 @@ struct parse *p;
CHadd(cs, i);
if (p->g->cflags&REG_NEWLINE)
CHsub(cs, '\n');
- if (cs->multis != NULL)
- mcinvert(p, cs);
}
- assert(cs->multis == NULL); /* xxx */
-
if (nch(p, cs) == 1) { /* optimize singleton sets */
ordinary(p, firstch(p, cs));
freeset(p, cs);
@@ -812,7 +788,6 @@ cset *cs;
(void)REQUIRE(EATTWO('=', ']'), REG_ECOLLATE);
break;
default: /* symbol, ordinary character, or range */
-/* xxx revision needed for multichar stuff */
start = p_b_symbol(p);
if (SEE('-') && MORE2() && PEEK2() != ']') {
/* range */
@@ -932,10 +907,6 @@ cset *cs;
CHadd(cs, c);
break;
}
-#if 0
- for (u = cp->multis; *u != '\0'; u += strlen(u) + 1)
- MCadd(p, cs, u);
-#endif
}
/*
@@ -1059,15 +1030,11 @@ ordinary(p, ch)
struct parse *p;
int ch;
{
- cat_t *cap = p->g->categories;
if ((p->g->cflags&REG_ICASE) && isalpha((uch)ch) && othercase(ch) != ch)
bothcases(p, ch);
- else {
+ else
EMIT(OCHAR, (uch)ch);
- if (cap[ch] == 0)
- cap[ch] = p->g->ncategories++;
- }
}
/*
@@ -1233,8 +1200,6 @@ struct parse *p;
cs->ptr = p->g->setbits + css*((no)/CHAR_BIT);
cs->mask = 1 << ((no) % CHAR_BIT);
cs->hash = 0;
- cs->smultis = 0;
- cs->multis = NULL;
return(cs);
}
@@ -1337,193 +1302,6 @@ cset *cs;
}
/*
- - mcadd - add a collating element to a cset
- == static void mcadd(struct parse *p, cset *cs, \
- == char *cp);
- */
-static void
-mcadd(p, cs, cp)
-struct parse *p;
-cset *cs;
-char *cp;
-{
- size_t oldend = cs->smultis;
-
- cs->smultis += strlen(cp) + 1;
- if (cs->multis == NULL)
- cs->multis = malloc(cs->smultis);
- else
- cs->multis = reallocf(cs->multis, cs->smultis);
- if (cs->multis == NULL) {
- SETERROR(REG_ESPACE);
- return;
- }
-
- (void) strcpy(cs->multis + oldend - 1, cp);
- cs->multis[cs->smultis - 1] = '\0';
-}
-
-#if used
-/*
- - mcsub - subtract a collating element from a cset
- == static void mcsub(cset *cs, char *cp);
- */
-static void
-mcsub(cs, cp)
-cset *cs;
-char *cp;
-{
- char *fp = mcfind(cs, cp);
- size_t len = strlen(fp);
-
- assert(fp != NULL);
- (void) memmove(fp, fp + len + 1,
- cs->smultis - (fp + len + 1 - cs->multis));
- cs->smultis -= len;
-
- if (cs->smultis == 0) {
- free(cs->multis);
- cs->multis = NULL;
- return;
- }
-
- cs->multis = reallocf(cs->multis, cs->smultis);
- assert(cs->multis != NULL);
-}
-
-/*
- - mcin - is a collating element in a cset?
- == static int mcin(cset *cs, char *cp);
- */
-static int
-mcin(cs, cp)
-cset *cs;
-char *cp;
-{
- return(mcfind(cs, cp) != NULL);
-}
-
-/*
- - mcfind - find a collating element in a cset
- == static char *mcfind(cset *cs, char *cp);
- */
-static char *
-mcfind(cs, cp)
-cset *cs;
-char *cp;
-{
- char *p;
-
- if (cs->multis == NULL)
- return(NULL);
- for (p = cs->multis; *p != '\0'; p += strlen(p) + 1)
- if (strcmp(cp, p) == 0)
- return(p);
- return(NULL);
-}
-#endif
-
-/*
- - mcinvert - invert the list of collating elements in a cset
- == static void mcinvert(struct parse *p, cset *cs);
- *
- * This would have to know the set of possibilities. Implementation
- * is deferred.
- */
-static void
-mcinvert(p, cs)
-struct parse *p;
-cset *cs;
-{
- assert(cs->multis == NULL); /* xxx */
-}
-
-/*
- - mccase - add case counterparts of the list of collating elements in a cset
- == static void mccase(struct parse *p, cset *cs);
- *
- * This would have to know the set of possibilities. Implementation
- * is deferred.
- */
-static void
-mccase(p, cs)
-struct parse *p;
-cset *cs;
-{
- assert(cs->multis == NULL); /* xxx */
-}
-
-/*
- - isinsets - is this character in any sets?
- == static int isinsets(struct re_guts *g, int c);
- */
-static int /* predicate */
-isinsets(g, c)
-struct re_guts *g;
-int c;
-{
- uch *col;
- int i;
- int ncols = (g->ncsets+(CHAR_BIT-1)) / CHAR_BIT;
- unsigned uc = (uch)c;
-
- for (i = 0, col = g->setbits; i < ncols; i++, col += g->csetsize)
- if (col[uc] != 0)
- return(1);
- return(0);
-}
-
-/*
- - samesets - are these two characters in exactly the same sets?
- == static int samesets(struct re_guts *g, int c1, int c2);
- */
-static int /* predicate */
-samesets(g, c1, c2)
-struct re_guts *g;
-int c1;
-int c2;
-{
- uch *col;
- int i;
- int ncols = (g->ncsets+(CHAR_BIT-1)) / CHAR_BIT;
- unsigned uc1 = (uch)c1;
- unsigned uc2 = (uch)c2;
-
- for (i = 0, col = g->setbits; i < ncols; i++, col += g->csetsize)
- if (col[uc1] != col[uc2])
- return(0);
- return(1);
-}
-
-/*
- - categorize - sort out character categories
- == static void categorize(struct parse *p, struct re_guts *g);
- */
-static void
-categorize(p, g)
-struct parse *p;
-struct re_guts *g;
-{
- cat_t *cats = g->categories;
- int c;
- int c2;
- cat_t cat;
-
- /* avoid making error situations worse */
- if (p->error != 0)
- return;
-
- for (c = CHAR_MIN; c <= CHAR_MAX; c++)
- if (cats[c] == 0 && isinsets(g, c)) {
- cat = g->ncategories++;
- cats[c] = cat;
- for (c2 = c+1; c2 <= CHAR_MAX; c2++)
- if (cats[c2] == 0 && samesets(g, c, c2))
- cats[c2] = cat;
- }
-}
-
-/*
- dupl - emit a duplicate of a bunch of sops
== static sopno dupl(struct parse *p, sopno start, sopno finish);
*/
@@ -1698,18 +1476,11 @@ struct re_guts *g;
char *cp;
sopno i;
int offset;
- int cs, mccs;
/* avoid making error situations worse */
if (p->error != 0)
return;
- /* Find out if we can handle OANYOF or not */
- mccs = 0;
- for (cs = 0; cs < g->ncsets; cs++)
- if (g->sets[cs].multis != NULL)
- mccs = 1;
-
/* find the longest OCHAR sequence in strip */
newlen = 0;
offset = 0;
@@ -1729,7 +1500,7 @@ struct re_guts *g;
break;
case OQUEST_: /* things that must be skipped */
case OCH_:
- offset = altoffset(scan, offset, mccs);
+ offset = altoffset(scan, offset);
scan--;
do {
scan += OPND(s);
@@ -1797,11 +1568,6 @@ struct re_guts *g;
if (offset > -1)
offset++;
newlen = 0;
- /* And, now, if we found out we can't deal with
- * it, make offset = -1.
- */
- if (mccs)
- offset = -1;
break;
default:
/* Anything here makes it impossible or too hard
@@ -1849,16 +1615,15 @@ struct re_guts *g;
/*
- altoffset - choose biggest offset among multiple choices
- == static int altoffset(sop *scan, int offset, int mccs);
+ == static int altoffset(sop *scan, int offset);
*
* Compute, recursively if necessary, the largest offset among multiple
* re paths.
*/
static int
-altoffset(scan, offset, mccs)
+altoffset(scan, offset)
sop *scan;
int offset;
-int mccs;
{
int largest;
int try;
@@ -1880,7 +1645,7 @@ int mccs;
break;
case OQUEST_:
case OCH_:
- try = altoffset(scan, try, mccs);
+ try = altoffset(scan, try);
if (try == -1)
return -1;
scan--;
@@ -1897,8 +1662,6 @@ int mccs;
scan++;
break;
case OANYOF:
- if (mccs)
- return -1;
case OCHAR:
case OANY:
try++;
diff --git a/lib/libc/regex/regex2.h b/lib/libc/regex/regex2.h
index 8c645ce..4678824 100644
--- a/lib/libc/regex/regex2.h
+++ b/lib/libc/regex/regex2.h
@@ -113,29 +113,16 @@ typedef long sopno;
* The individual set therefore has both a pointer to the byte vector
* and a mask to pick out the relevant bit of each byte. A hash code
* simplifies testing whether two sets could be identical.
- *
- * This will get trickier for multicharacter collating elements. As
- * preliminary hooks for dealing with such things, we also carry along
- * a string of multi-character elements, and decide the size of the
- * vectors at run time.
*/
typedef struct {
uch *ptr; /* -> uch [csetsize] */
uch mask; /* bit within array */
short hash; /* hash code */
- size_t smultis;
- char *multis; /* -> char[smulti] ab\0cd\0ef\0\0 */
} cset;
/* note that CHadd and CHsub are unsafe, and CHIN doesn't yield 0/1 */
#define CHadd(cs, c) ((cs)->ptr[(uch)(c)] |= (cs)->mask, (cs)->hash += (uch)(c))
#define CHsub(cs, c) ((cs)->ptr[(uch)(c)] &= ~(cs)->mask, (cs)->hash -= (uch)(c))
#define CHIN(cs, c) ((cs)->ptr[(uch)(c)] & (cs)->mask)
-#define MCadd(p, cs, cp) mcadd(p, cs, cp) /* regcomp() internal fns */
-#define MCsub(p, cs, cp) mcsub(p, cs, cp)
-#define MCin(p, cs, cp) mcin(p, cs, cp)
-
-/* stuff for character categories */
-typedef unsigned char cat_t;
/*
* main compiled-expression structure
@@ -158,8 +145,6 @@ struct re_guts {
# define BAD 04 /* something wrong */
int nbol; /* number of ^ used */
int neol; /* number of $ used */
- int ncategories; /* how many character categories */
- cat_t *categories; /* ->catspace[-CHAR_MIN] */
char *must; /* match must contain this string */
int moffset; /* latest point at which must may be located */
int *charjump; /* Boyer-Moore char jump table */
@@ -168,8 +153,6 @@ struct re_guts {
size_t nsub; /* copy of re_nsub */
int backrefs; /* does it use back references? */
sopno nplus; /* how deep does it nest +s? */
- /* catspace must be last */
- cat_t catspace[1]; /* actually [NC] */
};
/* misc utilities */
OpenPOWER on IntegriCloud