diff options
Diffstat (limited to 'lib/libc/regex/regcomp.c')
-rw-r--r-- | lib/libc/regex/regcomp.c | 157 |
1 files changed, 115 insertions, 42 deletions
diff --git a/lib/libc/regex/regcomp.c b/lib/libc/regex/regcomp.c index feb24ec..98d3755 100644 --- a/lib/libc/regex/regcomp.c +++ b/lib/libc/regex/regcomp.c @@ -49,6 +49,8 @@ static char sccsid[] = "@(#)regcomp.c 8.5 (Berkeley) 3/20/94"; #include <stdlib.h> #include <regex.h> +#include "collate.h" + #include "utils.h" #include "regex2.h" @@ -103,9 +105,11 @@ static int freezeset __P((struct parse *p, cset *cs)); static int firstch __P((struct parse *p, cset *cs)); static int nch __P((struct parse *p, cset *cs)); static void mcadd __P((struct parse *p, cset *cs, char *cp)); +#if used static void mcsub __P((cset *cs, char *cp)); static int mcin __P((cset *cs, char *cp)); static char *mcfind __P((cset *cs, char *cp)); +#endif static void mcinvert __P((struct parse *p, cset *cs)); static void mccase __P((struct parse *p, cset *cs)); static int isinsets __P((struct re_guts *g, int c)); @@ -295,7 +299,7 @@ int stop; /* character this ERE should end at */ conc = HERE(); while (MORE() && (c = PEEK()) != '|' && c != stop) p_ere_exp(p); - REQUIRE(HERE() != conc, REG_EMPTY); /* require nonempty */ + (void)REQUIRE(HERE() != conc, REG_EMPTY); /* require nonempty */ if (!EAT('|')) break; /* NOTE BREAK OUT */ @@ -342,7 +346,7 @@ register struct parse *p; pos = HERE(); switch (c) { case '(': - REQUIRE(MORE(), REG_EPAREN); + (void)REQUIRE(MORE(), REG_EPAREN); p->g->nsub++; subno = p->g->nsub; if (subno < NPAREN) @@ -355,7 +359,7 @@ register struct parse *p; assert(p->pend[subno] != 0); } EMIT(ORPAREN, subno); - MUSTEAT(')', REG_EPAREN); + (void)MUSTEAT(')', REG_EPAREN); break; #ifndef POSIX_MISTAKE case ')': /* happens only if no current unmatched ( */ @@ -398,12 +402,12 @@ register struct parse *p; p_bracket(p); break; case '\\': - REQUIRE(MORE(), REG_EESCAPE); + (void)REQUIRE(MORE(), REG_EESCAPE); c = GETNEXT(); ordinary(p, c); break; case '{': /* okay as ordinary except if digit follows */ - REQUIRE(!MORE() || !isdigit(PEEK()), REG_BADRPT); + (void)REQUIRE(!MORE() || 
!isdigit(PEEK()), REG_BADRPT); /* FALLTHROUGH */ default: ordinary(p, c); @@ -419,7 +423,7 @@ register struct parse *p; return; /* no repetition, we're done */ NEXT(); - REQUIRE(!wascaret, REG_BADRPT); + (void)REQUIRE(!wascaret, REG_BADRPT); switch (c) { case '*': /* implemented as +? */ /* this case does not require the (y|) trick, noKLUDGE */ @@ -446,7 +450,7 @@ register struct parse *p; if (EAT(',')) { if (isdigit(PEEK())) { count2 = p_count(p); - REQUIRE(count <= count2, REG_BADBR); + (void)REQUIRE(count <= count2, REG_BADBR); } else /* single number with comma */ count2 = INFINITY; } else /* just a single number */ @@ -455,7 +459,7 @@ register struct parse *p; if (!EAT('}')) { /* error heuristics */ while (MORE() && PEEK() != '}') NEXT(); - REQUIRE(MORE(), REG_EBRACE); + (void)REQUIRE(MORE(), REG_EBRACE); SETERROR(REG_BADBR); } break; @@ -478,7 +482,7 @@ static void p_str(p) register struct parse *p; { - REQUIRE(MORE(), REG_EMPTY); + (void)REQUIRE(MORE(), REG_EMPTY); while (MORE()) ordinary(p, GETNEXT()); } @@ -521,7 +525,7 @@ register int end2; /* second terminating character */ p->g->neol++; } - REQUIRE(HERE() != start, REG_EMPTY); /* require nonempty */ + (void)REQUIRE(HERE() != start, REG_EMPTY); /* require nonempty */ } /* @@ -546,7 +550,7 @@ int starordinary; /* is a leading * an ordinary character? */ assert(MORE()); /* caller should have ensured this */ c = GETNEXT(); if (c == '\\') { - REQUIRE(MORE(), REG_EESCAPE); + (void)REQUIRE(MORE(), REG_EESCAPE); c = BACKSL | (unsigned char)GETNEXT(); } switch (c) { @@ -576,7 +580,7 @@ int starordinary; /* is a leading * an ordinary character? */ assert(p->pend[subno] != 0); } EMIT(ORPAREN, subno); - REQUIRE(EATTWO('\\', ')'), REG_EPAREN); + (void)REQUIRE(EATTWO('\\', ')'), REG_EPAREN); break; case BACKSL|')': /* should not get here -- must be user */ case BACKSL|'}': @@ -606,7 +610,7 @@ int starordinary; /* is a leading * an ordinary character? 
*/ p->g->backrefs = 1; break; case '*': - REQUIRE(starordinary, REG_BADRPT); + (void)REQUIRE(starordinary, REG_BADRPT); /* FALLTHROUGH */ default: ordinary(p, c &~ BACKSL); @@ -624,7 +628,7 @@ int starordinary; /* is a leading * an ordinary character? */ if (EAT(',')) { if (MORE() && isdigit(PEEK())) { count2 = p_count(p); - REQUIRE(count <= count2, REG_BADBR); + (void)REQUIRE(count <= count2, REG_BADBR); } else /* single number with comma */ count2 = INFINITY; } else /* just a single number */ @@ -633,7 +637,7 @@ int starordinary; /* is a leading * an ordinary character? */ if (!EATTWO('\\', '}')) { /* error heuristics */ while (MORE() && !SEETWO('\\', '}')) NEXT(); - REQUIRE(MORE(), REG_EBRACE); + (void)REQUIRE(MORE(), REG_EBRACE); SETERROR(REG_BADBR); } } else if (c == (unsigned char)'$') /* $ (but not \$) ends it */ @@ -658,7 +662,7 @@ register struct parse *p; ndigits++; } - REQUIRE(ndigits > 0 && count <= DUPMAX, REG_BADBR); + (void)REQUIRE(ndigits > 0 && count <= DUPMAX, REG_BADBR); return(count); } @@ -673,7 +677,6 @@ static void p_bracket(p) register struct parse *p; { - register char c; register cset *cs = allocset(p); register int invert = 0; @@ -699,7 +702,7 @@ register struct parse *p; p_b_term(p, cs); if (EAT('-')) CHadd(cs, '-'); - MUSTEAT(']', REG_EBRACK); + (void)MUSTEAT(']', REG_EBRACK); if (p->error != 0) /* don't mess things up further */ return; @@ -770,21 +773,21 @@ register cset *cs; switch (c) { case ':': /* character class */ NEXT2(); - REQUIRE(MORE(), REG_EBRACK); + (void)REQUIRE(MORE(), REG_EBRACK); c = PEEK(); - REQUIRE(c != '-' && c != ']', REG_ECTYPE); + (void)REQUIRE(c != '-' && c != ']', REG_ECTYPE); p_b_cclass(p, cs); - REQUIRE(MORE(), REG_EBRACK); - REQUIRE(EATTWO(':', ']'), REG_ECTYPE); + (void)REQUIRE(MORE(), REG_EBRACK); + (void)REQUIRE(EATTWO(':', ']'), REG_ECTYPE); break; case '=': /* equivalence class */ NEXT2(); - REQUIRE(MORE(), REG_EBRACK); + (void)REQUIRE(MORE(), REG_EBRACK); c = PEEK(); - REQUIRE(c != '-' && c != ']', 
REG_ECOLLATE); + (void)REQUIRE(c != '-' && c != ']', REG_ECOLLATE); p_b_eclass(p, cs); - REQUIRE(MORE(), REG_EBRACK); - REQUIRE(EATTWO('=', ']'), REG_ECOLLATE); + (void)REQUIRE(MORE(), REG_EBRACK); + (void)REQUIRE(EATTWO('=', ']'), REG_ECOLLATE); break; default: /* symbol, ordinary character, or range */ /* xxx revision needed for multichar stuff */ @@ -798,10 +801,17 @@ register cset *cs; finish = p_b_symbol(p); } else finish = start; -/* xxx what about signed chars here... */ - REQUIRE(start <= finish, REG_ERANGE); - for (i = start; i <= finish; i++) - CHadd(cs, i); + if (start == finish) + CHadd(cs, start); + else { + (void)REQUIRE(__collate_range_cmp(start, finish) <= 0, REG_ERANGE); + for (i = CHAR_MIN; i <= CHAR_MAX; i++) { + if ( __collate_range_cmp(start, i) <= 0 + && __collate_range_cmp(i, finish) <= 0 + ) + CHadd(cs, i); + } + } break; } } @@ -815,13 +825,12 @@ p_b_cclass(p, cs) register struct parse *p; register cset *cs; { + register int c; register char *sp = p->next; register struct cclass *cp; register size_t len; - register char *u; - register char c; - while (MORE() && isalpha(PEEK())) + while (MORE() && isalpha((uch)PEEK())) NEXT(); len = p->next - sp; for (cp = cclasses; cp->name != NULL; cp++) @@ -833,11 +842,72 @@ register cset *cs; return; } - u = cp->chars; - while ((c = *u++) != '\0') - CHadd(cs, c); + switch (cp->fidx) { + case CALNUM: + for (c = CHAR_MIN; c <= CHAR_MAX; c++) + if (isalnum((uch)c)) + CHadd(cs, c); + break; + case CALPHA: + for (c = CHAR_MIN; c <= CHAR_MAX; c++) + if (isalpha((uch)c)) + CHadd(cs, c); + break; + case CBLANK: + for (c = CHAR_MIN; c <= CHAR_MAX; c++) + if (isblank((uch)c)) + CHadd(cs, c); + break; + case CCNTRL: + for (c = CHAR_MIN; c <= CHAR_MAX; c++) + if (iscntrl((uch)c)) + CHadd(cs, c); + break; + case CDIGIT: + for (c = CHAR_MIN; c <= CHAR_MAX; c++) + if (isdigit((uch)c)) + CHadd(cs, c); + break; + case CGRAPH: + for (c = CHAR_MIN; c <= CHAR_MAX; c++) + if (isgraph((uch)c)) + CHadd(cs, c); + break; + case 
CLOWER: + for (c = CHAR_MIN; c <= CHAR_MAX; c++) + if (islower((uch)c)) + CHadd(cs, c); + break; + case CPRINT: + for (c = CHAR_MIN; c <= CHAR_MAX; c++) + if (isprint((uch)c)) + CHadd(cs, c); + break; + case CPUNCT: + for (c = CHAR_MIN; c <= CHAR_MAX; c++) + if (ispunct((uch)c)) + CHadd(cs, c); + break; + case CSPACE: + for (c = CHAR_MIN; c <= CHAR_MAX; c++) + if (isspace((uch)c)) + CHadd(cs, c); + break; + case CUPPER: + for (c = CHAR_MIN; c <= CHAR_MAX; c++) + if (isupper((uch)c)) + CHadd(cs, c); + break; + case CXDIGIT: + for (c = CHAR_MIN; c <= CHAR_MAX; c++) + if (isxdigit((uch)c)) + CHadd(cs, c); + break; + } +#if 0 for (u = cp->multis; *u != '\0'; u += strlen(u) + 1) MCadd(p, cs, u); +#endif } /* @@ -867,13 +937,13 @@ register struct parse *p; { register char value; - REQUIRE(MORE(), REG_EBRACK); + (void)REQUIRE(MORE(), REG_EBRACK); if (!EATTWO('[', '.')) return(GETNEXT()); /* collating symbol */ value = p_b_coll_elem(p, '.'); - REQUIRE(EATTWO('.', ']'), REG_ECOLLATE); + (void)REQUIRE(EATTWO('.', ']'), REG_ECOLLATE); return(value); } @@ -889,7 +959,6 @@ int endc; /* name ended by endc,']' */ register char *sp = p->next; register struct cname *cp; register int len; - register char c; while (MORE() && !SEETWO(endc, ']')) NEXT(); @@ -915,6 +984,7 @@ static char /* if no counterpart, return ch */ othercase(ch) int ch; { + ch = (unsigned char)ch; assert(isalpha(ch)); if (isupper(ch)) return(tolower(ch)); @@ -939,6 +1009,7 @@ int ch; register char *oldend = p->end; char bracket[3]; + ch = (unsigned char)ch; assert(othercase(ch) != ch); /* p_bracket() would recurse */ p->next = bracket; p->end = bracket+2; @@ -962,7 +1033,7 @@ register int ch; { register cat_t *cap = p->g->categories; - if ((p->g->cflags&REG_ICASE) && isalpha(ch) && othercase(ch) != ch) + if ((p->g->cflags&REG_ICASE) && isalpha((unsigned char)ch) && othercase(ch) != ch) bothcases(p, ch); else { EMIT(OCHAR, (unsigned char)ch); @@ -1174,7 +1245,7 @@ freezeset(p, cs) register struct parse *p; register cset 
*cs; { - register uch h = cs->hash; + register short h = cs->hash; register int i; register cset *top = &p->g->sets[p->g->ncsets]; register cset *cs2; @@ -1213,7 +1284,7 @@ register cset *cs; for (i = 0; i < css; i++) if (CHIN(cs, i)) - return((char)i); + return((unsigned char)i); assert(never); return(0); /* arbitrary */ } @@ -1264,6 +1335,7 @@ register char *cp; cs->multis[cs->smultis - 1] = '\0'; } +#if used /* - mcsub - subtract a collating element from a cset == static void mcsub(register cset *cs, register char *cp); @@ -1321,6 +1393,7 @@ register char *cp; return(p); return(NULL); } +#endif /* - mcinvert - invert the list of collating elements in a cset |