From c55fd8f4ccc405c7f10da674a10d06bb63267295 Mon Sep 17 00:00:00 2001 From: ru Date: Mon, 31 Jan 2000 13:28:08 +0000 Subject: Merge FreeBSD changes into 2.4d. FreeBSD changes OBE'ed by 2.4d: * rev 1.5 - use collate for alpha character ranges. --- gnu/usr.bin/grep/dfa.c | 285 ++++++++++++++++--------------------------------- 1 file changed, 93 insertions(+), 192 deletions(-) (limited to 'gnu/usr.bin/grep') diff --git a/gnu/usr.bin/grep/dfa.c b/gnu/usr.bin/grep/dfa.c index 1fcf549..76dd0b2 100644 --- a/gnu/usr.bin/grep/dfa.c +++ b/gnu/usr.bin/grep/dfa.c @@ -79,6 +79,16 @@ extern void free(); #define ISCNTRL(C) (isascii(C) && iscntrl(C)) #endif +/* ISASCIIDIGIT differs from ISDIGIT, as follows: + - Its arg may be any int or unsigned int; it need not be an unsigned char. + - It's guaranteed to evaluate its argument exactly once. + - It's typically faster. + Posix 1003.2-1992 section 2.5.2.1 page 50 lines 1556-1558 says that + only '0' through '9' are digits. Prefer ISASCIIDIGIT to ISDIGIT unless + it's important to use the locale's definition of `digit' even when the + host does not conform to Posix. */ +#define ISASCIIDIGIT(c) ((unsigned) (c) - '0' <= 9) + /* If we (don't) have I18N. */ /* glibc defines _ */ #ifndef _ @@ -150,29 +160,8 @@ static char **comsubs PARAMS ((char *left, char *right)); static char **addlists PARAMS ((char **old, char **new)); static char **inboth PARAMS ((char **left, char **right)); -#ifdef __FreeBSD__ -static int -collate_range_cmp(c1, c2) - int c1, c2; -{ - static char s1[2], s2[2]; - int r; - - if (c1 == c2) - return 0; - s1[0] = c1; - s2[0] = c2; - if ((r = strcoll(s1, s2)) == 0) - r = c1 - c2; - - return r; -} -#endif - static ptr_t -xcalloc(n, s) - size_t n; - size_t s; +xcalloc (size_t n, size_t s) { ptr_t r = calloc(n, s); @@ -182,8 +171,7 @@ xcalloc(n, s) } static ptr_t -xmalloc(n) - size_t n; +xmalloc (size_t n) { ptr_t r = malloc(n); @@ -194,9 +182,7 @@ xmalloc(n) } static ptr_t -xrealloc(p, n) - ptr_t p; - size_t n; +xrealloc (ptr_t p, size_t n) { ptr_t r = realloc(p, n); @@ -222,8 +208,7 @@ xrealloc(p, n) #ifdef DEBUG static void -prtok(t) - token t; +prtok (token t) { char *s; @@ -261,33 +246,25 @@ prtok(t) /* Stuff pertaining to charclasses. */ static int -tstbit(b, c) - int b; - charclass c; +tstbit (int b, charclass c) { return c[b / INTBITS] & 1 << b % INTBITS; } static void -setbit(b, c) - int b; - charclass c; +setbit (int b, charclass c) { c[b / INTBITS] |= 1 << b % INTBITS; } static void -clrbit(b, c) - int b; - charclass c; +clrbit (int b, charclass c) { c[b / INTBITS] &= ~(1 << b % INTBITS); } static void -copyset(src, dst) - charclass src; - charclass dst; +copyset (charclass src, charclass dst) { int i; @@ -296,8 +273,7 @@ copyset(src, dst) } static void -zeroset(s) - charclass s; +zeroset (charclass s) { int i; @@ -306,8 +282,7 @@ zeroset(s) } static void -notset(s) - charclass s; +notset (charclass s) { int i; @@ -316,9 +291,7 @@ notset(s) } static int -equal(s1, s2) - charclass s1; - charclass s2; +equal (charclass s1, charclass s2) { int i; @@ -333,8 +306,7 @@ static struct dfa *dfa; /* Find the index of charclass s in dfa->charclasses, or allocate a new charclass. */ static int -charclass_index(s) - charclass s; +charclass_index (charclass s) { int i; @@ -358,10 +330,7 @@ static unsigned char eolbyte; /* Entry point to set syntax options. */ void -dfasyntax(bits, fold, eol) - reg_syntax_t bits; - int fold; - int eol; +dfasyntax (reg_syntax_t bits, int fold, int eol) { syntax_bits_set = 1; syntax_bits = bits; @@ -387,10 +356,12 @@ static int minrep, maxrep; /* Repeat counts for {m,n}. */ #define FETCH(c, eoferr) \ { \ if (! lexleft) \ - if (eoferr != 0) \ - dfaerror(eoferr); \ - else \ - return lasttok = END; \ + { \ + if (eoferr != 0) \ + dfaerror (eoferr); \ + else \ + return lasttok = END; \ + } \ (c) = (unsigned char) *lexptr++; \ --lexleft; \ } @@ -413,8 +384,8 @@ FUNC(is_print, ISPRINT) FUNC(is_graph, ISGRAPH) FUNC(is_cntrl, ISCNTRL) -static int is_blank(c) -int c; +static int +is_blank (int c) { return (c == ' ' || c == '\t'); } @@ -445,8 +416,7 @@ static struct { #define IS_WORD_CONSTITUENT(C) (ISALNUM(C) || (C) == '_') static int -looking_at(s) - const char *s; +looking_at (char const *s) { size_t len; @@ -457,12 +427,14 @@ looking_at(s) } static token -lex() +lex (void) { token c, c1, c2; int backslash = 0, invert; charclass ccl; int i; + char lo[2]; + char hi[2]; /* Basic plan: We fetch a character. If it's a backslash, we set the backslash flag and go through the loop again. @@ -595,10 +567,10 @@ lex() int lo = -1, hi = -1; char const *p = lexptr; char const *lim = p + lexleft; - for (; p != lim && ISDIGIT (*p); p++) + for (; p != lim && ISASCIIDIGIT (*p); p++) lo = (lo < 0 ? 0 : lo * 10) + *p - '0'; if (p != lim && *p == ',') - while (++p != lim && ISDIGIT (*p)) + while (++p != lim && ISASCIIDIGIT (*p)) hi = (hi < 0 ? 0 : hi * 10) + *p - '0'; else hi = lo; @@ -613,13 +585,13 @@ lex() {M,} - minimum count, maximum is infinity {M,N} - M through N */ FETCH(c, _("unfinished repeat count")); - if (ISDIGIT(c)) + if (ISASCIIDIGIT (c)) { minrep = c - '0'; for (;;) { FETCH(c, _("unfinished repeat count")); - if (!ISDIGIT(c)) + if (! ISASCIIDIGIT (c)) break; minrep = 10 * minrep + c - '0'; } @@ -629,7 +601,7 @@ lex() if (c == ',') { FETCH (c, _("unfinished repeat count")); - if (! ISDIGIT (c)) + if (! ISASCIIDIGIT (c)) maxrep = -1; else { @@ -637,7 +609,7 @@ lex() for (;;) { FETCH (c, _("unfinished repeat count")); - if (! ISDIGIT (c)) + if (! ISASCIIDIGIT (c)) break; maxrep = 10 * maxrep + c - '0'; } @@ -776,35 +748,26 @@ lex() } else c2 = c; -#ifdef __FreeBSD__ - if (collate_range_cmp(c, c2) <= 0) - { - token c3; - - for (c3 = 0; c3 < NOTCHAR; ++c3) { - if (collate_range_cmp(c, c3) <= 0 && - collate_range_cmp(c3, c2) <= 0) { - setbit(c3, ccl); - if (case_fold) - if (ISUPPER(c3)) - setbit(tolower(c3), ccl); - else if (ISLOWER(c)) - setbit(toupper(c3), ccl); - } - } - } -#else - while (c <= c2) + + lo[0] = c; lo[1] = '\0'; + hi[0] = c2; hi[1] = '\0'; + for (c = 0; c < NOTCHAR; c++) { - setbit(c, ccl); - if (case_fold) - if (ISUPPER(c)) - setbit(tolower(c), ccl); - else if (ISLOWER(c)) - setbit(toupper(c), ccl); - ++c; + char ch[2]; + ch[0] = c; ch[1] = '\0'; + if (strcoll (lo, ch) <= 0 && strcoll (ch, hi) <= 0) + { + setbit (c, ccl); + if (case_fold) + { + if (ISUPPER (c)) + setbit (tolower (c), ccl); + else if (ISLOWER (c)) + setbit (toupper (c), ccl); + } + } } -#endif + skip: ; } @@ -853,8 +816,7 @@ static int depth; /* Current depth of a hypothetical stack /* Add the given token to the parse tree, maintaining the depth count and updating the maximum depth if necessary. */ static void -addtok(t) - token t; +addtok (token t) { REALLOC_IF_NECESSARY(dfa->tokens, token, dfa->talloc, dfa->tindex); dfa->tokens[dfa->tindex++] = t; @@ -913,7 +875,7 @@ addtok(t) The parser builds a parse tree in postfix form in an array of tokens. */ static void -atom() +atom (void) { if ((tok >= 0 && tok < NOTCHAR) || tok >= CSET || tok == BACKREF || tok == BEGLINE || tok == ENDLINE || tok == BEGWORD @@ -936,8 +898,7 @@ atom() /* Return the number of tokens in the given subexpression. */ static int -nsubtoks(tindex) -int tindex; +nsubtoks (int tindex) { int ntoks1; @@ -959,8 +920,7 @@ int tindex; /* Copy the given subexpression to the top of the tree. */ static void -copytoks(tindex, ntokens) - int tindex, ntokens; +copytoks (int tindex, int ntokens) { int i; @@ -969,7 +929,7 @@ copytoks(tindex, ntokens) } static void -closure() +closure (void) { int tindex, ntokens, i; @@ -1004,7 +964,7 @@ closure() } static void -branch() +branch (void) { closure(); while (tok != RPAREN && tok != OR && tok >= 0) @@ -1015,8 +975,7 @@ branch() } static void -regexp(toplevel) - int toplevel; +regexp (int toplevel) { branch(); while (tok == OR) @@ -1034,11 +993,7 @@ regexp(toplevel) length of the string, so s can include NUL characters. D is a pointer to the struct dfa to parse into. */ void -dfaparse(s, len, d) - char *s; - size_t len; - struct dfa *d; - +dfaparse (char *s, size_t len, struct dfa *d) { dfa = d; lexstart = lexptr = s; @@ -1071,9 +1026,7 @@ dfaparse(s, len, d) /* Copy one set to another; the destination must be large enough. */ static void -copy(src, dst) - position_set *src; - position_set *dst; +copy (position_set *src, position_set *dst) { int i; @@ -1087,9 +1040,7 @@ copy(src, dst) the same index then their constraints are logically or'd together. S->elems must point to an array large enough to hold the resulting set. */ static void -insert(p, s) - position p; - position_set *s; +insert (position p, position_set *s) { int i; position t1, t2; @@ -1114,10 +1065,7 @@ insert(p, s) /* Merge two sets of positions into a third. The result is exactly as if the positions of both sets were inserted into an initially empty set. */ static void -merge(s1, s2, m) - position_set *s1; - position_set *s2; - position_set *m; +merge (position_set *s1, position_set *s2, position_set *m) { int i = 0, j = 0; @@ -1140,9 +1088,7 @@ merge(s1, s2, m) /* Delete a position from a set. */ static void -delete(p, s) - position p; - position_set *s; +delete (position p, position_set *s) { int i; @@ -1159,11 +1105,7 @@ delete(p, s) state. Newline and letter tell whether we got here on a newline or letter, respectively. */ static int -state_index(d, s, newline, letter) - struct dfa *d; - position_set *s; - int newline; - int letter; +state_index (struct dfa *d, position_set *s, int newline, int letter) { int hash = 0; int constraint; @@ -1228,12 +1170,8 @@ state_index(d, s, newline, letter) that position with the elements of its follow labeled with an appropriate constraint. Repeat exhaustively until no funny positions are left. S->elems must be large enough to hold the result. */ -static void epsclosure PARAMS ((position_set *s, struct dfa *d)); - static void -epsclosure(s, d) - position_set *s; - struct dfa *d; +epsclosure (position_set *s, struct dfa *d) { int i, j; int *visited; @@ -1345,9 +1283,7 @@ epsclosure(s, d) scheme; the number of elements in each set deeper in the stack can be used to determine the address of a particular set's array. */ void -dfaanalyze(d, searchflag) - struct dfa *d; - int searchflag; +dfaanalyze (struct dfa *d, int searchflag) { int *nullable; /* Nullable stack. */ int *nfirstpos; /* Element count stack for firstpos sets. */ @@ -1608,10 +1544,7 @@ dfaanalyze(d, searchflag) create a new group labeled with the characters of C and insert this position in that group. */ void -dfastate(s, d, trans) - int s; - struct dfa *d; - int trans[]; +dfastate (int s, struct dfa *d, int trans[]) { position_set grps[NOTCHAR]; /* As many as will ever be needed. */ charclass labels[NOTCHAR]; /* Labels corresponding to the groups. */ @@ -1853,9 +1786,7 @@ dfastate(s, d, trans) TODO: Improve this comment, get rid of the unnecessary redundancy. */ static void -build_state(s, d) - int s; - struct dfa *d; +build_state (int s, struct dfa *d) { int *trans; /* The new transition table. */ int i; @@ -1931,8 +1862,7 @@ build_state(s, d) } static void -build_state_zero(d) - struct dfa *d; +build_state_zero (struct dfa *d) { d->tralloc = 1; d->trcount = 0; @@ -1958,13 +1888,8 @@ build_state_zero(d) match needs to be verified by a backtracking matcher. Otherwise we store a 0 in *backref. */ char * -dfaexec(d, begin, end, newline, count, backref) - struct dfa *d; - char *begin; - char *end; - int newline; - int *count; - int *backref; +dfaexec (struct dfa *d, char *begin, char *end, + int newline, int *count, int *backref) { register int s, s1, tmp; /* Current state. */ register unsigned char *p; /* Current input character. */ @@ -2045,8 +1970,7 @@ dfaexec(d, begin, end, newline, count, backref) /* Initialize the components of a dfa that the other routines don't initialize for themselves. */ void -dfainit(d) - struct dfa *d; +dfainit (struct dfa *d) { d->calloc = 1; MALLOC(d->charclasses, charclass, d->calloc); @@ -2064,11 +1988,7 @@ dfainit(d) /* Parse and analyze a single string of the given length. */ void -dfacomp(s, len, d, searchflag) - char *s; - size_t len; - struct dfa *d; - int searchflag; +dfacomp (char *s, size_t len, struct dfa *d, int searchflag) { if (case_fold) /* dummy folding in service of dfamust() */ { @@ -2107,8 +2027,7 @@ dfacomp(s, len, d, searchflag) /* Free the storage held by the components of a dfa. */ void -dfafree(d) - struct dfa *d; +dfafree (struct dfa *d) { int i; struct dfamust *dm, *ndm; @@ -2220,9 +2139,7 @@ dfafree(d) 'psi|epsilon' is likelier)? */ static char * -icatalloc(old, new) - char *old; - char *new; +icatalloc (char *old, char *new) { char *result; size_t oldsize, newsize; @@ -2243,16 +2160,13 @@ icatalloc(old, new) } static char * -icpyalloc(string) - char *string; +icpyalloc (char *string) { return icatalloc((char *) NULL, string); } static char * -istrstr(lookin, lookfor) - char *lookin; - char *lookfor; +istrstr (char *lookin, char *lookfor) { char *cp; size_t len; @@ -2265,16 +2179,14 @@ istrstr(lookin, lookfor) } static void -ifree(cp) - char *cp; +ifree (char *cp) { if (cp != NULL) free(cp); } static void -freelist(cpp) - char **cpp; +freelist (char **cpp) { int i; @@ -2288,10 +2200,7 @@ freelist(cpp) } static char ** -enlist(cpp, new, len) - char **cpp; - char *new; - size_t len; +enlist (char **cpp, char *new, size_t len) { int i, j; @@ -2336,9 +2245,7 @@ enlist(cpp, new, len) list of their distinct common substrings. Return NULL if something seems wild. */ static char ** -comsubs(left, right) - char *left; - char *right; +comsubs (char *left, char *right) { char **cpp; char *lcp; @@ -2372,9 +2279,7 @@ comsubs(left, right) } static char ** -addlists(old, new) -char **old; -char **new; +addlists (char **old, char **new) { int i; @@ -2392,9 +2297,7 @@ char **new; /* Given two lists of substrings, return a new list giving substrings common to both. */ static char ** -inboth(left, right) - char **left; - char **right; +inboth (char **left, char **right) { char **both; char **temp; @@ -2435,16 +2338,14 @@ typedef struct } must; static void -resetmust(mp) -must *mp; +resetmust (must *mp) { mp->left[0] = mp->right[0] = mp->is[0] = '\0'; freelist(mp->in); } static void -dfamust(dfa) -struct dfa *dfa; +dfamust (struct dfa *dfa) { must *musts; must *mp; -- cgit v1.1